diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index d9781fe..0000000 --- a/.dockerignore +++ /dev/null @@ -1,20 +0,0 @@ -# Secrets — prevent .env files from being baked into the image -.env -.env.enc -.env.vault -.env.vault.enc - -# Version control — .git is huge and not needed in image -.git - -# Archives — not needed at runtime -*.tar.gz - -# Prometheus data — large, ephemeral data -prometheus-data/ - -# Compose files — only needed at runtime via volume mount -docker-compose.yml - -# Project TOML files — gitignored anyway, won't be in build context -projects/*.toml diff --git a/.env.example b/.env.example index 037abe1..762acd3 100644 --- a/.env.example +++ b/.env.example @@ -20,15 +20,14 @@ FORGE_URL=http://localhost:3000 # [CONFIG] local Forgejo instance # Each agent has its own Forgejo account and API token (#747). # Per-agent tokens fall back to FORGE_TOKEN if not set. FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents) -FORGE_TOKEN_DEVQWEN= # [SECRET] dev-qwen API token (for agents-llama) FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token FORGE_VAULT_TOKEN= # [SECRET] vault-bot API token FORGE_SUPERVISOR_TOKEN= # [SECRET] supervisor-bot API token FORGE_PREDICTOR_TOKEN= # [SECRET] predictor-bot API token -FORGE_ARCHITECT_TOKEN= # [SECRET] architect-bot API token -FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot +FORGE_ACTION_TOKEN= # [SECRET] action-bot API token +FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,action-bot # ── Backwards compatibility ─────────────────────────────────────────────── # If CODEBERG_TOKEN is set but FORGE_TOKEN is not, env.sh falls back to @@ -50,7 +49,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name # ── Vault-only secrets (DO NOT put these in 
.env) ──────────────────────── # These tokens grant access to external systems (GitHub, ClawHub, deploy targets). -# They live ONLY in .env.vault.enc and are injected into the ephemeral runner +# They live ONLY in .env.vault.enc and are injected into the ephemeral vault-runner # container at fire time (#745). lib/env.sh explicitly unsets them so agents # can never hold them directly — all external actions go through vault dispatch. # @@ -59,7 +58,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name # (deploy keys) — SSH keys for deployment targets # # To manage vault secrets: disinto secrets edit-vault -# (vault redesign in progress: PR-based approval, see #73-#77) +# See also: vault/vault-run-action.sh, vault/vault-fire.sh # ── Project-specific secrets ────────────────────────────────────────────── # Store all project secrets here so formulas reference env vars, never hardcode. diff --git a/.gitignore b/.gitignore index fc2d715..dd9365d 100644 --- a/.gitignore +++ b/.gitignore @@ -22,9 +22,3 @@ metrics/supervisor-metrics.jsonl .DS_Store dev/ci-fixes-*.json gardener/dust.jsonl - -# Individual encrypted secrets (managed by disinto secrets add) -secrets/ - -# Pre-built binaries for Docker builds (avoid network calls during build) -docker/agents/bin/ diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 40fc580..9a37bf4 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -6,6 +6,8 @@ # 2. Every custom function called by agent scripts is defined in lib/ or the script itself # # Fast (<10s): no network, no tmux, no Claude needed. +# Would have caught: kill_tmux_session (renamed), create_agent_session (missing), +# read_phase (missing from dev-agent.sh scope) set -euo pipefail @@ -19,16 +21,12 @@ FAILED=0 # Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296). 
get_fns() { local f="$1" - # Pure-awk implementation: avoids grep/sed cross-platform differences - # (BusyBox grep BRE quirks, sed ; separator issues on Alpine). - awk ' - /^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ { - line = $0 - gsub(/^[[:space:]]+/, "", line) - sub(/[[:space:]]*[(].*/, "", line) - print line - } - ' "$f" 2>/dev/null | sort -u || true + # BRE mode (no -E). Use [(][)] for literal parens — unambiguous across + # GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping + # even in BRE). BRE one-or-more via [X][X]* instead of +. + grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \ + | sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \ + | sort -u || true } # Extract call-position identifiers that look like custom function calls: @@ -86,7 +84,7 @@ while IFS= read -r -d '' f; do printf 'FAIL [syntax] %s\n' "$f" FAILED=1 fi -done < <(find dev gardener review planner supervisor architect lib vault -name "*.sh" -print0 2>/dev/null) +done < <(find dev gardener review planner supervisor lib vault action -name "*.sh" -print0 2>/dev/null) echo "syntax check done" # ── 2. Function-resolution check ───────────────────────────────────────────── @@ -97,12 +95,13 @@ echo "=== 2/2 Function resolution ===" # # Included — these are inline-sourced by agent scripts: # lib/env.sh — sourced by every agent (log, forge_api, etc.) +# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.) # lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session) # lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.) 
# lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) -# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets) -# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, check_memory, etc.) +# lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets) +# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.) # lib/mirrors.sh — sourced by merge sites (mirror_push) # lib/guard.sh — sourced by all cron entry points (check_active) # lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps @@ -117,7 +116,7 @@ echo "=== 2/2 Function resolution ===" # If a new lib file is added and sourced by agents, add it to LIB_FUNS below # and add a check_script call for it in the lib files section further down. LIB_FUNS=$( - for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do + for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do if [ -f "$f" ]; then get_fns "$f"; fi done | sort -u ) @@ -181,12 +180,13 @@ check_script() { # These are already in LIB_FUNS (their definitions are available to agents), # but this verifies calls *within* each lib file are also resolvable. 
check_script lib/env.sh lib/mirrors.sh +check_script lib/agent-session.sh check_script lib/agent-sdk.sh check_script lib/ci-helpers.sh check_script lib/secret-scan.sh check_script lib/file-action-issue.sh lib/secret-scan.sh check_script lib/tea-helpers.sh lib/secret-scan.sh -check_script lib/formula-session.sh +check_script lib/formula-session.sh lib/agent-session.sh check_script lib/load-project.sh check_script lib/mirrors.sh lib/env.sh check_script lib/guard.sh @@ -199,19 +199,26 @@ check_script lib/ci-debug.sh check_script lib/parse-deps.sh # Agent scripts — list cross-sourced files where function scope flows across files. +# phase-handler.sh defines default callback stubs; sourcing agents may override. check_script dev/dev-agent.sh +check_script dev/phase-handler.sh lib/secret-scan.sh check_script dev/dev-poll.sh check_script dev/phase-test.sh check_script gardener/gardener-run.sh check_script review/review-pr.sh lib/agent-sdk.sh check_script review/review-poll.sh -check_script planner/planner-run.sh lib/formula-session.sh +check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh check_script supervisor/supervisor-poll.sh check_script supervisor/update-prompt.sh +check_script vault/vault-agent.sh +check_script vault/vault-fire.sh +check_script vault/vault-poll.sh +check_script vault/vault-reject.sh +check_script action/action-poll.sh +check_script action/action-agent.sh check_script supervisor/supervisor-run.sh check_script supervisor/preflight.sh check_script predictor/predictor-run.sh -check_script architect/architect-run.sh echo "function resolution check done" diff --git a/.woodpecker/ci.yml b/.woodpecker/ci.yml index fc2f12a..08ae24d 100644 --- a/.woodpecker/ci.yml +++ b/.woodpecker/ci.yml @@ -8,19 +8,6 @@ when: event: [push, pull_request] -# Override default clone to authenticate against Forgejo using FORGE_TOKEN. -# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous -# git clones fail with exit code 128. 
FORGE_TOKEN is injected globally via -# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh). -clone: - git: - image: alpine/git - commands: - - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|") - - git clone --depth 1 "$AUTH_URL" . - - git fetch --depth 1 origin "$CI_COMMIT_REF" - - git checkout FETCH_HEAD - steps: - name: shellcheck image: koalaman/shellcheck-alpine:stable diff --git a/.woodpecker/detect-duplicates.py b/.woodpecker/detect-duplicates.py index 35f3aa8..c43fd1f 100644 --- a/.woodpecker/detect-duplicates.py +++ b/.woodpecker/detect-duplicates.py @@ -179,16 +179,9 @@ def collect_findings(root): Returns ``(ap_hits, dup_groups)`` with file paths relative to *root*. """ root = Path(root) - # Skip architect scripts for duplicate detection (stub formulas, see #99) - EXCLUDED_SUFFIXES = ("architect/architect-run.sh",) - - def is_excluded(p): - """Check if path should be excluded by suffix match.""" - return p.suffix == ".sh" and ".git" not in p.parts and any( - str(p).endswith(suffix) for suffix in EXCLUDED_SUFFIXES - ) - - sh_files = sorted(p for p in root.rglob("*.sh") if not is_excluded(p)) + sh_files = sorted( + p for p in root.rglob("*.sh") if ".git" not in p.parts + ) ap_hits = check_anti_patterns(sh_files) dup_groups = check_duplicates(sh_files) @@ -245,54 +238,9 @@ def print_duplicates(groups, label=""): # --------------------------------------------------------------------------- def main() -> int: - # Skip architect scripts for duplicate detection (stub formulas, see #99) - EXCLUDED_SUFFIXES = ("architect/architect-run.sh",) - - def is_excluded(p): - """Check if path should be excluded by suffix match.""" - return p.suffix == ".sh" and ".git" not in p.parts and any( - str(p).endswith(suffix) for suffix in EXCLUDED_SUFFIXES - ) - - sh_files = sorted(p for p in Path(".").rglob("*.sh") if not is_excluded(p)) - - # Standard patterns that are intentionally repeated across formula-driven agents - 
# These are not copy-paste violations but the expected structure - ALLOWED_HASHES = { - # Standard agent header: shebang, set -euo pipefail, directory resolution - "c93baa0f19d6b9ba271428bf1cf20b45": "Standard agent header (set -euo pipefail, SCRIPT_DIR, FACTORY_ROOT)", - # formula_prepare_profile_context followed by scratch context reading - "eaa735b3598b7b73418845ab00d8aba5": "Standard .profile context setup (formula_prepare_profile_context + SCRATCH_CONTEXT)", - # Standard prompt template: GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION - "2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)", - "93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)", - "c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)", - # Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh - "29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)", - # Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh - # Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh - "059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)", - # Docker compose environment block for agents service (generators.sh + hire-agent.sh) - # Intentional duplicate - both generate the same docker-compose.yml template - "8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh) - old", - "fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh) - old", - "e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old", - # The hash shown in output is 161a80f7 - need to match 
exactly what the script finds - "161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old", - # New hash after explicit environment fix (#381) - "83fa229b86a7fdcb1d3591ab8e718f9d": "Docker compose explicit environment block (generators.sh + hire-agent.sh) - #381", - # Verification mode helper functions - intentionally duplicated in dispatcher and entrypoint - # These functions check if bug-report parent issues have all sub-issues closed - "b783d403276f78b49ad35840845126a1": "Verification helper: sub_issues variable declaration", - "4b19b9a1bdfbc62f003fc237ed270ed9": "Verification helper: python3 -c invocation", - "cc1d0a9f85dfe0cc32e9ef6361cb8c3a": "Verification helper: Python imports and args", - "768926748b811ebd30f215f57db5de40": "Verification helper: json.load from /dev/stdin", - "4c58586a30bcf6b009c02010ed8f6256": "Verification helper: sub_issues list initialization", - "53ea3d6359f51d622467bd77b079cc88": "Verification helper: iterate issues in data", - "21aec56a99d5252b23fb9a38b895e8e8": "Verification helper: check body for Decomposed from pattern", - "60ea98b3604557d539193b2a6624e232": "Verification helper: append sub-issue number", - "9f6ae8e7811575b964279d8820494eb0": "Verification helper: for loop done pattern", - } + sh_files = sorted( + p for p in Path(".").rglob("*.sh") if ".git" not in p.parts + ) if not sh_files: print("No .sh files found.") @@ -328,13 +276,8 @@ def main() -> int: # Duplicate diff: key by content hash base_dup_hashes = {g[0] for g in base_dups} - # Filter out allowed standard patterns that are intentionally repeated - new_dups = [ - g for g in cur_dups - if g[0] not in base_dup_hashes and g[0] not in ALLOWED_HASHES - ] - # Also filter allowed hashes from pre_dups for reporting - pre_dups = [g for g in cur_dups if g[0] in base_dup_hashes and g[0] not in ALLOWED_HASHES] + new_dups = [g for g in cur_dups if g[0] not in base_dup_hashes] + pre_dups = [g for g in cur_dups if g[0] 
in base_dup_hashes] # Report pre-existing as info if pre_ap or pre_dups: diff --git a/.woodpecker/smoke-init.yml b/.woodpecker/smoke-init.yml index 3953053..69afddb 100644 --- a/.woodpecker/smoke-init.yml +++ b/.woodpecker/smoke-init.yml @@ -1,19 +1,45 @@ +# .woodpecker/smoke-init.yml — End-to-end smoke test for disinto init +# +# Uses the Forgejo image directly (not as a service) so we have CLI +# access to set up Forgejo and create the bootstrap admin user. +# Then runs disinto init --bare --yes against the local Forgejo instance. +# +# Forgejo refuses to run as root, so all forgejo commands use su-exec +# to run as the 'git' user (pre-created in the Forgejo Docker image). + when: - event: pull_request path: - "bin/disinto" - "lib/load-project.sh" - - "lib/env.sh" - - "lib/generators.sh" - - "tests/**" + - "tests/smoke-init.sh" - ".woodpecker/smoke-init.yml" + - "docker/**" + - event: push + branch: main + path: + - "bin/disinto" + - "lib/load-project.sh" + - "tests/smoke-init.sh" + - ".woodpecker/smoke-init.yml" + - "docker/**" steps: - name: smoke-init - image: python:3-alpine + image: codeberg.org/forgejo/forgejo:11.0 + environment: + SMOKE_FORGE_URL: http://localhost:3000 commands: - - apk add --no-cache bash curl jq git coreutils - - python3 tests/mock-forgejo.py & echo $! 
> /tmp/mock-forgejo.pid - - sleep 2 + # Install test dependencies (Alpine-based image) + - apk add --no-cache bash curl jq python3 git >/dev/null 2>&1 + # Set up Forgejo data directories and config (owned by git user) + - mkdir -p /data/gitea/conf /data/gitea/repositories /data/gitea/lfs /data/gitea/log /data/git/.ssh /data/ssh + - printf '[database]\nDB_TYPE = sqlite3\nPATH = /data/gitea/forgejo.db\n\n[server]\nHTTP_PORT = 3000\nROOT_URL = http://localhost:3000/\nLFS_START_SERVER = false\n\n[security]\nINSTALL_LOCK = true\n\n[service]\nDISABLE_REGISTRATION = true\n' > /data/gitea/conf/app.ini + - chown -R git:git /data + # Start Forgejo as git user in background and wait for API + - su-exec git forgejo web --config /data/gitea/conf/app.ini & + - for i in $(seq 1 30); do curl -sf http://localhost:3000/api/v1/version >/dev/null 2>&1 && break; sleep 1; done + # Create bootstrap admin user via CLI + - su-exec git forgejo admin user create --admin --username setup-admin --password "SetupPass-789xyz" --email "setup-admin@smoke.test" --must-change-password=false --config /data/gitea/conf/app.ini + # Run the smoke test (as root is fine — only forgejo binary needs git user) - bash tests/smoke-init.sh - - kill $(cat /tmp/mock-forgejo.pid) 2>/dev/null || true diff --git a/AGENTS.md b/AGENTS.md index 78f1c29..ffc5561 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,19 +1,12 @@ - + # Disinto — Agent Instructions ## What this repo is -Disinto is an autonomous code factory. It manages seven agents (dev, review, -gardener, supervisor, planner, predictor, architect) that pick up issues from -forge, implement them, review PRs, plan from the vision, and keep the system -healthy — all via cron and `claude -p`. The dispatcher executes formula-based -operational tasks. - -Each agent has a `.profile` repository on Forgejo that stores lessons learned -from prior sessions, providing continuous improvement across runs. 
- -> **Note:** The vault is being redesigned as a PR-based approval workflow on the -> ops repo (see issues #73-#77). See [docs/VAULT.md](docs/VAULT.md) for details. Old vault scripts are being removed. +Disinto is an autonomous code factory. It manages eight agents (dev, review, +gardener, supervisor, planner, predictor, action, vault) that pick up issues from forge, +implement them, review PRs, plan from the vision, gate dangerous actions, and +keep the system healthy — all via cron and `claude -p`. See `README.md` for the full architecture and `disinto-factory/SKILL.md` for setup. @@ -21,16 +14,17 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set ``` disinto/ (code repo) -├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation +├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation ├── review/ review-poll.sh, review-pr.sh — PR review ├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula ├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula ├── planner/ planner-run.sh — direct cron executor for run-planner formula ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper) │ preflight.sh — pre-flight data collection for supervisor formula -├── architect/ architect-run.sh — strategic decomposition of vision into sprints -├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77) -├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py +│ supervisor-poll.sh — legacy bash orchestrator (superseded) +├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh — action gating + procurement +├── action/ action-poll.sh, action-agent.sh — operational task 
execution +├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, build-graph.py ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) └── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md) @@ -41,6 +35,9 @@ disinto-ops/ (ops repo — {project}-ops) │ ├── approved/ approved vault items │ ├── fired/ executed vault items │ └── rejected/ rejected vault items +├── journal/ +│ ├── planner/ daily planning logs +│ └── supervisor/ operational health logs ├── knowledge/ shared agent knowledge + best practices ├── evidence/ engagement data, experiment results ├── portfolio.md addressables + observables @@ -48,13 +45,10 @@ disinto-ops/ (ops repo — {project}-ops) └── RESOURCES.md accounts, tokens (refs), infra inventory ``` -> **Note:** Journal directories (`journal/planner/` and `journal/supervisor/`) have been removed from the ops repo. Agent journals are now stored in each agent's `.profile` repo on Forgejo. - -## Agent .profile Model - -Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/profile.sh`. - -> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`. +> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that +> orchestrate multi-step agent tasks (e.g., `run-gardener.toml`, `run-planner.toml`). 
This is +> distinct from "processes" described in `docs/EVIDENCE-ARCHITECTURE.md`, which are measurement +> and mutation pipelines that read external platforms and write structured evidence to git. ## Tech stack @@ -96,10 +90,8 @@ bash dev/phase-test.sh | Supervisor | `supervisor/` | Health monitoring | [supervisor/AGENTS.md](supervisor/AGENTS.md) | | Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) | | Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) | -| Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) | - -> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). -> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details. +| Action | `action/` | Operational task execution | [action/AGENTS.md](action/AGENTS.md) | +| Vault | `vault/` | Action gating + resource procurement | [vault/AGENTS.md](vault/AGENTS.md) | See [lib/AGENTS.md](lib/AGENTS.md) for the full shared helper reference. @@ -116,16 +108,14 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge → | `backlog` | Issue is queued for implementation. Dev-poll picks the first ready one. | Planner, gardener, humans | | `priority` | Queue tier above plain backlog. Issues with both `priority` and `backlog` are picked before plain `backlog` issues. FIFO within each tier. | Planner, humans | | `in-progress` | Dev-agent is actively working on this issue. Only one issue per project is in-progress at a time. | dev-agent.sh (claims issue) | -| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) | +| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. 
| dev-agent.sh, action-agent.sh, dev-poll.sh (on failure) | | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) | | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) | -| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) | -| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. | reproduce-agent (when reproduction succeeds but cause unclear) | -| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans | | `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans | | `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh | | `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) | | `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) | +| `action` | Operational task for the action-agent to execute via formula. | Planner, humans | ### Dependency conventions @@ -170,12 +160,12 @@ Humans write these. Agents read and enforce them. | ID | Decision | Rationale | |---|---|---| -| AD-001 | Nervous system runs from cron, not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | +| AD-001 | Nervous system runs from cron, not action issues. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | | AD-002 | Single-threaded pipeline per project. | One dev issue at a time. 
No new work while a PR awaits CI or review. Prevents merge conflicts and keeps context clear. | | AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. | | AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. | -| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. | -| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) | +| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Vault-runner gets only vault secrets; agents get only agent secrets. | +| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral vault-runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. 
| **Who enforces what:** - **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number. diff --git a/README.md b/README.md index 40c9889..2d0a798 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,9 @@ cron (daily) ──→ gardener-poll.sh ← backlog grooming (duplicates, stale cron (weekly) ──→ planner-poll.sh ← gap-analyse VISION.md, create backlog issues └── claude -p: update AGENTS.md → create issues +cron (*/30) ──→ vault-poll.sh ← safety gate for dangerous/irreversible actions + └── claude -p: classify → auto-approve/reject or escalate + ``` ## Prerequisites @@ -93,6 +96,7 @@ crontab -e # 3,13,23,33,43,53 * * * * /path/to/disinto/review/review-poll.sh # 6,16,26,36,46,56 * * * * /path/to/disinto/dev/dev-poll.sh # 15 8 * * * /path/to/disinto/gardener/gardener-poll.sh +# 0,30 * * * * /path/to/disinto/vault/vault-poll.sh # 0 9 * * 1 /path/to/disinto/planner/planner-poll.sh # 4. Verify @@ -119,13 +123,16 @@ disinto/ │ └── best-practices.md # Gardener knowledge base ├── planner/ │ ├── planner-poll.sh # Cron entry: weekly vision gap analysis -│ └── (formula-driven) # run-planner.toml executed by dispatcher +│ └── (formula-driven) # run-planner.toml executed by action-agent ├── vault/ -│ └── vault-env.sh # Shared env setup (vault redesign in progress, see #73-#77) -├── docs/ -│ └── VAULT.md # Vault PR workflow and branch protection documentation +│ ├── vault-poll.sh # Cron entry: process pending dangerous actions +│ ├── vault-agent.sh # Classifies and routes actions (claude -p) +│ ├── vault-fire.sh # Executes an approved action +│ ├── vault-reject.sh # Marks an action as rejected +│ └── PROMPT.md # System prompt for vault agent └── supervisor/ ├── supervisor-poll.sh # Supervisor: health checks + claude -p + ├── PROMPT.md # Supervisor's system prompt ├── update-prompt.sh # Self-learning: append to best-practices └── best-practices/ # Progressive disclosure knowledge base ├── memory.md @@ -146,9 +153,7 @@ 
disinto/ | **Review** | Every 10 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. | | **Gardener** | Daily | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. | | **Planner** | Weekly | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. | - -> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). -> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow and branch protection details. +| **Vault** | Every 30 min | Safety gate for dangerous or irreversible actions. Classifies pending actions via Claude: auto-approve, auto-reject, or escalate to a human via vault/forge. | ## Design Principles diff --git a/action/AGENTS.md b/action/AGENTS.md new file mode 100644 index 0000000..55dadae --- /dev/null +++ b/action/AGENTS.md @@ -0,0 +1,34 @@ + +# Action Agent + +**Role**: Execute operational tasks described by action formulas — run scripts, +call APIs, send messages, collect human approval. Shares the same phase handler +as the dev-agent: if an action produces code changes, the orchestrator creates a +PR and drives the CI/review loop; otherwise Claude closes the issue directly. + +**Trigger**: `action-poll.sh` runs every 10 min via cron. Sources `lib/guard.sh` +and calls `check_active action` first — skips if `$FACTORY_ROOT/state/.action-active` +is absent. Then scans for open issues labeled `action` that have no active tmux +session, and spawns `action-agent.sh {issue_num}`. 
+ +**Key files**: +- `action/action-poll.sh` — Cron scheduler: finds open action issues with no active tmux session, spawns action-agent.sh +- `action/action-agent.sh` — Orchestrator: fetches issue body + prior comments, **checks all dependencies via `lib/parse-deps.sh` before spawning** (skips silently if any dep is still open), creates tmux session (`action-{project}-{issue_num}`) with interactive `claude`, injects formula prompt with phase protocol, enters `monitor_phase_loop` (shared via `dev/phase-handler.sh`) for CI/review lifecycle or direct completion + +**Session lifecycle**: +1. `action-poll.sh` finds open `action` issues with no active tmux session. +2. Spawns `action-agent.sh {issue_num}`. +3. Agent creates tmux session `action-{project}-{issue_num}`, injects prompt (formula + prior comments + phase protocol). +4. Agent enters `monitor_phase_loop` (shared with dev-agent via `dev/phase-handler.sh`). +5. **Path A (git output):** Claude pushes branch → `PHASE:awaiting_ci` → handler creates PR, polls CI → injects failures → Claude fixes → push → re-poll → CI passes → `PHASE:awaiting_review` → handler polls reviews → injects REQUEST_CHANGES → Claude fixes → approved → merge → cleanup. +6. **Path B (no git output):** Claude posts results as comment, closes issue → `PHASE:done` → handler cleans up (kill session, docker compose down, remove temp files). +7. For human input: Claude writes `PHASE:escalate`; human responds via vault/forge. + +**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. 
+ +**Environment variables consumed**: +- `FORGE_TOKEN`, `FORGE_ACTION_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `FORGE_URL`, `PROJECT_NAME`, `FORGE_WEB` +- `ACTION_IDLE_TIMEOUT` — Max seconds before killing idle session (default 14400 = 4h) +- `ACTION_MAX_LIFETIME` — Max total session wall-clock seconds (default 28800 = 8h); caps session independently of idle timeout + +**FORGE_REMOTE**: `action-agent.sh` auto-detects the git remote for `FORGE_URL` (same logic as dev-agent). Exported as `FORGE_REMOTE`, used for worktree creation and push instructions injected into the Claude prompt. diff --git a/action/action-agent.sh b/action/action-agent.sh new file mode 100755 index 0000000..38d7d39 --- /dev/null +++ b/action/action-agent.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash +# ============================================================================= +# action-agent.sh — Synchronous action agent: SDK + shared libraries +# +# Synchronous bash loop using claude -p (one-shot invocation). +# No tmux sessions, no phase files — the bash script IS the state machine. +# +# Usage: ./action-agent.sh <issue_num> [project.toml] +# +# Flow: +# 1. Preflight: issue_check_deps(), memory guard, concurrency lock +# 2. Parse model from YAML front matter in issue body (custom model selection) +# 3. Worktree: worktree_create() for action isolation +# 4. Load formula from issue body +# 5. Build prompt: formula + prior non-bot comments (resume context) +# 6. agent_run(worktree, prompt) → Claude executes action, may push +# 7. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh +# 8. 
Cleanup: worktree_cleanup(), issue_close() +# +# Action-specific (stays in runner): +# - YAML front matter parsing (model selection) +# - Bot username filtering for prior comments +# - Lifetime watchdog (MAX_LIFETIME=8h wall-clock cap) +# - Child process cleanup (docker compose, background jobs) +# +# From shared libraries: +# - Issue lifecycle: lib/issue-lifecycle.sh +# - Worktree: lib/worktree.sh +# - PR lifecycle: lib/pr-lifecycle.sh +# - Agent SDK: lib/agent-sdk.sh +# +# Log: action/action-poll-{project}.log +# ============================================================================= +set -euo pipefail + +ISSUE="${1:?Usage: action-agent.sh <issue_num> [project.toml]}" +export PROJECT_TOML="${2:-${PROJECT_TOML:-}}" + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" +# Use action-bot's own Forgejo identity (#747) +FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" +# shellcheck source=../lib/ci-helpers.sh +source "$FACTORY_ROOT/lib/ci-helpers.sh" +# shellcheck source=../lib/worktree.sh +source "$FACTORY_ROOT/lib/worktree.sh" +# shellcheck source=../lib/issue-lifecycle.sh +source "$FACTORY_ROOT/lib/issue-lifecycle.sh" +# shellcheck source=../lib/agent-sdk.sh +source "$FACTORY_ROOT/lib/agent-sdk.sh" +# shellcheck source=../lib/pr-lifecycle.sh +source "$FACTORY_ROOT/lib/pr-lifecycle.sh" + +BRANCH="action/issue-${ISSUE}" +WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" +LOCKFILE="/tmp/action-agent-${ISSUE}.lock" +LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log" +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +SID_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.sid" +MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap +SESSION_START_EPOCH=$(date +%s) + +log() { + printf '[%s] action#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE" +} + +# --- Concurrency lock (per issue) --- +if 
[ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "SKIP: action-agent already running for #${ISSUE} (PID ${LOCK_PID})" + exit 0 + fi + rm -f "$LOCKFILE" +fi +echo $$ > "$LOCKFILE" + +cleanup() { + local exit_code=$? + # Kill lifetime watchdog if running + if [ -n "${LIFETIME_WATCHDOG_PID:-}" ] && kill -0 "$LIFETIME_WATCHDOG_PID" 2>/dev/null; then + kill "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true + wait "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true + fi + rm -f "$LOCKFILE" + # Kill any remaining child processes spawned during the run + local children + children=$(jobs -p 2>/dev/null) || true + if [ -n "$children" ]; then + # shellcheck disable=SC2086 # intentional word splitting + kill $children 2>/dev/null || true + # shellcheck disable=SC2086 + wait $children 2>/dev/null || true + fi + # Best-effort docker cleanup for containers started during this action + (cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true + # Preserve worktree on crash for debugging; clean up on success + if [ "$exit_code" -ne 0 ]; then + worktree_preserve "$WORKTREE" "crashed (exit=$exit_code)" + else + worktree_cleanup "$WORKTREE" + fi + rm -f "$SID_FILE" +} +trap cleanup EXIT + +# --- Memory guard --- +memory_guard 2000 + +# --- Fetch issue --- +log "fetching issue #${ISSUE}" +ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${ISSUE}") || true + +if [ -z "$ISSUE_JSON" ] || ! 
printf '%s' "$ISSUE_JSON" | jq -e '.id' >/dev/null 2>&1; then + log "ERROR: failed to fetch issue #${ISSUE}" + exit 1 +fi + +ISSUE_TITLE=$(printf '%s' "$ISSUE_JSON" | jq -r '.title') +ISSUE_BODY=$(printf '%s' "$ISSUE_JSON" | jq -r '.body // ""') +ISSUE_STATE=$(printf '%s' "$ISSUE_JSON" | jq -r '.state') + +if [ "$ISSUE_STATE" != "open" ]; then + log "SKIP: issue #${ISSUE} is ${ISSUE_STATE}" + exit 0 +fi + +log "Issue: ${ISSUE_TITLE}" + +# --- Dependency check (shared library) --- +if ! issue_check_deps "$ISSUE"; then + log "SKIP: issue #${ISSUE} blocked by: ${_ISSUE_BLOCKED_BY[*]}" + exit 0 +fi + +# --- Extract model from YAML front matter (if present) --- +YAML_MODEL=$(printf '%s' "$ISSUE_BODY" | \ + sed -n '/^---$/,/^---$/p' | grep '^model:' | awk '{print $2}' | tr -d '"' || true) +if [ -n "$YAML_MODEL" ]; then + export CLAUDE_MODEL="$YAML_MODEL" + log "model from front matter: ${YAML_MODEL}" +fi + +# --- Resolve bot username(s) for comment filtering --- +_bot_login=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API%%/repos*}/user" | jq -r '.login // empty' 2>/dev/null || true) + +# Build list: token owner + any extra names from FORGE_BOT_USERNAMES (comma-separated) +_bot_logins="${_bot_login}" +if [ -n "${FORGE_BOT_USERNAMES:-}" ]; then + _bot_logins="${_bot_logins:+${_bot_logins},}${FORGE_BOT_USERNAMES}" +fi + +# --- Fetch existing comments (resume context, excluding bot comments) --- +COMMENTS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${ISSUE}/comments?limit=50") || true + +PRIOR_COMMENTS="" +if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSON" != "[]" ]; then + PRIOR_COMMENTS=$(printf '%s' "$COMMENTS_JSON" | \ + jq -r --arg bots "$_bot_logins" \ + '($bots | split(",") | map(select(. 
!= ""))) as $bl | + .[] | select(.user.login as $u | $bl | index($u) | not) | + "[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true) +fi + +# --- Determine git remote --- +cd "${PROJECT_REPO_ROOT}" +_forge_host=$(echo "$FORGE_URL" | sed 's|https\?://||; s|/.*||') +FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') +FORGE_REMOTE="${FORGE_REMOTE:-origin}" +export FORGE_REMOTE + +# --- Create isolated worktree --- +log "creating worktree: ${WORKTREE}" +git fetch "${FORGE_REMOTE}" "${PRIMARY_BRANCH}" 2>/dev/null || true +if ! worktree_create "$WORKTREE" "$BRANCH"; then + log "ERROR: worktree creation failed" + exit 1 +fi +log "worktree ready: ${WORKTREE}" + +# --- Build prompt --- +PRIOR_SECTION="" +if [ -n "$PRIOR_COMMENTS" ]; then + PRIOR_SECTION="## Prior comments (resume context) + +${PRIOR_COMMENTS} + +" +fi + +GIT_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") + +PROMPT="You are an action agent. Your job is to execute the action formula +in the issue below. + +## Issue #${ISSUE}: ${ISSUE_TITLE} + +${ISSUE_BODY} + +${PRIOR_SECTION}## Instructions + +1. Read the action formula steps in the issue body carefully. + +2. Execute each step in order using your Bash tool and any other tools available. + +3. Post progress as comments on issue #${ISSUE} after significant steps: + curl -sf -X POST \\ + -H \"Authorization: token \${FORGE_TOKEN}\" \\ + -H 'Content-Type: application/json' \\ + \"${FORGE_API}/issues/${ISSUE}/comments\" \\ + -d \"{\\\"body\\\": \\\"your comment here\\\"}\" + +4. If a step requires human input or approval, post a comment explaining what + is needed and stop — the orchestrator will block the issue. + +### Path A: If this action produces code changes (e.g. 
config updates, baselines): + - You are already in an isolated worktree at: ${WORKTREE} + - You are on branch: ${BRANCH} + - Make your changes, commit, and push: git push ${FORGE_REMOTE} ${BRANCH} + - **IMPORTANT:** The worktree is destroyed after completion. Push all + results before finishing — unpushed work will be lost. + +### Path B: If this action produces no code changes (investigation, report): + - Post results as a comment on issue #${ISSUE}. + - **IMPORTANT:** The worktree is destroyed after completion. Copy any + files you need to persistent paths before finishing. + +5. Environment variables available in your bash sessions: + FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME + (all sourced from ${FACTORY_ROOT}/.env) + +### CRITICAL: Never embed secrets in issue bodies, comments, or PR descriptions + - NEVER put API keys, tokens, passwords, or private keys in issue text or comments. + - Always reference secrets via env var names (e.g. \\\$BASE_RPC_URL, \\\${FORGE_TOKEN}). + - If a formula step needs a secret, read it from .env or the environment at runtime. + - Before posting any comment, verify it contains no credentials, hex keys > 32 chars, + or URLs with embedded API keys. + +If the prior comments above show work already completed, resume from where it +left off. + +${GIT_INSTRUCTIONS}" + +# --- Wall-clock lifetime watchdog (background) --- +# Caps total run time independently of claude -p timeout. When the cap is +# hit the watchdog kills the main process, which triggers cleanup via trap. +_lifetime_watchdog() { + local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) )) + [ "$remaining" -le 0 ] && remaining=1 + sleep "$remaining" + local hours=$(( MAX_LIFETIME / 3600 )) + log "MAX_LIFETIME (${hours}h) reached — killing agent" + # Post summary comment on issue + local body="Action agent killed: wall-clock lifetime cap (${hours}h) reached." 
+ curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H 'Content-Type: application/json' \ + "${FORGE_API}/issues/${ISSUE}/comments" \ + -d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true + kill $$ 2>/dev/null || true +} +_lifetime_watchdog & +LIFETIME_WATCHDOG_PID=$! + +# --- Run agent --- +log "running agent (worktree: ${WORKTREE})" +agent_run --worktree "$WORKTREE" "$PROMPT" +log "agent_run complete" + +# --- Detect if branch was pushed (Path A vs Path B) --- +PUSHED=false +# Check if remote branch exists +git fetch "${FORGE_REMOTE}" "$BRANCH" 2>/dev/null || true +if git rev-parse --verify "${FORGE_REMOTE}/${BRANCH}" >/dev/null 2>&1; then + PUSHED=true +fi +# Fallback: check local commits ahead of base +if [ "$PUSHED" = false ]; then + if git -C "$WORKTREE" log "${FORGE_REMOTE}/${PRIMARY_BRANCH}..${BRANCH}" --oneline 2>/dev/null | grep -q .; then + PUSHED=true + fi +fi + +if [ "$PUSHED" = true ]; then + # --- Path A: code changes pushed — create PR and walk to merge --- + log "branch pushed — creating PR" + PR_NUMBER="" + PR_NUMBER=$(pr_create "$BRANCH" "action: ${ISSUE_TITLE}" \ + "Closes #${ISSUE} + +Automated action execution by action-agent.") || true + + if [ -n "$PR_NUMBER" ]; then + log "walking PR #${PR_NUMBER} to merge" + pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true + + case "${_PR_WALK_EXIT_REASON:-}" in + merged) + log "PR #${PR_NUMBER} merged — closing issue" + issue_close "$ISSUE" + ;; + *) + log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})" + issue_block "$ISSUE" "pr_not_merged: ${_PR_WALK_EXIT_REASON:-unknown}" + ;; + esac + else + log "ERROR: failed to create PR" + issue_block "$ISSUE" "pr_creation_failed" + fi +else + # --- Path B: no code changes — close issue directly --- + log "no branch pushed — closing issue (Path B)" + issue_close "$ISSUE" +fi + +log "action-agent finished for issue #${ISSUE}" diff --git a/action/action-poll.sh b/action/action-poll.sh new file mode 
100755 index 0000000..8d67c47 --- /dev/null +++ b/action/action-poll.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# action-poll.sh — Cron scheduler: find open 'action' issues, spawn action-agent +# +# An issue is ready for action if: +# - It is open and labeled 'action' +# - No tmux session named action-{project}-{issue_num} is already active +# +# Usage: +# cron every 10min +# action-poll.sh [projects/foo.toml] # optional project config + +set -euo pipefail + +export PROJECT_TOML="${1:-}" +source "$(dirname "$0")/../lib/env.sh" +# Use action-bot's own Forgejo identity (#747) +FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" +# shellcheck source=../lib/guard.sh +source "$(dirname "$0")/../lib/guard.sh" +check_active action + +LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +log() { + printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +# --- Memory guard --- +memory_guard 2000 + +# --- Find open 'action' issues --- +log "scanning for open action issues" +ACTION_ISSUES=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues?state=open&labels=action&limit=50&type=issues") || true + +if [ -z "$ACTION_ISSUES" ] || [ "$ACTION_ISSUES" = "null" ]; then + log "no action issues found" + exit 0 +fi + +COUNT=$(printf '%s' "$ACTION_ISSUES" | jq 'length') +if [ "$COUNT" -eq 0 ]; then + log "no action issues found" + exit 0 +fi + +log "found ${COUNT} open action issue(s)" + +# Spawn action-agent for each issue that has no active tmux session. +# Only one agent is spawned per poll to avoid memory pressure; the next +# poll picks up remaining issues. 
+for i in $(seq 0 $((COUNT - 1))); do + ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$i].number") + SESSION="action-${PROJECT_NAME:-default}-${ISSUE_NUM}" + + if tmux has-session -t "$SESSION" 2>/dev/null; then + log "issue #${ISSUE_NUM}: session ${SESSION} already active, skipping" + continue + fi + + LOCKFILE="/tmp/action-agent-${ISSUE_NUM}.lock" + if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "issue #${ISSUE_NUM}: agent starting (PID ${LOCK_PID}), skipping" + continue + fi + fi + + log "spawning action-agent for issue #${ISSUE_NUM}" + nohup "${SCRIPT_DIR}/action-agent.sh" "$ISSUE_NUM" "$PROJECT_TOML" >> "$LOGFILE" 2>&1 & + log "started action-agent PID $! for issue #${ISSUE_NUM}" + break +done diff --git a/architect/AGENTS.md b/architect/AGENTS.md deleted file mode 100644 index 64b325e..0000000 --- a/architect/AGENTS.md +++ /dev/null @@ -1,65 +0,0 @@ - -# Architect — Agent Instructions - -## What this agent is - -The architect is a strategic decomposition agent that breaks down vision issues -into development sprints. It proposes sprints via PRs on the ops repo and -converses with humans through PR comments. - -## Role - -- **Input**: Vision issues from VISION.md, prerequisite tree from ops repo -- **Output**: Sprint proposals as PRs on the ops repo, sub-issue files -- **Mechanism**: Formula-driven execution via `formulas/run-architect.toml` -- **Identity**: `architect-bot` on Forgejo - -## Responsibilities - -1. **Strategic decomposition**: Break down large vision items into coherent - sprints that can be executed by the dev agent -2. **Design fork identification**: When multiple implementation approaches exist, - identify the forks and file sub-issues for each path -3. **Sprint PR creation**: Propose sprints as PRs on the ops repo with clear - acceptance criteria and dependencies -4. 
**Human conversation**: Respond to PR comments, refine sprint proposals based - on human feedback -5. **Sub-issue filing**: After design forks are resolved, file concrete sub-issues - for implementation - -## Formula - -The architect is driven by `formulas/run-architect.toml`. This formula defines -the steps for: -- Research: analyzing vision items and prerequisite tree -- Design: identifying implementation approaches and forks -- Sprint proposal: creating structured sprint PRs -- Sub-issue filing: creating concrete implementation issues - -## Execution - -Run via `architect/architect-run.sh`, which: -- Acquires a cron lock and checks available memory -- Sources shared libraries (env.sh, formula-session.sh) -- Uses FORGE_ARCHITECT_TOKEN for authentication -- Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo -- Executes the formula via `agent_run` - -## Cron - -Suggested cron entry (every 6 hours): -```cron -0 */6 * * * cd /path/to/disinto && bash architect/architect-run.sh -``` - -## State - -Architect state is tracked in `state/.architect-active` (disabled by default — -empty file not created, just document it). - -## Related issues - -- #96: Architect agent parent issue -- #100: Architect formula — research + design fork identification -- #101: Architect formula — sprint PR creation with questions -- #102: Architect formula — answer parsing + sub-issue filing diff --git a/architect/architect-run.sh b/architect/architect-run.sh deleted file mode 100755 index 0edeb70..0000000 --- a/architect/architect-run.sh +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# architect-run.sh — Cron wrapper: architect execution via SDK + formula -# -# Synchronous bash loop using claude -p (one-shot invocation). -# No tmux sessions, no phase files — the bash script IS the state machine. -# -# Flow: -# 1. Guards: cron lock, memory check -# 2. 
Load formula (formulas/run-architect.toml) -# 3. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph -# 4. agent_run(worktree, prompt) → Claude decomposes vision into sprints -# -# Usage: -# architect-run.sh [projects/disinto.toml] # project config (default: disinto) -# -# Cron: 0 */6 * * * # every 6 hours -# ============================================================================= -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" - -# Accept project config from argument; default to disinto -export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}" -# shellcheck source=../lib/env.sh -source "$FACTORY_ROOT/lib/env.sh" -# Override FORGE_TOKEN with architect-bot's token (#747) -FORGE_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" -# shellcheck source=../lib/formula-session.sh -source "$FACTORY_ROOT/lib/formula-session.sh" -# shellcheck source=../lib/worktree.sh -source "$FACTORY_ROOT/lib/worktree.sh" -# shellcheck source=../lib/guard.sh -source "$FACTORY_ROOT/lib/guard.sh" -# shellcheck source=../lib/agent-sdk.sh -source "$FACTORY_ROOT/lib/agent-sdk.sh" - -LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log" -# shellcheck disable=SC2034 # consumed by agent-sdk.sh -LOGFILE="$LOG_FILE" -# shellcheck disable=SC2034 # consumed by agent-sdk.sh -SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid" -SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md" -WORKTREE="/tmp/${PROJECT_NAME}-architect-run" - -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="architect" - -# Override log() to append to architect-specific log file -# shellcheck disable=SC2034 -log() { - local agent="${LOG_AGENT:-architect}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" -} - -# ── Guards ──────────────────────────────────────────────────────────────── -check_active architect 
-acquire_cron_lock "/tmp/architect-run.lock" -memory_guard 2000 - -log "--- Architect run start ---" - -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - -# ── Resolve agent identity for .profile repo ──────────────────────────── -if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then - AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) -fi - -# ── Load formula + context ─────────────────────────────────────────────── -load_formula_or_profile "architect" "$FACTORY_ROOT/formulas/run-architect.toml" || exit 1 -build_context_block VISION.md AGENTS.md ops:prerequisites.md - -# ── Prepare .profile context (lessons injection) ───────────────────────── -formula_prepare_profile_context - -# ── Build structural analysis graph ────────────────────────────────────── -build_graph_section - -# ── Read scratch file (compaction survival) ─────────────────────────────── -SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") -SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") - -# ── Build prompt ───────────────────────────────────────────────────────── -build_sdk_prompt_footer - -# Architect prompt: strategic decomposition of vision into sprints -# See: architect/AGENTS.md for full role description -# Pattern: heredoc function to avoid inline prompt construction -# Note: Uses CONTEXT_BLOCK, GRAPH_SECTION, SCRATCH_CONTEXT from formula-session.sh -# Architecture Decision: AD-003 — The runtime creates and destroys, the formula preserves. -build_architect_prompt() { - cat <<_PROMPT_EOF_ -You are the architect agent for ${FORGE_REPO}. Work through the formula below. - -Your role: strategic decomposition of vision issues into development sprints. 
-Propose sprints via PRs on the ops repo, converse with humans through PR comments, -and file sub-issues after design forks are resolved. - -## Project context -${CONTEXT_BLOCK} -${GRAPH_SECTION} -${SCRATCH_CONTEXT} -$(formula_lessons_block) -## Formula -${FORMULA_CONTENT} - -${SCRATCH_INSTRUCTION} -${PROMPT_FOOTER} -_PROMPT_EOF_ -} - -PROMPT=$(build_architect_prompt) - -# ── Create worktree ────────────────────────────────────────────────────── -formula_worktree_setup "$WORKTREE" - -# ── Run agent ───────────────────────────────────────────────────────────── -export CLAUDE_MODEL="sonnet" - -agent_run --worktree "$WORKTREE" "$PROMPT" -log "agent_run complete" - -rm -f "$SCRATCH_FILE" - -# Write journal entry post-session -profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true - -log "--- Architect run done ---" diff --git a/bin/disinto b/bin/disinto index 7d507a7..7a0714e 100755 --- a/bin/disinto +++ b/bin/disinto @@ -10,8 +10,7 @@ # disinto shell Shell into the agent container # disinto status Show factory status # disinto secrets Manage encrypted secrets -# disinto run Run action in ephemeral runner container -# disinto ci-logs [--step ] Read CI logs from Woodpecker SQLite +# disinto vault-run Run action in ephemeral vault container # # Usage: # disinto init https://github.com/user/repo @@ -25,13 +24,6 @@ set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" source "${FACTORY_ROOT}/lib/env.sh" -source "${FACTORY_ROOT}/lib/ops-setup.sh" -source "${FACTORY_ROOT}/lib/hire-agent.sh" -source "${FACTORY_ROOT}/lib/forge-setup.sh" -source "${FACTORY_ROOT}/lib/generators.sh" -source "${FACTORY_ROOT}/lib/forge-push.sh" -source "${FACTORY_ROOT}/lib/ci-setup.sh" -source "${FACTORY_ROOT}/lib/release.sh" # ── Helpers ────────────────────────────────────────────────────────────────── @@ -47,12 +39,7 @@ Usage: disinto shell Shell into the agent container disinto status Show factory status disinto secrets Manage encrypted secrets - disinto run Run action in ephemeral runner container - disinto ci-logs [--step ] - Read CI logs from Woodpecker SQLite - disinto release Create vault PR for release (e.g., v1.2.0) - disinto hire-an-agent [--formula ] - Hire a new agent (create user + .profile repo) + disinto vault-run Run action in ephemeral vault container Init options: --branch Primary branch (default: auto-detect) @@ -61,12 +48,6 @@ Init options: --forge-url Forge base URL (default: http://localhost:3000) --bare Skip compose generation (bare-metal setup) --yes Skip confirmation prompts - -Hire an agent options: - --formula Path to role formula TOML (default: formulas/.toml) - -CI logs options: - --step Filter logs to a specific step (e.g., smoke-init) EOF exit 1 } @@ -167,38 +148,381 @@ write_secrets_encrypted() { return 0 } -export FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" +FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" # Generate docker-compose.yml in the factory root. -# (Implementation in lib/generators.sh) generate_compose() { - _generate_compose_impl "$@" + local forge_port="${1:-3000}" + local compose_file="${FACTORY_ROOT}/docker-compose.yml" + + cat > "$compose_file" <<'COMPOSEEOF' +# docker-compose.yml — generated by disinto init +# Brings up Forgejo, Woodpecker, and the agent runtime. 
+ +services: + forgejo: + image: codeberg.org/forgejo/forgejo:11.0 + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - forgejo-data:/data + environment: + FORGEJO__database__DB_TYPE: sqlite3 + FORGEJO__server__ROOT_URL: http://forgejo:3000/ + FORGEJO__server__HTTP_PORT: "3000" + FORGEJO__security__INSTALL_LOCK: "true" + FORGEJO__service__DISABLE_REGISTRATION: "true" + FORGEJO__webhook__ALLOWED_HOST_LIST: "private" + networks: + - disinto-net + + woodpecker: + image: woodpeckerci/woodpecker-server:v3 + restart: unless-stopped + security_opt: + - apparmor=unconfined + ports: + - "8000:8000" + - "9000:9000" + volumes: + - woodpecker-data:/var/lib/woodpecker + environment: + WOODPECKER_FORGEJO: "true" + WOODPECKER_FORGEJO_URL: http://forgejo:3000 + WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} + WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} + WOODPECKER_HOST: http://woodpecker:8000 + WOODPECKER_OPEN: "true" + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_DATABASE_DRIVER: sqlite3 + WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite + depends_on: + - forgejo + networks: + - disinto-net + + woodpecker-agent: + image: woodpeckerci/woodpecker-agent:v3 + restart: unless-stopped + network_mode: host + privileged: true + volumes: + - /var/run/docker.sock:/var/run/docker.sock + environment: + WOODPECKER_SERVER: localhost:9000 + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_GRPC_SECURE: "false" + WOODPECKER_HEALTHCHECK_ADDR: ":3333" + WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net + WOODPECKER_MAX_WORKFLOWS: 1 + depends_on: + - woodpecker + + agents: + build: ./docker/agents + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + - project-repos:/home/agent/repos + - ./:/home/agent/disinto:ro + - ${HOME}/.claude:/home/agent/.claude + - ${HOME}/.claude.json:/home/agent/.claude.json:ro + - 
CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro + - ${HOME}/.ssh:/home/agent/.ssh:ro + environment: + FORGE_URL: http://forgejo:3000 + WOODPECKER_SERVER: http://woodpecker:8000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + env_file: + - .env + # IMPORTANT: agents get .env only (forge tokens, CI tokens, config). + # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in + # .env.vault.enc and are NEVER injected here — only the vault-runner + # container receives them at fire time (AD-006, #745). + depends_on: + - forgejo + - woodpecker + networks: + - disinto-net + + vault-runner: + build: ./docker/agents + profiles: ["vault"] + security_opt: + - apparmor=unconfined + volumes: + - ./vault:/home/agent/disinto/vault + - ./lib:/home/agent/disinto/lib:ro + - ./formulas:/home/agent/disinto/formulas:ro + environment: + FORGE_URL: http://forgejo:3000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + # env_file set at runtime by: disinto vault-run --env-file + entrypoint: ["bash", "/home/agent/disinto/vault/vault-run-action.sh"] + networks: + - disinto-net + + # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging + # Serves on ports 80/443, routes based on path + edge: + image: caddy:alpine + ports: + - "80:80" + - "443:443" + volumes: + - ./docker/Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + depends_on: + - forgejo + - woodpecker + - staging + networks: + - disinto-net + + # Staging container — static file server for staging artifacts + # Edge proxy routes to this container for default requests + staging: + image: caddy:alpine + command: ["caddy", "file-server", "--root", "/srv/site"] + volumes: + - ./docker:/srv/site:ro + networks: + - disinto-net + + # Staging deployment slot — activated by Woodpecker staging pipeline (#755). + # Profile-gated: only starts when explicitly targeted by deploy commands. 
+ # Customize image/ports/volumes for your project after init. + staging-deploy: + image: alpine:3 + profiles: ["staging"] + security_opt: + - apparmor=unconfined + environment: + DEPLOY_ENV: staging + networks: + - disinto-net + command: ["echo", "staging slot — replace with project image"] + +volumes: + forgejo-data: + woodpecker-data: + agent-data: + project-repos: + caddy_data: + +networks: + disinto-net: + driver: bridge +COMPOSEEOF + + # Patch the Claude CLI binary path — resolve from host PATH at init time. + local claude_bin + claude_bin="$(command -v claude 2>/dev/null || true)" + if [ -n "$claude_bin" ]; then + # Resolve symlinks to get the real binary path + claude_bin="$(readlink -f "$claude_bin")" + sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" + else + echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 + sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" + fi + + # Patch the forgejo port mapping into the file if non-default + if [ "$forge_port" != "3000" ]; then + # Add port mapping to forgejo service so it's reachable from host during init + sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" + else + sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"3000:3000\"" "$compose_file" + fi + + echo "Created: ${compose_file}" } # Generate docker/agents/ files if they don't already exist. -# (Implementation in lib/generators.sh) generate_agent_docker() { - _generate_agent_docker_impl "$@" + local docker_dir="${FACTORY_ROOT}/docker/agents" + mkdir -p "$docker_dir" + + if [ ! -f "${docker_dir}/Dockerfile" ]; then + echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 + fi + if [ ! -f "${docker_dir}/entrypoint.sh" ]; then + echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 + fi } # Generate docker/Caddyfile template for edge proxy. 
-# (Implementation in lib/generators.sh) generate_caddyfile() { - _generate_caddyfile_impl "$@" + local docker_dir="${FACTORY_ROOT}/docker" + local caddyfile="${docker_dir}/Caddyfile" + + if [ -f "$caddyfile" ]; then + echo "Caddyfile: ${caddyfile} (already exists, skipping)" + return + fi + + cat > "$caddyfile" <<'CADDYFILEEOF' +# Caddyfile — edge proxy configuration +# IP-only binding at bootstrap; domain + TLS added later via vault resource request + +:80 { + # Reverse proxy to Forgejo + handle /forgejo/* { + reverse_proxy forgejo:3000 + } + + # Reverse proxy to Woodpecker CI + handle /ci/* { + reverse_proxy woodpecker:8000 + } + + # Default: proxy to staging container + handle { + reverse_proxy staging:80 + } +} +CADDYFILEEOF + + echo "Created: ${caddyfile}" } # Generate docker/index.html default page. -# (Implementation in lib/generators.sh) generate_staging_index() { - _generate_staging_index_impl "$@" + local docker_dir="${FACTORY_ROOT}/docker" + local index_file="${docker_dir}/index.html" + + if [ -f "$index_file" ]; then + echo "Staging: ${index_file} (already exists, skipping)" + return + fi + + cat > "$index_file" <<'INDEXEOF' + + + + + + Nothing shipped yet + + + +
+

Nothing shipped yet

+

CI pipelines will update this page with your staging artifacts.

+
+ + +INDEXEOF + + echo "Created: ${index_file}" } # Generate template .woodpecker/ deployment pipeline configs in a project repo. # Creates staging.yml and production.yml alongside the project's existing CI config. # These pipelines trigger on Woodpecker's deployment event with environment filters. -# (Implementation in lib/generators.sh) generate_deploy_pipelines() { - _generate_deploy_pipelines_impl "$@" + local repo_root="$1" project_name="$2" + local wp_dir="${repo_root}/.woodpecker" + + mkdir -p "$wp_dir" + + # Skip if deploy pipelines already exist + if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then + echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" + return + fi + + if [ ! -f "${wp_dir}/staging.yml" ]; then + cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' +# .woodpecker/staging.yml — Staging deployment pipeline +# Triggered by vault-runner via Woodpecker promote API. +# Human approves promotion in vault → vault-runner calls promote → this runs. + +when: + event: deployment + environment: staging + +steps: + - name: deploy-staging + image: docker:27 + commands: + - echo "Deploying to staging environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" + # Pull the image built by CI and deploy to staging + # Customize these commands for your project: + # - docker compose -f docker-compose.yml --profile staging up -d + - echo "Staging deployment complete" + + - name: verify-staging + image: alpine:3 + commands: + - echo "Verifying staging deployment..." + # Add health checks, smoke tests, or integration tests here: + # - curl -sf http://staging:8080/health || exit 1 + - echo "Staging verification complete" +STAGINGEOF + echo "Created: ${wp_dir}/staging.yml" + fi + + if [ ! -f "${wp_dir}/production.yml" ]; then + cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' +# .woodpecker/production.yml — Production deployment pipeline +# Triggered by vault-runner via Woodpecker promote API. 
+# Human approves promotion in vault → vault-runner calls promote → this runs. + +when: + event: deployment + environment: production + +steps: + - name: deploy-production + image: docker:27 + commands: + - echo "Deploying to production environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" + # Pull the verified image and deploy to production + # Customize these commands for your project: + # - docker compose -f docker-compose.yml up -d + - echo "Production deployment complete" + + - name: verify-production + image: alpine:3 + commands: + - echo "Verifying production deployment..." + # Add production health checks here: + # - curl -sf http://production:8080/health || exit 1 + - echo "Production verification complete" +PRODUCTIONEOF + echo "Created: ${wp_dir}/production.yml" + fi } # Check whether compose mode is active (docker-compose.yml exists). @@ -206,11 +530,497 @@ is_compose_mode() { [ -f "${FACTORY_ROOT}/docker-compose.yml" ] } +# Provision or connect to a local Forgejo instance. +# Creates admin + bot users, generates API tokens, stores in .env. +# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. +setup_forge() { + local forge_url="$1" + local repo_slug="$2" + local use_bare="${DISINTO_BARE:-false}" + + echo "" + echo "── Forge setup ────────────────────────────────────────" + + # Helper: run a command inside the Forgejo container + _forgejo_exec() { + if [ "$use_bare" = true ]; then + docker exec -u git disinto-forgejo "$@" + else + docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" + fi + } + + # Check if Forgejo is already running + if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then + echo "Forgejo: ${forge_url} (already running)" + else + echo "Forgejo not reachable at ${forge_url}" + echo "Starting Forgejo via Docker..." + + if ! 
command -v docker &>/dev/null; then + echo "Error: docker not found — needed to provision Forgejo" >&2 + echo " Install Docker or start Forgejo manually at ${forge_url}" >&2 + exit 1 + fi + + # Extract port from forge_url + local forge_port + forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|') + forge_port="${forge_port:-3000}" + + if [ "$use_bare" = true ]; then + # Bare-metal mode: standalone docker run + mkdir -p "${FORGEJO_DATA_DIR}" + + if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then + docker start disinto-forgejo >/dev/null 2>&1 || true + else + docker run -d \ + --name disinto-forgejo \ + --restart unless-stopped \ + -p "${forge_port}:3000" \ + -p 2222:22 \ + -v "${FORGEJO_DATA_DIR}:/data" \ + -e "FORGEJO__database__DB_TYPE=sqlite3" \ + -e "FORGEJO__server__ROOT_URL=${forge_url}/" \ + -e "FORGEJO__server__HTTP_PORT=3000" \ + -e "FORGEJO__service__DISABLE_REGISTRATION=true" \ + codeberg.org/forgejo/forgejo:11.0 + fi + else + # Compose mode: start Forgejo via docker compose + docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo + fi + + # Wait for Forgejo to become healthy + echo -n "Waiting for Forgejo to start" + local retries=0 + while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 60 ]; then + echo "" + echo "Error: Forgejo did not become ready within 60s" >&2 + exit 1 + fi + echo -n "." + sleep 1 + done + echo " ready" + fi + + # Wait for Forgejo database to accept writes (API may be ready before DB is) + echo -n "Waiting for Forgejo database" + local db_ready=false + for _i in $(seq 1 30); do + if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then + db_ready=true + break + fi + echo -n "." 
+ sleep 1 + done + echo "" + if [ "$db_ready" != true ]; then + echo "Error: Forgejo database not ready after 30s" >&2 + exit 1 + fi + + # Create admin user if it doesn't exist + local admin_user="disinto-admin" + local admin_pass + admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + + if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then + echo "Creating admin user: ${admin_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --admin \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --email "admin@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create admin user '${admin_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --must-change-password=false + + # Verify admin user was actually created + if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then + echo "Error: admin user '${admin_user}' not found after creation" >&2 + exit 1 + fi + # Preserve password for Woodpecker OAuth2 token generation (#779) + _FORGE_ADMIN_PASS="$admin_pass" + fi + + # Get or create admin token + local admin_token + admin_token=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" \ + -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ + | jq -r '.sha1 // empty') || admin_token="" + + if [ -z "$admin_token" ]; then + # Token might already exist — try listing + admin_token=$(curl -sf \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || admin_token="" + fi + + if [ -z "$admin_token" ]; then + echo "Error: failed to obtain admin API token" >&2 + exit 1 + fi + + # Create bot users and tokens + # Each agent gets its own Forgejo account for identity and audit trail (#747). + # Map: bot-username -> env-var-name for the token + local -A bot_token_vars=( + [dev-bot]="FORGE_TOKEN" + [review-bot]="FORGE_REVIEW_TOKEN" + [planner-bot]="FORGE_PLANNER_TOKEN" + [gardener-bot]="FORGE_GARDENER_TOKEN" + [vault-bot]="FORGE_VAULT_TOKEN" + [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" + [predictor-bot]="FORGE_PREDICTOR_TOKEN" + [action-bot]="FORGE_ACTION_TOKEN" + ) + + local env_file="${FACTORY_ROOT}/.env" + local bot_user bot_pass token token_var + + for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do + bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + token_var="${bot_token_vars[$bot_user]}" + + if ! 
curl -sf --max-time 5 \ + -H "Authorization: token ${admin_token}" \ + "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then + echo "Creating bot user: ${bot_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --email "${bot_user}@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create bot user '${bot_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --must-change-password=false + + # Verify bot user was actually created + if ! curl -sf --max-time 5 \ + -H "Authorization: token ${admin_token}" \ + "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then + echo "Error: bot user '${bot_user}' not found after creation" >&2 + exit 1 + fi + fi + + # Generate token via API (basic auth as the bot user — Forgejo requires + # basic auth on POST /users/{username}/tokens, token auth is rejected) + token=$(curl -sf -X POST \ + -u "${bot_user}:${bot_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${bot_user}/tokens" \ + -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || token="" + + if [ -z "$token" ]; then + # Token name collision — create with timestamp suffix + token=$(curl -sf -X POST \ + -u "${bot_user}:${bot_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${bot_user}/tokens" \ + -d "{\"name\":\"disinto-${bot_user}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || token="" + fi + + if [ -z "$token" ]; then + echo "Error: failed to create API token for '${bot_user}'" >&2 + exit 1 + fi + + # Store token in .env under the 
per-agent variable name + if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then + sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" + else + printf '%s=%s\n' "$token_var" "$token" >> "$env_file" + fi + export "${token_var}=${token}" + echo " ${bot_user} token saved (${token_var})" + + # Backwards-compat aliases for dev-bot and review-bot + if [ "$bot_user" = "dev-bot" ]; then + export CODEBERG_TOKEN="$token" + elif [ "$bot_user" = "review-bot" ]; then + export REVIEW_BOT_TOKEN="$token" + fi + done + + # Store FORGE_URL in .env if not already present + if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then + printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" + fi + + # Create the repo on Forgejo if it doesn't exist + local org_name="${repo_slug%%/*}" + local repo_name="${repo_slug##*/}" + + # Check if repo already exists + if ! curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then + + # Try creating org first (ignore if exists) + curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs" \ + -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true + + # Create repo under org + if ! 
curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + # Fallback: create under the dev-bot user + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1 || true + fi + + # Add all bot users as collaborators + for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ + -d '{"permission":"write"}' >/dev/null 2>&1 || true + done + + echo "Repo: ${repo_slug} created on Forgejo" + else + echo "Repo: ${repo_slug} (already exists on Forgejo)" + fi + + echo "Forge: ${forge_url} (ready)" +} + # Create and seed the {project}-ops repo on Forgejo with initial directory structure. # The ops repo holds operational data: vault items, journals, evidence, prerequisites. -# ops repo setup is now in lib/ops-setup.sh +setup_ops_repo() { + local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" + local org_name="${ops_slug%%/*}" + local ops_name="${ops_slug##*/}" -# push_to_forge() is sourced from lib/forge-push.sh + echo "" + echo "── Ops repo setup ─────────────────────────────────────" + + # Check if ops repo already exists on Forgejo + if curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${ops_slug}" >/dev/null 2>&1; then + echo "Ops repo: ${ops_slug} (already exists on Forgejo)" + else + # Create ops repo under org + if ! 
curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then + # Fallback: create under the user + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data\"}" >/dev/null 2>&1 || true + fi + + # Add all bot users as collaborators + local bot_user + for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${ops_slug}/collaborators/${bot_user}" \ + -d '{"permission":"write"}' >/dev/null 2>&1 || true + done + + echo "Ops repo: ${ops_slug} created on Forgejo" + fi + + # Clone ops repo locally if not present + if [ ! 
-d "${ops_root}/.git" ]; then + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") + local clone_url="${auth_url}/${ops_slug}.git" + echo "Cloning: ops repo -> ${ops_root}" + git clone --quiet "$clone_url" "$ops_root" 2>/dev/null || { + echo "Initializing: ops repo at ${ops_root}" + mkdir -p "$ops_root" + git -C "$ops_root" init --initial-branch="${primary_branch}" -q + } + else + echo "Ops repo: ${ops_root} (already exists locally)" + fi + + # Seed directory structure + local seeded=false + mkdir -p "${ops_root}/vault/pending" + mkdir -p "${ops_root}/vault/approved" + mkdir -p "${ops_root}/vault/fired" + mkdir -p "${ops_root}/vault/rejected" + mkdir -p "${ops_root}/journal/planner" + mkdir -p "${ops_root}/journal/supervisor" + mkdir -p "${ops_root}/knowledge" + mkdir -p "${ops_root}/evidence/engagement" + + if [ ! -f "${ops_root}/README.md" ]; then + cat > "${ops_root}/README.md" < "${ops_root}/portfolio.md"; seeded=true; } + [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } + [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } + + # Commit and push seed content + if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then + # Auto-configure repo-local git identity if missing (#778) + if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then + git -C "$ops_root" config user.name "disinto-admin" + fi + if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then + git -C "$ops_root" config user.email "disinto-admin@localhost" + fi + + git -C "$ops_root" add -A + if ! 
git -C "$ops_root" diff --cached --quiet 2>/dev/null; then + git -C "$ops_root" commit -m "chore: seed ops repo structure" -q + # Push if remote exists + if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then + git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null || true + fi + fi + echo "Seeded: ops repo with initial structure" + fi +} + +# Push local clone to the Forgejo remote. +push_to_forge() { + local repo_root="$1" forge_url="$2" repo_slug="$3" + + # Build authenticated remote URL: http://dev-bot:@host:port/org/repo.git + if [ -z "${FORGE_TOKEN:-}" ]; then + echo "Error: FORGE_TOKEN not set — cannot push to Forgejo" >&2 + return 1 + fi + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") + local remote_url="${auth_url}/${repo_slug}.git" + # Display URL without token + local display_url="${forge_url}/${repo_slug}.git" + + # Always set the remote URL to ensure credentials are current + if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then + git -C "$repo_root" remote set-url forgejo "$remote_url" + else + git -C "$repo_root" remote add forgejo "$remote_url" + fi + echo "Remote: forgejo -> ${display_url}" + + # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) + if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then + echo "Push: skipped (local repo has no commits)" + return 0 + fi + + # Push all branches and tags + echo "Pushing: branches to forgejo" + if ! git -C "$repo_root" push forgejo --all 2>&1; then + echo "Error: failed to push branches to Forgejo" >&2 + return 1 + fi + echo "Pushing: tags to forgejo" + if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then + echo "Error: failed to push tags to Forgejo" >&2 + return 1 + fi + + # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) + local is_empty="true" + local verify_attempt + for verify_attempt in $(seq 1 5); do + local repo_info + repo_info=$(curl -sf --max-time 10 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" + if [ -z "$repo_info" ]; then + is_empty="skipped" + break # API unreachable, skip verification + fi + is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') + if [ "$is_empty" != "true" ]; then + echo "Verify: repo is not empty (push confirmed)" + break + fi + if [ "$verify_attempt" -lt 5 ]; then + sleep 2 + fi + done + if [ "$is_empty" = "true" ]; then + echo "Warning: Forgejo repo still reports empty after push" >&2 + return 1 + fi +} # Preflight check — verify all factory requirements before proceeding. preflight_check() { @@ -379,15 +1189,6 @@ create_labels() { ["underspecified"]="#fbca04" ["vision"]="#0e8a16" ["action"]="#1d76db" - ["prediction/unreviewed"]="#a2eeef" - ["prediction/dismissed"]="#d73a4a" - ["prediction/actioned"]="#28a745" - ["bug-report"]="#e11d48" - ["needs-triage"]="#f9d0c4" - ["reproduced"]="#0e8a16" - ["cannot-reproduce"]="#cccccc" - ["in-triage"]="#1d76db" - ["rejected"]="#cccccc" ) echo "Creating labels on ${repo}..." @@ -400,11 +1201,9 @@ create_labels() { | grep -o '"name":"[^"]*"' | cut -d'"' -f4) || existing="" local name color - local created=0 skipped=0 failed=0 - for name in backlog in-progress blocked tech-debt underspecified vision action bug-report prediction/unreviewed prediction/dismissed prediction/actioned needs-triage reproduced cannot-reproduce in-triage rejected; do + for name in backlog in-progress blocked tech-debt underspecified vision action; do if echo "$existing" | grep -qx "$name"; then echo " . 
${name} (already exists)" - skipped=$((skipped + 1)) continue fi color="${labels[$name]}" @@ -413,15 +1212,11 @@ create_labels() { -H "Content-Type: application/json" \ "${api}/labels" \ -d "{\"name\":\"${name}\",\"color\":\"${color}\"}" >/dev/null 2>&1; then - echo " + ${name} (created)" - created=$((created + 1)) + echo " + ${name}" else echo " ! ${name} (failed to create)" - failed=$((failed + 1)) fi done - - echo "Labels: ${created} created, ${skipped} skipped, ${failed} failed" } # Generate a minimal VISION.md template in the target project. @@ -461,57 +1256,402 @@ EOF echo " Commit this to your repo when ready" } -# Copy issue templates from templates/ to target project repo. -copy_issue_templates() { - local repo_root="$1" - local template_dir="${FACTORY_ROOT}/templates" - local target_dir="${repo_root}/.forgejo/ISSUE_TEMPLATE" +# Generate and optionally install cron entries for the project agents. +install_cron() { + local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - # Skip if templates directory doesn't exist - if [ ! -d "$template_dir" ]; then + # In compose mode, skip host cron — the agents container runs cron internally + if [ "$bare" = false ]; then + echo "" + echo "Cron: skipped (agents container handles scheduling in compose mode)" return fi - # Create target directory - mkdir -p "$target_dir" + # Bare mode: crontab is required on the host + if ! command -v crontab &>/dev/null; then + echo "Error: crontab not found (required for bare-metal mode)" >&2 + echo " Install: apt install cron / brew install cron" >&2 + exit 1 + fi - # Copy each template file if it doesn't already exist - for template in "$template_dir"/issue/*; do - [ -f "$template" ] || continue - local filename - filename=$(basename "$template") - local target_path="${target_dir}/${filename}" - if [ ! 
-f "$target_path" ]; then - cp "$template" "$target_path" - echo "Copied: ${target_path}" + # Use absolute path for the TOML in cron entries + local abs_toml + abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" + + local cron_block + cron_block="# disinto: ${name} +2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 +4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 +0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" + + echo "" + echo "Cron entries to install:" + echo "$cron_block" + echo "" + + if [ "$auto_yes" = false ] && [ -t 0 ]; then + read -rp "Install these cron entries? [y/N] " confirm + if [[ ! "$confirm" =~ ^[Yy] ]]; then + echo "Skipped cron install. Add manually with: crontab -e" + return + fi + fi + + # Append to existing crontab + { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab - + echo "Cron entries installed" +} + +# Set up Woodpecker CI to use Forgejo as its forge backend. +# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
+create_woodpecker_oauth() { + local forge_url="$1" repo_slug="$2" + + echo "" + echo "── Woodpecker OAuth2 setup ────────────────────────────" + + # Create OAuth2 application on Forgejo for Woodpecker + local oauth2_name="woodpecker-ci" + local redirect_uri="http://localhost:8000/authorize" + local existing_app client_id client_secret + + # Check if OAuth2 app already exists + existing_app=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ + | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true + + if [ -n "$existing_app" ]; then + echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" + client_id="$existing_app" + else + local oauth2_resp + oauth2_resp=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/applications/oauth2" \ + -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ + 2>/dev/null) || oauth2_resp="" + + if [ -z "$oauth2_resp" ]; then + echo "Warning: failed to create OAuth2 app on Forgejo" >&2 + return + fi + + client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') + client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') + + if [ -z "$client_id" ]; then + echo "Warning: OAuth2 app creation returned no client_id" >&2 + return + fi + + echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" + fi + + # Store Woodpecker forge config in .env + # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references + local env_file="${FACTORY_ROOT}/.env" + local wp_vars=( + "WOODPECKER_FORGEJO=true" + "WOODPECKER_FORGEJO_URL=${forge_url}" + ) + if [ -n "${client_id:-}" ]; then + wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") + fi + if [ -n "${client_secret:-}" ]; then + wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") + fi + + for var_line 
in "${wp_vars[@]}"; do + local var_name="${var_line%%=*}" + if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then + sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" else - echo "Skipped: ${target_path} (already exists)" + printf '%s\n' "$var_line" >> "$env_file" fi done + echo "Config: Woodpecker forge vars written to .env" } -# Install cron entries for project agents (implementation in lib/ci-setup.sh) -install_cron() { - _load_ci_context - _install_cron_impl "$@" -} - -# Create Woodpecker OAuth2 app on Forgejo (implementation in lib/ci-setup.sh) -create_woodpecker_oauth() { - _load_ci_context - _create_woodpecker_oauth_impl "$@" -} - -# Generate WOODPECKER_TOKEN via Forgejo OAuth2 flow (implementation in lib/ci-setup.sh) +# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. +# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). +# Called after compose stack is up, before activate_woodpecker_repo. generate_woodpecker_token() { - _load_ci_context - _generate_woodpecker_token_impl "$@" + local forge_url="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + local env_file="${FACTORY_ROOT}/.env" + local admin_user="disinto-admin" + local admin_pass="${_FORGE_ADMIN_PASS:-}" + + # Skip if already set + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + echo "Config: WOODPECKER_TOKEN already set in .env" + return 0 + fi + + echo "" + echo "── Woodpecker token generation ────────────────────────" + + if [ -z "$admin_pass" ]; then + echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 + echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 + return 1 + fi + + # Wait for Woodpecker to become ready + echo -n "Waiting for Woodpecker" + local retries=0 + while ! 
curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 30 ]; then + echo "" + echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 + return 1 + fi + echo -n "." + sleep 2 + done + echo " ready" + + # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token + local cookie_jar auth_body_file + cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) + auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) + + # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) + local csrf + csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ + | grep -o 'name="_csrf"[^>]*' | head -1 \ + | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || csrf="" + + if [ -z "$csrf" ]; then + echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ + -o /dev/null \ + "${forge_url}/user/login" \ + --data-urlencode "_csrf=${csrf}" \ + --data-urlencode "user_name=${admin_user}" \ + --data-urlencode "password=${admin_pass}" \ + 2>/dev/null || true + + # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) + local wp_redir + wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ + "${wp_server}/authorize" 2>/dev/null) || wp_redir="" + + if [ -z "$wp_redir" ]; then + echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + # Rewrite internal Docker network URLs to host-accessible URLs. + # Handle both plain and URL-encoded forms of the internal hostnames. 
+ local forge_url_enc wp_server_enc + forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') + wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') + wp_redir=$(printf '%s' "$wp_redir" \ + | sed "s|http://forgejo:3000|${forge_url}|g" \ + | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ + | sed "s|http://woodpecker:8000|${wp_server}|g" \ + | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") + + # Step 3: Hit Forgejo OAuth authorize endpoint with session + # First time: shows consent page. Already approved: redirects with code. + local auth_headers redirect_loc auth_code + auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o "$auth_body_file" \ + "$wp_redir" 2>/dev/null) || auth_headers="" + + redirect_loc=$(printf '%s' "$auth_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + # Auto-approved: extract code from redirect + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + else + # Consent page: extract CSRF and all form fields, POST grant approval + local consent_csrf form_client_id form_state form_redirect_uri + consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ + | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || consent_csrf="" + form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" + form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" + form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" + + if [ -n "$consent_csrf" ]; then + local grant_headers + grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o /dev/null -X POST \ + "${forge_url}/login/oauth/grant" \ 
+ --data-urlencode "_csrf=${consent_csrf}" \ + --data-urlencode "client_id=${form_client_id}" \ + --data-urlencode "state=${form_state}" \ + --data-urlencode "scope=" \ + --data-urlencode "nonce=" \ + --data-urlencode "redirect_uri=${form_redirect_uri}" \ + --data-urlencode "granted=true" \ + 2>/dev/null) || grant_headers="" + + redirect_loc=$(printf '%s' "$grant_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + fi + fi + fi + + rm -f "$auth_body_file" + + if [ -z "${auth_code:-}" ]; then + echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 + rm -f "$cookie_jar" + return 1 + fi + + # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) + local state + state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') + + local wp_headers wp_token + wp_headers=$(curl -sf -c "$cookie_jar" \ + -D - -o /dev/null \ + "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ + 2>/dev/null) || wp_headers="" + + # Extract token from redirect URL (Woodpecker returns ?access_token=...) + redirect_loc=$(printf '%s' "$wp_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + wp_token="" + if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then + wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') + fi + + # Fallback: check for user_sess cookie + if [ -z "$wp_token" ]; then + wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" + fi + + rm -f "$cookie_jar" + + if [ -z "$wp_token" ]; then + echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 + return 1 + fi + + # Step 5: Create persistent personal access token via Woodpecker API + # WP v3 requires CSRF header for POST operations with session tokens. 
+ local wp_csrf + wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ + "${wp_server}/web-config.js" 2>/dev/null \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" + + local pat_resp final_token + pat_resp=$(curl -sf -X POST \ + -b "user_sess=${wp_token}" \ + ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ + "${wp_server}/api/user/token" \ + 2>/dev/null) || pat_resp="" + + final_token="" + if [ -n "$pat_resp" ]; then + final_token=$(printf '%s' "$pat_resp" \ + | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ + 2>/dev/null) || final_token="" + fi + + # Use persistent token if available, otherwise use session token + final_token="${final_token:-$wp_token}" + + # Save to .env + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" + else + printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" + fi + export WOODPECKER_TOKEN="$final_token" + echo "Config: WOODPECKER_TOKEN generated and saved to .env" } -# Activate repo in Woodpecker CI (implementation in lib/ci-setup.sh) activate_woodpecker_repo() { - _load_ci_context - _activate_woodpecker_repo_impl "$@" + local forge_repo="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + + # Wait for Woodpecker to become ready after stack start + local retries=0 + while [ $retries -lt 10 ]; do + if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then + break + fi + retries=$((retries + 1)) + sleep 2 + done + + if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then + echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 + return + fi + + echo "" + echo "── Woodpecker repo activation ─────────────────────────" + + local wp_token="${WOODPECKER_TOKEN:-}" + if [ -z "$wp_token" ]; then + echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + return + fi + + local wp_repo_id + wp_repo_id=$(curl -sf \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" + else + # Get Forgejo repo numeric ID for WP activation + local forge_repo_id + forge_repo_id=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" + + local activate_resp + activate_resp=$(curl -sf -X POST \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ + 2>/dev/null) || activate_resp="" + + wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" + + # Set pipeline timeout to 5 minutes (default is 60) + curl -sf -X PATCH -H "Authorization: Bearer ${wp_token}" -H "Content-Type: application/json" "${wp_server}/api/repos/${wp_repo_id}" -d '{"timeout": 5}' >/dev/null 2>&1 && echo "Config: pipeline timeout set to 5 minutes" || true + else + echo "Warning: could not activate repo in Woodpecker" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + fi + fi + + # Store repo ID 
for later TOML generation + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + _WP_REPO_ID="$wp_repo_id" + fi } # ── init command ───────────────────────────────────────────────────────────── @@ -644,10 +1784,8 @@ p.write_text(text) echo "Repo: ${repo_root} (existing clone)" fi - # Push to local Forgejo (skip if SKIP_PUSH is set) - if [ "${SKIP_PUSH:-false}" = "false" ]; then - push_to_forge "$repo_root" "$forge_url" "$forge_repo" - fi + # Push to local Forgejo + push_to_forge "$repo_root" "$forge_url" "$forge_repo" # Detect primary branch if [ -z "$branch" ]; then @@ -656,26 +1794,10 @@ p.write_text(text) echo "Branch: ${branch}" # Set up {project}-ops repo (#757) - # Always use disinto-admin as the ops repo owner — forge_repo owner may be - # the calling user (e.g. johba) but the ops repo belongs to disinto-admin. - local ops_slug="disinto-admin/${project_name}-ops" + local ops_slug="${forge_repo}-ops" local ops_root="/home/${USER}/${project_name}-ops" setup_ops_repo "$forge_url" "$ops_slug" "$ops_root" "$branch" - # Set up vault branch protection on ops repo (#77) - # This ensures admin-only merge to main, blocking bots from merging vault PRs - # Use HUMAN_TOKEN (disinto-admin) or FORGE_TOKEN (dev-bot) for admin operations - export FORGE_OPS_REPO="$ops_slug" - # Source env.sh to ensure FORGE_TOKEN is available - source "${FACTORY_ROOT}/lib/env.sh" - source "${FACTORY_ROOT}/lib/branch-protection.sh" - if setup_vault_branch_protection "$branch"; then - echo "Branch protection: vault protection configured on ${ops_slug}" - else - echo "Warning: failed to set up vault branch protection" >&2 - fi - unset FORGE_OPS_REPO - # Generate project TOML (skip if already exists) if [ "$toml_exists" = false ]; then # Prompt for CI ID if interactive and not already set via flag @@ -688,24 +1810,6 @@ p.write_text(text) echo "Created: ${toml_path}" fi - # Update ops_repo in TOML with the resolved actual ops slug. 
- # Uses in-place substitution to prevent duplicate keys on repeated init runs. - # If the key is missing (manually created TOML), it is inserted after the repo line. - if [ -n "${_ACTUAL_OPS_SLUG:-}" ] && [ -f "$toml_path" ]; then - python3 -c " -import sys, re, pathlib -p = pathlib.Path(sys.argv[1]) -text = p.read_text() -new_val = 'ops_repo = \"' + sys.argv[2] + '\"' -if re.search(r'^ops_repo\s*=', text, re.MULTILINE): - text = re.sub(r'^ops_repo\s*=\s*.*\$', new_val, text, flags=re.MULTILINE) -else: - text = re.sub(r'^(repo\s*=\s*\"[^\"]*\")', r'\1\n' + new_val, text, flags=re.MULTILINE) -p.write_text(text) -" "$toml_path" "${_ACTUAL_OPS_SLUG}" - echo "Updated: ops_repo in ${toml_path}" - fi - # Create OAuth2 app on Forgejo for Woodpecker (before compose up) _WP_REPO_ID="" create_woodpecker_oauth "$forge_url" "$forge_repo" @@ -728,23 +1832,12 @@ p.write_text(text) # Create labels on remote create_labels "$forge_repo" "$forge_url" - # Set up branch protection on project repo (#10) - # This enforces PR flow: no direct pushes, 1 approval required, dev-bot can merge after CI - if setup_project_branch_protection "$forge_repo" "$branch"; then - echo "Branch protection: project protection configured on ${forge_repo}" - else - echo "Warning: failed to set up project branch protection" >&2 - fi - # Generate VISION.md template generate_vision "$repo_root" "$project_name" # Generate template deployment pipeline configs in project repo generate_deploy_pipelines "$repo_root" "$project_name" - # Copy issue templates to target project - copy_issue_templates "$repo_root" - # Install cron jobs install_cron "$project_name" "$toml_path" "$auto_yes" "$bare" @@ -753,36 +1846,17 @@ p.write_text(text) if [ -n "${MIRROR_NAMES:-}" ]; then echo "Mirrors: setting up remotes" local mname murl - local mirrors_ok=true for mname in $MIRROR_NAMES; do murl=$(eval "echo \"\$MIRROR_$(echo "$mname" | tr '[:lower:]' '[:upper:]')\"") || true [ -z "$murl" ] && continue - if git -C "$repo_root" 
remote get-url "$mname" >/dev/null 2>&1; then - if git -C "$repo_root" remote set-url "$mname" "$murl"; then - echo " + ${mname} -> ${murl} (updated)" - else - echo " ! ${mname} -> ${murl} (failed to update URL)" - mirrors_ok=false - fi - else - if git -C "$repo_root" remote add "$mname" "$murl"; then - echo " + ${mname} -> ${murl} (added)" - else - echo " ! ${mname} -> ${murl} (failed to add remote)" - mirrors_ok=false - fi - fi + git -C "$repo_root" remote add "$mname" "$murl" 2>/dev/null \ + || git -C "$repo_root" remote set-url "$mname" "$murl" 2>/dev/null || true + echo " + ${mname} -> ${murl}" done # Initial sync: push current primary branch to mirrors - if [ "$mirrors_ok" = true ]; then - source "${FACTORY_ROOT}/lib/mirrors.sh" - export PROJECT_REPO_ROOT="$repo_root" - if mirror_push; then - echo "Mirrors: initial sync complete" - else - echo "Warning: mirror push failed" >&2 - fi - fi + source "${FACTORY_ROOT}/lib/mirrors.sh" + export PROJECT_REPO_ROOT="$repo_root" + mirror_push fi # Encrypt secrets if SOPS + age are available @@ -821,16 +1895,9 @@ p.write_text(text) # Activate default agents (zero-cost when idle — they only invoke Claude # when there is actual work, so an empty project burns no LLM tokens) mkdir -p "${FACTORY_ROOT}/state" - - # State files are idempotent — create if missing, skip if present - for state_file in ".dev-active" ".reviewer-active" ".gardener-active"; do - if [ -f "${FACTORY_ROOT}/state/${state_file}" ]; then - echo "State: ${state_file} (already active)" - else - touch "${FACTORY_ROOT}/state/${state_file}" - echo "State: ${state_file} (created)" - fi - done + touch "${FACTORY_ROOT}/state/.dev-active" + touch "${FACTORY_ROOT}/state/.reviewer-active" + touch "${FACTORY_ROOT}/state/.gardener-active" echo "" echo "Done. Project ${project_name} is ready." 
@@ -955,88 +2022,7 @@ disinto_secrets() { fi } - local secrets_dir="${FACTORY_ROOT}/secrets" - local age_key_file="${HOME}/.config/sops/age/keys.txt" - - # Shared helper: ensure age key exists and export AGE_PUBLIC_KEY - _secrets_ensure_age_key() { - if ! command -v age &>/dev/null; then - echo "Error: age is required." >&2 - echo " Install age: apt install age / brew install age" >&2 - exit 1 - fi - if [ ! -f "$age_key_file" ]; then - echo "Error: age key not found at ${age_key_file}" >&2 - echo " Run 'disinto init' to generate one, or create manually with:" >&2 - echo " mkdir -p ~/.config/sops/age && age-keygen -o ${age_key_file}" >&2 - exit 1 - fi - AGE_PUBLIC_KEY="$(age-keygen -y "$age_key_file" 2>/dev/null)" - if [ -z "$AGE_PUBLIC_KEY" ]; then - echo "Error: failed to read public key from ${age_key_file}" >&2 - exit 1 - fi - export AGE_PUBLIC_KEY - } - case "$subcmd" in - add) - local name="${2:-}" - if [ -z "$name" ]; then - echo "Usage: disinto secrets add " >&2 - exit 1 - fi - _secrets_ensure_age_key - mkdir -p "$secrets_dir" - - printf 'Enter value for %s: ' "$name" >&2 - local value - IFS= read -rs value - echo >&2 - if [ -z "$value" ]; then - echo "Error: empty value" >&2 - exit 1 - fi - - local enc_path="${secrets_dir}/${name}.enc" - if [ -f "$enc_path" ]; then - printf 'Secret %s already exists. Overwrite? [y/N] ' "$name" >&2 - local confirm - read -r confirm - if [ "$confirm" != "y" ] && [ "$confirm" != "Y" ]; then - echo "Aborted." >&2 - exit 1 - fi - fi - if ! printf '%s' "$value" | age -r "$AGE_PUBLIC_KEY" -o "$enc_path"; then - echo "Error: encryption failed" >&2 - exit 1 - fi - echo "Stored: ${enc_path}" - ;; - show) - local name="${2:-}" - if [ -n "$name" ]; then - # Show individual secret: disinto secrets show - local enc_path="${secrets_dir}/${name}.enc" - if [ ! -f "$enc_path" ]; then - echo "Error: ${enc_path} not found" >&2 - exit 1 - fi - if [ ! 
-f "$age_key_file" ]; then - echo "Error: age key not found at ${age_key_file}" >&2 - exit 1 - fi - age -d -i "$age_key_file" "$enc_path" - else - # Show all agent secrets: disinto secrets show - if [ ! -f "$enc_file" ]; then - echo "Error: ${enc_file} not found." >&2 - exit 1 - fi - sops -d "$enc_file" - fi - ;; edit) if [ ! -f "$enc_file" ]; then echo "Error: ${enc_file} not found. Run 'disinto secrets migrate' first." >&2 @@ -1044,6 +2030,13 @@ disinto_secrets() { fi sops "$enc_file" ;; + show) + if [ ! -f "$enc_file" ]; then + echo "Error: ${enc_file} not found." >&2 + exit 1 + fi + sops -d "$enc_file" + ;; migrate) if [ ! -f "$env_file" ]; then echo "Error: ${env_file} not found — nothing to migrate." >&2 @@ -1051,12 +2044,6 @@ disinto_secrets() { fi _secrets_ensure_sops encrypt_env_file "$env_file" "$enc_file" - # Verify decryption works - if ! sops -d "$enc_file" >/dev/null 2>&1; then - echo "Error: failed to verify .env.enc decryption" >&2 - rm -f "$enc_file" - exit 1 - fi rm -f "$env_file" echo "Migrated: .env -> .env.enc (plaintext removed)" ;; @@ -1082,12 +2069,6 @@ disinto_secrets() { fi _secrets_ensure_sops encrypt_env_file "$vault_env_file" "$vault_enc_file" - # Verify decryption works before removing plaintext - if ! sops -d "$vault_enc_file" >/dev/null 2>&1; then - echo "Error: failed to verify .env.vault.enc decryption" >&2 - rm -f "$vault_enc_file" - exit 1 - fi rm -f "$vault_env_file" echo "Migrated: .env.vault -> .env.vault.enc (plaintext removed)" ;; @@ -1095,13 +2076,9 @@ disinto_secrets() { cat <&2 Usage: disinto secrets -Individual secrets (secrets/.enc): - add Prompt for value, encrypt, store in secrets/.enc - show Decrypt and print an individual secret - Agent secrets (.env.enc): edit Edit agent secrets (FORGE_TOKEN, CLAUDE_API_KEY, etc.) 
- show Show decrypted agent secrets (no argument) + show Show decrypted agent secrets migrate Encrypt .env -> .env.enc Vault secrets (.env.vault.enc): @@ -1114,10 +2091,10 @@ EOF esac } -# ── run command ─────────────────────────────────────────────────────────────── +# ── vault-run command ───────────────────────────────────────────────────────── -disinto_run() { - local action_id="${1:?Usage: disinto run }" +disinto_vault_run() { + local action_id="${1:?Usage: disinto vault-run }" local compose_file="${FACTORY_ROOT}/docker-compose.yml" local vault_enc="${FACTORY_ROOT}/.env.vault.enc" @@ -1151,73 +2128,24 @@ disinto_run() { echo "Vault secrets decrypted to tmpfile" - # Run action in ephemeral runner container + # Run action in ephemeral vault-runner container local rc=0 docker compose -f "$compose_file" \ run --rm --env-file "$tmp_env" \ - runner "$action_id" || rc=$? + vault-runner "$action_id" || rc=$? # Clean up — secrets gone rm -f "$tmp_env" - echo "Run tmpfile removed" + echo "Vault tmpfile removed" if [ "$rc" -eq 0 ]; then - echo "Run action ${action_id} completed successfully" + echo "Vault action ${action_id} completed successfully" else - echo "Run action ${action_id} failed (exit ${rc})" >&2 + echo "Vault action ${action_id} failed (exit ${rc})" >&2 fi return "$rc" } -# ── Pre-build: download binaries to docker/agents/bin/ ──────────────────────── -# This avoids network calls during docker build (needed for Docker-in-LXD builds) -# Returns 0 on success, 1 on failure -download_agent_binaries() { - local bin_dir="${FACTORY_ROOT}/docker/agents/bin" - mkdir -p "$bin_dir" - - echo "Downloading agent binaries to ${bin_dir}..." - - # Download SOPS - local sops_file="${bin_dir}/sops" - if [ ! -f "$sops_file" ]; then - echo " Downloading SOPS v3.9.4..." - curl -sL https://github.com/getsops/sops/releases/download/v3.9.4/sops-v3.9.4.linux.amd64 -o "$sops_file" - if [ ! 
-f "$sops_file" ]; then - echo "Error: failed to download SOPS" >&2 - return 1 - fi - fi - # Verify checksum - echo " Verifying SOPS checksum..." - if ! echo "5488e32bc471de7982ad895dd054bbab3ab91c417a118426134551e9626e4e85 ${sops_file}" | sha256sum -c - >/dev/null 2>&1; then - echo "Error: SOPS checksum verification failed" >&2 - return 1 - fi - chmod +x "$sops_file" - - # Download tea CLI - local tea_file="${bin_dir}/tea" - if [ ! -f "$tea_file" ]; then - echo " Downloading tea CLI v0.9.2..." - curl -sL https://dl.gitea.com/tea/0.9.2/tea-0.9.2-linux-amd64 -o "$tea_file" - if [ ! -f "$tea_file" ]; then - echo "Error: failed to download tea CLI" >&2 - return 1 - fi - fi - # Verify checksum - echo " Verifying tea CLI checksum..." - if ! echo "be10cdf9a619e3c0f121df874960ed19b53e62d1c7036cf60313a28b5227d54d ${tea_file}" | sha256sum -c - >/dev/null 2>&1; then - echo "Error: tea CLI checksum verification failed" >&2 - return 1 - fi - chmod +x "$tea_file" - - echo "Binaries downloaded and verified successfully" - return 0 -} - # ── up command ──────────────────────────────────────────────────────────────── disinto_up() { @@ -1228,14 +2156,6 @@ disinto_up() { exit 1 fi - # Pre-build: download binaries to docker/agents/bin/ to avoid network calls during docker build - echo "── Pre-build: downloading agent binaries ────────────────────────" - if ! download_agent_binaries; then - echo "Error: failed to download agent binaries" >&2 - exit 1 - fi - echo "" - # Decrypt secrets to temp .env if SOPS available and .env.enc exists local tmp_env="" local enc_file="${FACTORY_ROOT}/.env.enc" @@ -1291,82 +2211,17 @@ disinto_shell() { docker compose -f "$compose_file" exec agents bash } -# ── hire-an-agent command ───────────────────────────────────────────────────── - -# Creates a Forgejo user and .profile repo for an agent. 
-# Usage: disinto hire-an-agent [--formula ] -# disinto_hire_an_agent() is sourced from lib/hire-agent.sh - -# ── release command ─────────────────────────────────────────────────────────── -# disinto_release() is sourced from lib/release.sh - -# ── ci-logs command ────────────────────────────────────────────────────────── -# Reads CI logs from the Woodpecker SQLite database. -# Usage: disinto ci-logs [--step ] -disinto_ci_logs() { - local pipeline_number="" step_name="" - - if [ $# -lt 1 ]; then - echo "Error: pipeline number required" >&2 - echo "Usage: disinto ci-logs [--step ]" >&2 - exit 1 - fi - - # Parse arguments - while [ $# -gt 0 ]; do - case "$1" in - --step|-s) - step_name="$2" - shift 2 - ;; - -*) - echo "Unknown option: $1" >&2 - exit 1 - ;; - *) - if [ -z "$pipeline_number" ]; then - pipeline_number="$1" - else - echo "Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac - done - - if [ -z "$pipeline_number" ] || ! [[ "$pipeline_number" =~ ^[0-9]+$ ]]; then - echo "Error: pipeline number must be a positive integer" >&2 - exit 1 - fi - - local log_reader="${FACTORY_ROOT}/lib/ci-log-reader.py" - if [ ! 
-f "$log_reader" ]; then - echo "Error: ci-log-reader.py not found at $log_reader" >&2 - exit 1 - fi - - if [ -n "$step_name" ]; then - python3 "$log_reader" "$pipeline_number" --step "$step_name" - else - python3 "$log_reader" "$pipeline_number" - fi -} - # ── Main dispatch ──────────────────────────────────────────────────────────── case "${1:-}" in - init) shift; disinto_init "$@" ;; - up) shift; disinto_up "$@" ;; - down) shift; disinto_down "$@" ;; - logs) shift; disinto_logs "$@" ;; - shell) shift; disinto_shell ;; - status) shift; disinto_status "$@" ;; - secrets) shift; disinto_secrets "$@" ;; - run) shift; disinto_run "$@" ;; - ci-logs) shift; disinto_ci_logs "$@" ;; - release) shift; disinto_release "$@" ;; - hire-an-agent) shift; disinto_hire_an_agent "$@" ;; - -h|--help) usage ;; - *) usage ;; + init) shift; disinto_init "$@" ;; + up) shift; disinto_up "$@" ;; + down) shift; disinto_down "$@" ;; + logs) shift; disinto_logs "$@" ;; + shell) shift; disinto_shell ;; + status) shift; disinto_status "$@" ;; + secrets) shift; disinto_secrets "$@" ;; + vault-run) shift; disinto_vault_run "$@" ;; + -h|--help) usage ;; + *) usage ;; esac diff --git a/dev/AGENTS.md b/dev/AGENTS.md index e8a0ead..ccfe0c7 100644 --- a/dev/AGENTS.md +++ b/dev/AGENTS.md @@ -1,4 +1,4 @@ - + # Dev Agent **Role**: Implement issues autonomously — write code, push branches, address @@ -14,8 +14,9 @@ in-progress issues are also picked up. The direct-merge scan runs before the loc check so approved PRs get merged even while a dev-agent session is active. **Key files**: -- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming — skips if assigned to a different bot user. 
**Stale branch abandonment**: closes PRs and deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start). **Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`. If the issue is assigned to `$BOT_USER` (this agent), sets `BLOCKED_BY_INPROGRESS=true` — my thread is busy. If assigned to another agent, logs and falls through (does not block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds `blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work, filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents' PRs do not block this agent's pipeline (#358, #369). +- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `action`, `prediction/dismissed`, or `prediction/unreviewed` (replaced `prediction/backlog` — that label no longer exists) - `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval +- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge. - `dev/phase-test.sh` — Integration test for the phase protocol **Environment variables consumed** (via `lib/env.sh` + project TOML): @@ -32,7 +33,7 @@ check so approved PRs get merged even while a dev-agent session is active. **Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. 
-**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux session → phase file +**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session starts clean; the reinject prompt tells Claude not to re-escalate for the same reason. diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index c534dbd..3a78f53 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -30,7 +30,6 @@ source "$(dirname "$0")/../lib/worktree.sh" source "$(dirname "$0")/../lib/pr-lifecycle.sh" source "$(dirname "$0")/../lib/mirrors.sh" source "$(dirname "$0")/../lib/agent-sdk.sh" -source "$(dirname "$0")/../lib/formula-session.sh" # Auto-pull factory code to pick up merged fixes before any logic runs git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true @@ -41,7 +40,7 @@ REPO_ROOT="${PROJECT_REPO_ROOT}" LOCKFILE="/tmp/dev-agent-${PROJECT_NAME:-default}.lock" STATUSFILE="/tmp/dev-agent-status-${PROJECT_NAME:-default}" -BRANCH="fix/issue-${ISSUE}" # Default; will be updated after FORGE_REMOTE is known +BRANCH="fix/issue-${ISSUE}" WORKTREE="/tmp/${PROJECT_NAME}-worktree-${ISSUE}" SID_FILE="/tmp/dev-session-${PROJECT_NAME}-${ISSUE}.sid" PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json" @@ -186,11 +185,7 @@ log "preflight passed" # ============================================================================= # CLAIM ISSUE # ============================================================================= -if ! 
issue_claim "$ISSUE"; then - log "SKIP: failed to claim issue #${ISSUE} (already assigned to another agent)" - echo '{"status":"already_done","reason":"issue was claimed by another agent"}' > "$PREFLIGHT_RESULT" - exit 0 -fi +issue_claim "$ISSUE" CLAIMED=true # ============================================================================= @@ -263,19 +258,6 @@ FORGE_REMOTE="${FORGE_REMOTE:-origin}" export FORGE_REMOTE log "forge remote: ${FORGE_REMOTE}" -# Generate unique branch name per attempt to avoid collision with failed attempts -# Only apply when not in recovery mode (RECOVERY_MODE branch is already set from existing PR) -# First attempt: fix/issue-N, subsequent: fix/issue-N-1, fix/issue-N-2, etc. -if [ "$RECOVERY_MODE" = false ]; then - # Count only branches matching fix/issue-N, fix/issue-N-1, fix/issue-N-2, etc. (exact prefix match) - ATTEMPT=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>/dev/null | grep -c "refs/heads/fix/issue-${ISSUE}$" || echo 0) - ATTEMPT=$((ATTEMPT + $(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>/dev/null | wc -l))) - if [ "$ATTEMPT" -gt 0 ]; then - BRANCH="fix/issue-${ISSUE}-${ATTEMPT}" - fi -fi -log "using branch: ${BRANCH}" - if [ "$RECOVERY_MODE" = true ]; then if ! worktree_recover "$WORKTREE" "$BRANCH" "$FORGE_REMOTE"; then log "ERROR: worktree recovery failed" @@ -320,10 +302,6 @@ OPEN_ISSUES_SUMMARY=$(forge_api GET "/issues?state=open&labels=backlog&limit=20& PUSH_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") -# Load lessons from .profile repo if available (pre-session) -profile_load_lessons || true -LESSONS_INJECTION="${LESSONS_CONTEXT:-}" - if [ "$RECOVERY_MODE" = true ]; then GIT_DIFF_STAT=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --stat 2>/dev/null \ | head -20 || echo "(no diff)") @@ -354,10 +332,6 @@ ${GIT_DIFF_STAT} 3. Address any pending review comments or CI failures. 4. Commit and push to \`${BRANCH}\`. 
-${LESSONS_INJECTION:+## Lessons learned -${LESSONS_INJECTION} - -} ${PUSH_INSTRUCTIONS}" else INITIAL_PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. @@ -373,10 +347,6 @@ ${OPEN_ISSUES_SUMMARY} $(if [ -n "$PRIOR_ART_DIFF" ]; then printf '## Prior Art (closed PR — DO NOT start from scratch)\n\nA previous PR attempted this issue but was closed without merging. Reuse as much as possible.\n\n```diff\n%s\n```\n' "$PRIOR_ART_DIFF" fi) -${LESSONS_INJECTION:+## Lessons learned -${LESSONS_INJECTION} - -} ## Instructions 1. Read AGENTS.md in this repo for project context and coding conventions. @@ -480,40 +450,6 @@ Closing as already implemented." fi log "ERROR: no branch pushed after agent_run" - # Dump diagnostics - diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json" - if [ -f "$diag_file" ]; then - result_text=""; cost_usd=""; num_turns="" - result_text=$(jq -r '.result // "no result field"' "$diag_file" 2>/dev/null | head -50) || result_text="(parse error)" - cost_usd=$(jq -r '.cost_usd // "?"' "$diag_file" 2>/dev/null) || cost_usd="?" - num_turns=$(jq -r '.num_turns // "?"' "$diag_file" 2>/dev/null) || num_turns="?" 
- log "no_push diagnostics: turns=${num_turns} cost=${cost_usd}" - log "no_push result: ${result_text}" - # Save full output for later analysis - cp "$diag_file" "${DISINTO_LOG_DIR:-/tmp}/dev/no-push-${ISSUE}-$(date +%s).json" 2>/dev/null || true - fi - - # Save full session log for debugging - # Session logs are stored in CLAUDE_CONFIG_DIR/projects/{worktree-hash}/{session-id}.jsonl - _wt_hash=$(printf '%s' "$WORKTREE" | md5sum | cut -c1-12) - _cl_config="${CLAUDE_CONFIG_DIR:-$HOME/.claude}" - _session_log="${_cl_config}/projects/${_wt_hash}/${_AGENT_SESSION_ID}.jsonl" - if [ -f "$_session_log" ]; then - cp "$_session_log" "${DISINTO_LOG_DIR}/dev/no-push-session-${ISSUE}-$(date +%s).jsonl" 2>/dev/null || true - log "no_push session log saved to ${DISINTO_LOG_DIR}/dev/no-push-session-${ISSUE}-*.jsonl" - fi - - # Log session summary for debugging - if [ -f "$_session_log" ]; then - _read_calls=$(grep -c '"type":"read"' "$_session_log" 2>/dev/null || echo "0") - _edit_calls=$(grep -c '"type":"edit"' "$_session_log" 2>/dev/null || echo "0") - _bash_calls=$(grep -c '"type":"bash"' "$_session_log" 2>/dev/null || echo "0") - _text_calls=$(grep -c '"type":"text"' "$_session_log" 2>/dev/null || echo "0") - _failed_calls=$(grep -c '"exit_code":null' "$_session_log" 2>/dev/null || echo "0") - _total_turns=$(grep -c '"type":"turn"' "$_session_log" 2>/dev/null || echo "0") - log "no_push session summary: turns=${_total_turns} reads=${_read_calls} edits=${_edit_calls} bash=${_bash_calls} text=${_text_calls} failed=${_failed_calls}" - fi - issue_block "$ISSUE" "no_push" "Claude did not push branch ${BRANCH}" CLAIMED=false worktree_cleanup "$WORKTREE" @@ -561,12 +497,6 @@ if [ "$rc" -eq 0 ]; then log "PR #${PR_NUMBER} merged" issue_close "$ISSUE" - # Capture files changed for journal entry (after agent work) - FILES_CHANGED=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --name-only 2>/dev/null | tr '\n' ',' | sed 's/,$//') || FILES_CHANGED="" - - # Write 
journal entry post-session (before cleanup) - profile_write_journal "$ISSUE" "$ISSUE_TITLE" "merged" "$FILES_CHANGED" || true - # Pull primary branch and push to mirrors git -C "$REPO_ROOT" fetch "$FORGE_REMOTE" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true @@ -580,18 +510,6 @@ else # Exhausted or unrecoverable failure log "PR walk failed: ${_PR_WALK_EXIT_REASON:-unknown}" issue_block "$ISSUE" "${_PR_WALK_EXIT_REASON:-agent_failed}" - - # Capture files changed for journal entry (after agent work) - FILES_CHANGED=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --name-only 2>/dev/null | tr '\n' ',' | sed 's/,$//') || FILES_CHANGED="" - - # Write journal entry post-session (before cleanup) - outcome="blocked_${_PR_WALK_EXIT_REASON:-agent_failed}" - profile_write_journal "$ISSUE" "$ISSUE_TITLE" "$outcome" "$FILES_CHANGED" || true - - # Cleanup on failure: preserve remote branch and PR for debugging, clean up local worktree - # Remote state (PR and branch) stays open for inspection of CI logs and review comments - worktree_cleanup "$WORKTREE" - rm -f "$SID_FILE" "$IMPL_SUMMARY_FILE" CLAIMED=false fi diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index 484da76..98b8b7d 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -42,11 +42,6 @@ log() { printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" } -# Resolve current agent identity once at startup — cache for all assignee checks -BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API%%/repos*}/user" | jq -r '.login') || BOT_USER="" -log "running as agent: ${BOT_USER}" - # ============================================================================= # CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3) # ============================================================================= @@ -99,68 +94,6 @@ is_blocked() { | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1 } -# 
============================================================================= -# STALENESS DETECTION FOR IN-PROGRESS ISSUES -# ============================================================================= - -# Check if there's an open PR for a specific issue -# Args: issue_number -# Returns: 0 if open PR exists, 1 if not -open_pr_exists() { - local issue="$1" - local branch="fix/issue-${issue}" - local pr_num - - pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "$branch" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - - [ -n "$pr_num" ] -} - -# Relabel a stale in-progress issue to blocked with diagnostic comment -# Args: issue_number reason -# Uses shared helpers from lib/issue-lifecycle.sh -relabel_stale_issue() { - local issue="$1" reason="$2" - - log "relabeling stale in-progress issue #${issue} to blocked: ${reason}" - - # Remove in-progress label - local ip_id - ip_id=$(_ilc_in_progress_id) - if [ -n "$ip_id" ]; then - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue}/labels/${ip_id}" >/dev/null 2>&1 || true - fi - - # Add blocked label - local bk_id - bk_id=$(_ilc_blocked_id) - if [ -n "$bk_id" ]; then - curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${issue}/labels" \ - -d "{\"labels\":[${bk_id}]}" >/dev/null 2>&1 || true - fi - - # Post diagnostic comment using shared helper - local comment_body - comment_body=$( - printf '%s\n\n' '### Stale in-progress issue detected' - printf '%s\n' '| Field | Value |' - printf '%s\n' '|---|---|' - printf '| Detection reason | `%s` |\n' "$reason" - printf '| Timestamp | `%s` |\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" - printf '%s\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.' - printf '%s\n' '**Action required:** A maintainer should triage this issue.' 
- ) - _ilc_post_comment "$issue" "$comment_body" - - _ilc_log "stale issue #${issue} relabeled to blocked: ${reason}" -} - # ============================================================================= # HELPER: handle CI-exhaustion check/block (DRY for 3 call sites) # Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not. @@ -222,10 +155,9 @@ try_direct_merge() { if [ "$issue_num" -gt 0 ]; then issue_close "$issue_num" # Remove in-progress label (don't re-add backlog — issue is closed) - IP_ID=$(_ilc_in_progress_id) curl -sf -X DELETE \ -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue_num}/labels/${IP_ID}" >/dev/null 2>&1 || true + "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.sid" \ "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt" fi @@ -345,16 +277,6 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then - # Check if issue is assigned to this agent — only merge own PRs - if [ "$PL_ISSUE" -gt 0 ]; then - PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${PL_ISSUE}") || true - PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then - log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)" - continue - fi - fi if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then PL_MERGED_ANY=true fi @@ -378,9 +300,6 @@ if [ -f "$LOCKFILE" ]; then rm -f "$LOCKFILE" fi -# --- Fetch origin refs before any stale branch checks --- -git fetch origin --prune 2>/dev/null || true - # --- Memory guard --- memory_guard 2000 @@ -388,211 +307,89 @@ memory_guard 2000 # PRIORITY 1: orphaned in-progress issues # 
============================================================================= log "checking for in-progress issues" - ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/issues?state=open&labels=in-progress&limit=10&type=issues") ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length') -BLOCKED_BY_INPROGRESS=false if [ "$ORPHAN_COUNT" -gt 0 ]; then ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number') - # Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale - OPEN_PR=false - if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + # Formula guard: formula-labeled issues should not be worked on by dev-agent. + # Remove in-progress label and skip to prevent infinite respawn cycle (#115). + ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true + SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|action|prediction/dismissed|prediction/unreviewed)$' | head -1) || true + if [ -n "$SKIP_LABEL" ]; then + log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/in-progress" >/dev/null 2>&1 || true + exit 0 + fi + + # Check if there's already an open PR for this issue + HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ - jq -e --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch)' >/dev/null 2>&1; then - OPEN_PR=true - fi + jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true - # Check if issue has an assignee — only block on issues assigned to this agent - assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""') - if [ -n "$assignee" ]; then - if [ "$assignee" = "$BOT_USER" ]; then - # Check if my PR has review feedback to address before exiting - HAS_PR=$(curl 
-sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$HAS_PR" ]; then + PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true + CI_STATE=$(ci_commit_status "$PR_SHA") || true - if [ -n "$HAS_PR" ]; then - # Check for REQUEST_CHANGES review feedback - REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}/reviews") || true - HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed + if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then + CI_STATE="success" + log "PR #${HAS_PR} has no code files — treating CI as passed" + fi - if [ "${HAS_CHANGES:-0}" -gt 0 ]; then - log "issue #${ISSUE_NUM} has review feedback — spawning agent" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (review fix)" - BLOCKED_BY_INPROGRESS=true - else - log "issue #${ISSUE_NUM} assigned to me — my thread is busy" - BLOCKED_BY_INPROGRESS=true - fi - else - log "issue #${ISSUE_NUM} assigned to me — my thread is busy" - BLOCKED_BY_INPROGRESS=true + # Check formal reviews (single fetch to avoid race window) + REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews") || true + HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true + HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + + if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then + if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then + exit 0 fi + # Direct merge failed (conflicts?) — fall back to dev-agent + log "falling back to dev-agent for PR #${HAS_PR} merge" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" + exit 0 + + # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is + # pending/unknown. Definitive CI failure is handled by the elif below. + elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (review fix)" + exit 0 + + elif ci_failed "$CI_STATE"; then + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then + # Fall through to backlog scan instead of exit + : + else + # Increment at actual launch time (not on guard-hit paths) + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then + exit 0 # exhausted between check and launch + fi + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" + exit 0 + fi + else - log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking" - # Issue assigned to another agent — don't block, fall through to backlog + log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" fi - fi - - # Only proceed with in-progress checks if not blocked by another agent - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - # Check for dev-agent lock file (agent may be running in another container) - LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt" - if [ -f "$LOCK_FILE" ]; then - log "issue #${ISSUE_NUM} has agent lock file — trusting active work" - BLOCKED_BY_INPROGRESS=true - fi - - if [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then - log "issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked" - relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock" - BLOCKED_BY_INPROGRESS=true - fi - - # Formula guard: formula-labeled issues should not be worked on by dev-agent. - # Remove in-progress label and skip to prevent infinite respawn cycle (#115). 
- if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true - SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true - if [ -n "$SKIP_LABEL" ]; then - log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" - IP_ID=$(_ilc_in_progress_id) - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true - BLOCKED_BY_INPROGRESS=true - fi - fi - - # Check if there's already an open PR for this issue - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - - if [ -n "$HAS_PR" ]; then - # Check if branch is stale (behind primary branch) - BRANCH="fix/issue-${ISSUE_NUM}" - AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") - if [ "$AHEAD" -gt 0 ]; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" - # Close the PR via API - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${HAS_PR}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true - # Delete the branch via git push - git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true - # Reset to fresh start on primary branch - git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true - git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true - BLOCKED_BY_INPROGRESS=true - fi - - # Only process PR if not abandoned (stale branch check above) - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - PR_SHA=$(curl -sf -H 
"Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true - CI_STATE=$(ci_commit_status "$PR_SHA") || true - - # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed - if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then - CI_STATE="success" - log "PR #${HAS_PR} has no code files — treating CI as passed" - fi - - # Check formal reviews (single fetch to avoid race window) - REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}/reviews") || true - HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true - HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true - - if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then - if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then - BLOCKED_BY_INPROGRESS=true - else - # Direct merge failed (conflicts?) — fall back to dev-agent - log "falling back to dev-agent for PR #${HAS_PR} merge" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" - BLOCKED_BY_INPROGRESS=true - fi - - # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is - # pending/unknown. Definitive CI failure is handled by the elif below. 
- elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then - # Check if issue is assigned to this agent — skip if assigned to another bot - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}") || true - assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} REQUEST_CHANGES but assigned to ${assignee} — skipping" - # Don't block — fall through to backlog - BLOCKED_BY_INPROGRESS=false - else - log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" - BLOCKED_BY_INPROGRESS=true - fi - - elif ci_failed "$CI_STATE"; then - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then - # Fall through to backlog scan instead of exit - : - else - # Increment at actual launch time (not on guard-hit paths) - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then - BLOCKED_BY_INPROGRESS=true # exhausted between check and launch - else - log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (CI fix)" - BLOCKED_BY_INPROGRESS=true - fi - fi - - else - log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" - BLOCKED_BY_INPROGRESS=true - fi - fi - else - # Check assignee before adopting orphaned issue - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}") || true - ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - - if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then - log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)" - # Remove in-progress label since this agent isn't working on it - IP_ID=$(_ilc_in_progress_id) - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true - # Don't block — fall through to backlog - else - log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)" - BLOCKED_BY_INPROGRESS=true - fi - fi - fi - fi - - # If blocked by in-progress work, exit now - if [ "$BLOCKED_BY_INPROGRESS" = true ]; then + else + log "recovering orphaned issue #${ISSUE_NUM} (no PR found)" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (recovery)" exit 0 fi fi @@ -662,14 +459,6 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do # Stuck: REQUEST_CHANGES or CI failure -> spawn agent if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then - # Check if issue is assigned to this agent — skip if assigned to another bot - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${STUCK_ISSUE}") || true - assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then - log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) REQUEST_CHANGES but assigned to ${assignee} — skipping" - continue # skip this PR, check next stuck PR or fall through to backlog - fi log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first" nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 & log "started dev-agent PID $! for stuck PR #${PR_NUM}" @@ -732,18 +521,9 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do ISSUE_NUM=$(echo "$BACKLOG_JSON" | jq -r ".[$i].number") ISSUE_BODY=$(echo "$BACKLOG_JSON" | jq -r ".[$i].body // \"\"") - # Check assignee before claiming — skip if assigned to another bot - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}") || true - ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then - log " #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping" - continue - fi - # Formula guard: formula-labeled issues must not be picked up by dev-agent. 
ISSUE_LABELS=$(echo "$BACKLOG_JSON" | jq -r ".[$i].labels[].name" 2>/dev/null) || true - SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true + SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|action|prediction/dismissed|prediction/unreviewed)$' | head -1) || true if [ -n "$SKIP_LABEL" ]; then log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — skipping in backlog scan" continue @@ -760,26 +540,6 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do '.[] | select((.head.ref == $branch) or (.title | contains($num))) | .number' | head -1) || true if [ -n "$EXISTING_PR" ]; then - # Check if branch is stale (behind primary branch) - BRANCH="fix/issue-${ISSUE_NUM}" - AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") - if [ "$AHEAD" -gt 0 ]; then - log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" - # Close the PR via API - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${EXISTING_PR}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true - # Delete the branch via git push - git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true - # Reset to fresh start on primary branch - git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true - git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true - # Continue to find another ready issue - continue - fi - PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls/${EXISTING_PR}" | jq -r '.head.sha') || true CI_STATE=$(ci_commit_status "$PR_SHA") || true @@ -837,32 +597,9 @@ done # Single-threaded per project: if any issue has an open PR waiting for review/CI, # don't start new work — let the pipeline drain first -# But only block on PRs assigned to this agent (per-agent logic from 
#358) if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then - # Filter to only this agent's waiting PRs - MY_WAITING_PRS="" - for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do - pr_num="${pr_num#\#}" # Remove leading # - # Check if this PR's issue is assigned to this agent - pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${pr_num}" 2>/dev/null) || true - pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true - issue_num=$(echo "$pr_branch" | grep -oP '(?<=fix/issue-)\d+' || true) - if [ -z "$issue_num" ]; then - continue - fi - issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true - if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then - MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}" - fi - done - - if [ -n "$MY_WAITING_PRS" ]; then - log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}" - exit 0 - fi - log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with #${READY_ISSUE}" + log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}" + exit 0 fi if [ -z "$READY_ISSUE" ]; then diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh new file mode 100644 index 0000000..8f3b3b4 --- /dev/null +++ b/dev/phase-handler.sh @@ -0,0 +1,820 @@ +#!/usr/bin/env bash +# dev/phase-handler.sh — Phase callback functions for dev-agent.sh +# +# Source this file from agent orchestrators after lib/agent-session.sh is loaded. 
+# Defines: post_refusal_comment(), _on_phase_change(), build_phase_protocol_prompt() +# +# Required globals (set by calling agent before or after sourcing): +# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT +# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE +# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE +# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER +# +# Globals with defaults (agents can override after sourcing): +# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS, +# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND, +# CLAIMED, PHASE_POLL_INTERVAL +# +# Calls back to agent-defined helpers: +# cleanup_worktree(), cleanup_labels(), status(), log() +# +# shellcheck shell=bash +# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling +# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh + +# Load secret scanner for redacting tmux output before posting to issues +# shellcheck source=../lib/secret-scan.sh +source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh" + +# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.) +# shellcheck source=../lib/ci-helpers.sh +source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh" + +# Load mirror push helper +# shellcheck source=../lib/mirrors.sh +source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh" + +# --- Default callback stubs (agents can override after sourcing) --- +# cleanup_worktree and cleanup_labels are called during phase transitions. +# Provide no-op defaults so phase-handler.sh is self-contained; sourcing +# agents override these with real implementations. +if ! declare -f cleanup_worktree >/dev/null 2>&1; then + cleanup_worktree() { :; } +fi +if ! 
declare -f cleanup_labels >/dev/null 2>&1; then + cleanup_labels() { :; } +fi + +# --- Default globals (agents can override after sourcing) --- +: "${CI_POLL_TIMEOUT:=1800}" +: "${REVIEW_POLL_TIMEOUT:=10800}" +: "${MAX_CI_FIXES:=3}" +: "${MAX_REVIEW_ROUNDS:=5}" +: "${CI_RETRY_COUNT:=0}" +: "${CI_FIX_COUNT:=0}" +: "${REVIEW_ROUND:=0}" +: "${PR_NUMBER:=}" +: "${CLAIMED:=false}" +: "${PHASE_POLL_INTERVAL:=30}" + +# --- Post diagnostic comment + label issue as blocked --- +# Captures tmux pane output, posts a structured comment on the issue, removes +# in-progress label, and adds the "blocked" label. +# +# Args: reason [session_name] +# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API +post_blocked_diagnostic() { + local reason="$1" + local session="${2:-${SESSION_NAME:-}}" + + # Capture last 50 lines from tmux pane (before kill) + local tmux_output="" + if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then + tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true) + fi + + # Redact any secrets from tmux output before posting to issue + if [ -n "$tmux_output" ]; then + tmux_output=$(redact_secrets "$tmux_output") + fi + + # Build diagnostic comment body + local comment + comment="### Session failure diagnostic + +| Field | Value | +|---|---| +| Exit reason | \`${reason}\` | +| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |" + [ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ] && \ + comment="${comment} +| PR | #${PR_NUMBER} |" + + if [ -n "$tmux_output" ]; then + comment="${comment} + +
Last 50 lines from tmux pane + +\`\`\` +${tmux_output} +\`\`\` +
" + fi + + # Post comment to issue + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true + + # Remove in-progress, add blocked + cleanup_labels + local blocked_id + blocked_id=$(ensure_blocked_label_id) + if [ -n "$blocked_id" ]; then + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true + fi + CLAIMED=false + _BLOCKED_POSTED=true +} + +# --- Build phase protocol prompt (shared across agents) --- +# Generates the phase-signaling instructions for Claude prompts. +# Args: phase_file summary_file branch [remote] +# Output: The protocol text (stdout) +build_phase_protocol_prompt() { + local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}" + cat <<_PHASE_PROTOCOL_EOF_ +## Phase-Signaling Protocol (REQUIRED) + +You are running in a persistent tmux session managed by an orchestrator. +Communicate progress by writing to the phase file. The orchestrator watches +this file and injects events (CI results, review feedback) back into this session. + +### Key files +\`\`\` +PHASE_FILE="${_pf}" +SUMMARY_FILE="${_sf}" +\`\`\` + +### Phase transitions — write these exactly: + +**After committing and pushing your branch:** +\`\`\`bash +# Rebase on target branch before push to avoid merge conflicts +git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} +git push ${_remote} ${_br} +# Write a short summary of what you implemented: +printf '%s' "" > "\${SUMMARY_FILE}" +# Signal the orchestrator to create the PR and watch for CI: +echo "PHASE:awaiting_ci" > "${_pf}" +\`\`\` +Then STOP and wait. The orchestrator will inject CI results. 
+ +**When you receive a "CI passed" injection:** +\`\`\`bash +echo "PHASE:awaiting_review" > "${_pf}" +\`\`\` +Then STOP and wait. The orchestrator will inject review feedback. + +**When you receive a "CI failed:" injection:** +Fix the CI issue, then rebase on target branch and push: +\`\`\`bash +git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} +git push --force-with-lease ${_remote} ${_br} +echo "PHASE:awaiting_ci" > "${_pf}" +\`\`\` +Then STOP and wait. + +**When you receive a "Review: REQUEST_CHANGES" injection:** +Address ALL review feedback, then rebase on target branch and push: +\`\`\`bash +git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} +git push --force-with-lease ${_remote} ${_br} +echo "PHASE:awaiting_ci" > "${_pf}" +\`\`\` +(CI runs again after each push — always write awaiting_ci, not awaiting_review) + +**When you need human help (CI exhausted, merge blocked, stuck on a decision):** +\`\`\`bash +printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}" +\`\`\` +Then STOP and wait. A human will review and respond via the forge. + +**On unrecoverable failure:** +\`\`\`bash +printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}" +\`\`\` +_PHASE_PROTOCOL_EOF_ +} + +# --- Merge helper --- +# do_merge — attempt to merge PR via forge API. +# Args: pr_num +# Returns: +# 0 = merged successfully +# 1 = other failure (conflict, network error, etc.) 
+# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written +do_merge() { + local pr_num="$1" + local merge_response merge_http_code merge_body + merge_response=$(curl -s -w "\n%{http_code}" -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H 'Content-Type: application/json' \ + "${API}/pulls/${pr_num}/merge" \ + -d '{"Do":"merge","delete_branch_after_merge":true}') || true + merge_http_code=$(echo "$merge_response" | tail -1) + merge_body=$(echo "$merge_response" | sed '$d') + + if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then + log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})" + return 0 + fi + + # HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll). + # Before escalating, check whether the PR was already merged by another agent. + if [ "$merge_http_code" = "405" ]; then + local pr_state + pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false" + if [ "$pr_state" = "true" ]; then + log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success" + return 0 + fi + log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" + printf 'PHASE:escalate\nReason: %s\n' \ + "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \ + > "$PHASE_FILE" + return 2 + fi + + log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}" + return 1 +} + +# --- Refusal comment helper --- +post_refusal_comment() { + local emoji="$1" title="$2" body="$3" + local last_has_title + last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE}/comments?limit=5" | \ + jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true + if [ "$last_has_title" = "true" ]; then + log "skipping duplicate refusal comment: 
${title}" + return 0 + fi + local comment + comment="${emoji} **Dev-agent: ${title}** + +${body} + +--- +*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" + printf '%s' "$comment" > "/tmp/refusal-comment.txt" + jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + --data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \ + log "WARNING: failed to post refusal comment" + rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json" +} + +# ============================================================================= +# PHASE DISPATCH CALLBACK +# ============================================================================= + +# _on_phase_change — Phase dispatch callback for monitor_phase_loop +# Receives the current phase as $1. +# Returns 0 to continue the loop, 1 to break (terminal phase reached). +_on_phase_change() { + local phase="$1" + + # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── + if [ "$phase" = "PHASE:awaiting_ci" ]; then + # Release session lock — Claude is idle during CI polling (#724) + session_lock_release + + # Create PR if not yet created + if [ -z "${PR_NUMBER:-}" ]; then + status "creating PR for issue #${ISSUE}" + IMPL_SUMMARY="" + if [ -f "$IMPL_SUMMARY_FILE" ]; then + # Don't treat refusal JSON as a PR summary + if ! 
jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then + IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") + fi + fi + + printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" + jq -n \ + --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ + --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ + --arg head "$BRANCH" \ + --arg base "${PRIMARY_BRANCH}" \ + '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" + + PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls" \ + --data-binary @"/tmp/pr-request-${ISSUE}.json") + + PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) + PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') + rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" + + if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then + PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') + log "created PR #${PR_NUMBER}" + elif [ "$PR_HTTP_CODE" = "409" ]; then + # PR already exists (race condition) — find it + FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$FOUND_PR" ]; then + PR_NUMBER="$FOUND_PR" + log "PR already exists: #${PR_NUMBER}" + else + log "ERROR: PR creation got 409 but no existing PR found" + agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." + return 0 + fi + else + log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" + agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again." 
+ return 0 + fi + fi + + # No CI configured? Treat as success immediately + if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then + log "no CI configured — treating as passed" + agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project). +Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." + return 0 + fi + + # Poll CI until done or timeout + status "waiting for CI on PR #${PR_NUMBER}" + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') + + CI_DONE=false + CI_STATE="unknown" + CI_POLL_ELAPSED=0 + while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do + sleep 30 + CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) + + # Check session still alive during CI wait (exit_marker + tmux fallback) + if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "session died during CI wait" + break + fi + + # Re-fetch HEAD — Claude may have pushed new commits since loop started + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA") + + CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA") + if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + CI_DONE=true + [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 + break + fi + done + + if ! $CI_DONE; then + log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" + agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed." + return 0 + fi + + log "CI: ${CI_STATE}" + + if [ "$CI_STATE" = "success" ]; then + agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}. 
+Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: + echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + else + # Fetch CI error details + PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA") + + FAILED_STEP="" + FAILED_EXIT="" + IS_INFRA=false + if [ -n "$PIPELINE_NUM" ]; then + FAILED_INFO=$(curl -sf \ + -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ + "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ + jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) + FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) + FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) + fi + + log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" + + if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then + IS_INFRA=true + fi + + if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then + CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) + log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" + (cd "$WORKTREE" && git commit --allow-empty \ + -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) + # Rebase on target branch before push to avoid merge conflicts + if ! (cd "$WORKTREE" && \ + git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \ + git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then + log "rebase conflict detected — aborting, agent must resolve" + (cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true + agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically. + +Please resolve merge conflicts manually: +1. Check conflict status: git status +2. Resolve conflicts in the conflicted files +3. Stage resolved files: git add +4. 
Continue rebase: git rebase --continue + +If you cannot resolve conflicts, abort: git rebase --abort +Then write PHASE:escalate with a reason." + return 0 + fi + # Rebase succeeded — push the result + (cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3) + # Touch phase file so we recheck CI on the new SHA + # Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime + touch "$PHASE_FILE" + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) + return 0 + fi + + CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) + _ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" + if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then + log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" + printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE" + # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate + return 0 + fi + + CI_ERROR_LOG="" + if [ -n "$PIPELINE_NUM" ]; then + CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") + fi + + # Save CI result for crash recovery + printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ + "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ + > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true + + agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). + +Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) + +CI debug tool: + bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} + bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} + +Error snippet: +${CI_ERROR_LOG:-No logs available. 
Use ci-debug.sh to query the pipeline.} + +Instructions: +1. Run ci-debug.sh failures to get the full error output. +2. Read the failing test file(s) — understand what the tests EXPECT. +3. Fix the root cause — do NOT weaken tests. +4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH} + git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH} +5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +6. Stop and wait." + fi + + # ── PHASE: awaiting_review ────────────────────────────────────────────────── + elif [ "$phase" = "PHASE:awaiting_review" ]; then + # Release session lock — Claude is idle during review wait (#724) + session_lock_release + status "waiting for review on PR #${PR_NUMBER:-?}" + CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle + + if [ -z "${PR_NUMBER:-}" ]; then + log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" + FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$FOUND_PR" ]; then + PR_NUMBER="$FOUND_PR" + log "found PR #${PR_NUMBER}" + else + agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci." + return 0 + fi + fi + + REVIEW_POLL_ELAPSED=0 + REVIEW_FOUND=false + while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do + sleep 300 # 5 min between review checks + REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) + + # Check session still alive (exit_marker + tmux fallback) + if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "session died during review wait" + REVIEW_FOUND=false + break + fi + + # Check if phase was updated while we wait (e.g., Claude reacted to something) + NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then + log "phase file updated during review wait — re-entering main loop" + # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer + # loop detects the change on its next tick and dispatches the new phase. + REVIEW_FOUND=true # Prevent timeout injection + # Clean up review-poll sentinel if it exists (session already advanced) + rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" + break + fi + + REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true + REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \ + jq -r --arg sha "$REVIEW_SHA" \ + '[.[] | select(.body | contains("" in planner-memory.md. If (count - N) >= 5 or planner-memory.md missing, write to: @@ -256,19 +268,15 @@ If (count - N) >= 5 or planner-memory.md missing, write to: Include: run counter marker, date, constraint focus, patterns, direction. Keep under 100 lines. Replace entire file. -### 3. Commit ops repo changes -Commit the ops repo changes (prerequisites, memory, vault items): +### 4. Commit ops repo changes +Commit the ops repo changes (prerequisites, journal, memory, vault items): cd "$OPS_REPO_ROOT" - git add prerequisites.md knowledge/planner-memory.md vault/pending/ + git add prerequisites.md journal/planner/ knowledge/planner-memory.md vault/pending/ git add -u if ! git diff --cached --quiet; then git commit -m "chore: planner run $(date -u +%Y-%m-%d)" git push origin "$PRIMARY_BRANCH" fi cd "$PROJECT_REPO_ROOT" - -### 4. Write journal entry (generic) -The planner-run.sh wrapper will handle journal writing via profile_write_journal() -after the formula completes. 
This step is informational only. """ needs = ["triage-and-plan"] diff --git a/formulas/run-publish-site.toml b/formulas/run-publish-site.toml index 9a7c1e7..2de4455 100644 --- a/formulas/run-publish-site.toml +++ b/formulas/run-publish-site.toml @@ -3,7 +3,7 @@ # Trigger: action issue created by planner (gap analysis), dev-poll (post-merge # hook detecting site/ changes), or gardener (periodic SHA drift check). # -# The dispatcher picks up the issue, executes these steps, posts results +# The action-agent picks up the issue, executes these steps, posts results # as a comment, and closes the issue. name = "run-publish-site" diff --git a/formulas/run-rent-a-human.toml b/formulas/run-rent-a-human.toml index 41b8f1f..9009418 100644 --- a/formulas/run-rent-a-human.toml +++ b/formulas/run-rent-a-human.toml @@ -5,7 +5,7 @@ # the action and notifies the human for one-click copy-paste execution. # # Trigger: action issue created by planner or any formula. -# The dispatcher picks up the issue, executes these steps, writes a draft +# The action-agent picks up the issue, executes these steps, writes a draft # to vault/outreach/{platform}/drafts/, notifies the human via the forge, # and closes the issue. # diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index ceaf340..6f60905 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -1,7 +1,7 @@ # formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation) # # Executed by supervisor/supervisor-run.sh via cron (every 20 minutes). -# supervisor-run.sh runs claude -p via agent-sdk.sh and injects +# supervisor-run.sh creates a tmux session with Claude (sonnet) and injects # this formula with pre-collected metrics as context. # # Steps: preflight → health-assessment → decide-actions → report → journal @@ -137,15 +137,14 @@ For each finding from the health assessment, decide and execute an action. 
**P3 Stale PRs (CI done >20min, no push since):** Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code. - Instead, file a vault item for the dev-agent to pick up: - Write $OPS_REPO_ROOT/vault/pending/stale-pr-${ISSUE_NUM}.md: - # Stale PR: ${PR_TITLE} - ## What - CI finished >20min ago but no git push has been made to the PR branch. - ## Why - P3 — Factory degraded: PRs should be pushed within 20min of CI completion. - ## Unblocks - - Factory health: dev-agent will push the branch and continue the workflow + Instead, nudge the dev-agent via tmux injection if a session is alive: + # Find the dev session for this issue + SESSION=$(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "dev-.*-${ISSUE_NUM}" | head -1) + if [ -n "$SESSION" ]; then + # Inject a nudge into the dev-agent session + tmux send-keys -t "$SESSION" "# [supervisor] PR stale >20min — CI finished, please push or update" Enter + fi + If no active tmux session exists, note it in the journal for the next dev-poll cycle. Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs. ### Cannot auto-fix → file vault item @@ -160,7 +159,7 @@ human judgment, file a vault procurement item: ## Unblocks - Factory health: - Vault PR filed on ops repo — human approves via PR review. + The vault-poll will notify the human and track the request. Read the relevant best-practices file before taking action: cat "$OPS_REPO_ROOT/knowledge/memory.md" # P0 @@ -242,16 +241,7 @@ run-to-run context so future supervisor runs can detect trends IMPORTANT: Do NOT commit or push the journal — it is a local working file. The journal directory is committed to git periodically by other agents. -## Learning - -If you discover something new during this run, append it to the relevant -knowledge file in the ops repo: - echo "### Lesson title - Description of what you learned." 
>> "${OPS_REPO_ROOT}/knowledge/.md" - -Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md, -review-agent.md, git.md. - -After writing the journal, the agent session completes automatically. +After writing the journal, write the phase signal: + echo 'PHASE:done' > "$PHASE_FILE" """ needs = ["report"] diff --git a/formulas/triage.toml b/formulas/triage.toml deleted file mode 100644 index a2ec909..0000000 --- a/formulas/triage.toml +++ /dev/null @@ -1,267 +0,0 @@ -# formulas/triage.toml — Triage-agent formula (generic template) -# -# This is the base template for triage investigations. -# Project-specific formulas (e.g. formulas/triage-harb.toml) extend this by -# overriding the fields in the [project] section and providing stack-specific -# step descriptions. -# -# Triggered by: bug-report + in-triage label combination. -# Set by the reproduce-agent when: -# - Bug was confirmed (reproduced) -# - Quick log analysis did not reveal an obvious root cause -# - Reproduce-agent documented all steps taken and logs examined -# -# Steps: -# 1. read-findings — parse issue comments for prior reproduce-agent evidence -# 2. trace-data-flow — follow symptom through UI → API → backend → data store -# 3. instrumentation — throwaway branch, add logging, restart, observe -# 4. decompose — file backlog issues for each root cause -# 5. link-back — update original issue, swap in-triage → in-progress -# 6. 
cleanup — delete throwaway debug branch -# -# Best practices: -# - Start from reproduce-agent findings; do not repeat their work -# - Budget: 70% tracing data flow, 30% instrumented re-runs -# - Multiple causes: check if layered (Depends-on) or independent (Related) -# - Always delete the throwaway debug branch before finishing -# - If inconclusive after full turn budget: leave in-triage, post what was -# tried, do NOT relabel — supervisor handles stale triage sessions -# -# Project-specific formulas extend this template by defining: -# - stack_script: how to start/stop the project stack -# - [project].data_flow: layer names (e.g. "chain → indexer → GraphQL → UI") -# - [project].api_endpoints: which APIs/services to inspect -# - [project].stack_lock: stack lock configuration -# - Per-step description overrides with project-specific commands -# -# No hard timeout — runs until Claude hits its turn limit. -# Stack lock held for full run (triage is rare; blocking CI is acceptable). - -name = "triage" -description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues." -version = 2 - -# Set stack_script to the restart command for local stacks. -# Leave empty ("") to connect to an existing staging environment. -stack_script = "" - -tools = ["playwright"] - -# --------------------------------------------------------------------------- -# Project-specific extension fields. -# Override these in formulas/triage-.toml. -# --------------------------------------------------------------------------- -[project] -# Human-readable layer names for the data-flow trace (generic default). -# Example project override: "chain → indexer → GraphQL → UI" -data_flow = "UI → API → backend → data store" - -# Comma-separated list of API endpoints or services to inspect. -# Example: "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545" -api_endpoints = "" - -# Stack lock configuration (leave empty for default behavior). 
-# Example: "full" to hold a full stack lock during triage. -stack_lock = "" - -# --------------------------------------------------------------------------- -# Steps -# --------------------------------------------------------------------------- - -[[steps]] -id = "read-findings" -title = "Read reproduce-agent findings" -description = """ -Before doing anything else, parse all prior evidence from the issue comments. - -1. Fetch the issue body and all comments: - curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body' - curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body' - -2. Identify the reproduce-agent comment (look for sections like - "Reproduction steps", "Logs examined", "What was tried"). - -3. Extract and note: - - The exact symptom (error message, unexpected value, visual regression) - - Steps that reliably trigger the bug - - Log lines or API responses already captured - - Any hypotheses the reproduce-agent already ruled out - -Do NOT repeat work the reproduce-agent already did. Your job starts where -theirs ended. If no reproduce-agent comment is found, note it and proceed -with fresh investigation using the issue body only. -""" - -[[steps]] -id = "trace-data-flow" -title = "Trace data flow from symptom to source" -description = """ -Systematically follow the symptom backwards through each layer of the stack. -Spend ~70% of your total turn budget here before moving to instrumentation. - -Generic layer traversal (adapt to the project's actual stack): - UI → API → backend → data store - -For each layer boundary: - 1. What does the upstream layer send? - 2. What does the downstream layer expect? - 3. Is there a mismatch? If yes — is this the root cause or a symptom? - -Tracing checklist: - a. Start at the layer closest to the visible symptom. - b. Read the relevant source files — do not guess data shapes. - c. 
Cross-reference API contracts: compare what the code sends vs what it - should send according to schemas, type definitions, or documentation. - d. Check recent git history on suspicious files: - git log --oneline -20 -- - e. Search for related issues or TODOs in the code: - grep -r "TODO\|FIXME\|HACK" -- - -Capture for each layer: - - The data shape flowing in and out (field names, types, nullability) - - Whether the layer's behavior matches its documented contract - - Any discrepancy found - -If a clear root cause becomes obvious during tracing, note it and continue -checking whether additional causes exist downstream. -""" -needs = ["read-findings"] - -[[steps]] -id = "instrumentation" -title = "Add debug instrumentation on a throwaway branch" -description = """ -Use ~30% of your total turn budget here. Only instrument after tracing has -identified the most likely failure points — do not instrument blindly. - -1. Create a throwaway debug branch (NEVER commit this to main): - cd "$PROJECT_REPO_ROOT" - git checkout -b debug/triage-${ISSUE_NUMBER} - -2. Add targeted logging at the layer boundaries identified during tracing: - - Console.log / structured log statements around the suspicious code path - - Log the actual values flowing through: inputs, outputs, intermediate state - - Add verbose mode flags if the stack supports them - - Keep instrumentation minimal — only what confirms or refutes the hypothesis - -3. Restart the stack using the configured script (if set): - ${stack_script:-"# No stack_script configured — restart manually or connect to staging"} - -4. Re-run the reproduction steps from the reproduce-agent findings. - -5. Observe and capture new output: - - Paste relevant log lines into your working notes - - Note whether the observed values match or contradict the hypothesis - -6. 
If the first instrumentation pass is inconclusive, iterate: - - Narrow the scope to the next most suspicious boundary - - Re-instrument, restart, re-run - - Maximum 2-3 instrumentation rounds before declaring inconclusive - -Do NOT push the debug branch. It will be deleted in the cleanup step. -""" -needs = ["trace-data-flow"] - -[[steps]] -id = "decompose" -title = "Decompose root causes into backlog issues" -description = """ -After tracing and instrumentation, articulate each distinct root cause. - -For each root cause found: - -1. Determine the relationship to other causes: - - Layered (one causes another) → use Depends-on in the issue body - - Independent (separate code paths fail independently) → use Related - -2. Create a backlog issue for each root cause: - curl -sf -X POST "${FORGE_API}/issues" \\ - -H "Authorization: token ${FORGE_TOKEN}" \\ - -H "Content-Type: application/json" \\ - -d '{ - "title": "fix: ", - "body": "## Root cause\\n\\n\\n## Fix suggestion\\n\\n\\n## Context\\nDecomposed from #${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>", - "labels": [{"name": "backlog"}] - }' - -3. Note the newly created issue numbers. - -If only one root cause is found, still create a single backlog issue with -the specific code location and fix suggestion. - -If the investigation is inconclusive (no clear root cause found), skip this -step and proceed directly to link-back with the inconclusive outcome. -""" -needs = ["instrumentation"] - -[[steps]] -id = "link-back" -title = "Update original issue and relabel" -description = """ -Post a summary comment on the original issue and update its labels. - -### If root causes were found (conclusive): - -Post a comment: - "## Triage findings - - Found N root cause(s): - - #X — (cause 1 of N) - - #Y — (cause 2 of N, depends on #X) - - Data flow traced: - Instrumentation: - - Next step: backlog issues above will be implemented in dependency order." 
- -Then swap labels: - - Remove: in-triage - - Add: in-progress - -### If investigation was inconclusive (turn budget exhausted): - -Post a comment: - "## Triage — inconclusive - - Traced: - Tried: - Hypothesis: - - No definitive root cause identified. Leaving in-triage for supervisor - to handle as a stale triage session." - -Do NOT relabel. Leave in-triage. The supervisor monitors stale triage -sessions and will escalate or reassign. - -**CRITICAL: Write outcome file** — Always write the outcome to the outcome file: - - If root causes found (conclusive): echo "reproduced" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt - - If inconclusive: echo "needs-triage" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt -""" -needs = ["decompose"] - -[[steps]] -id = "cleanup" -title = "Delete throwaway debug branch" -description = """ -Always delete the debug branch, even if the investigation was inconclusive. - -1. Switch back to the main branch: - cd "$PROJECT_REPO_ROOT" - git checkout "$PRIMARY_BRANCH" - -2. Delete the local debug branch: - git branch -D debug/triage-${ISSUE_NUMBER} - -3. Confirm no remote was pushed (if accidentally pushed, delete it too): - git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true - -4. Verify the worktree is clean: - git status - git worktree list - -A clean repo is a prerequisite for the next dev-agent run. Never leave -debug branches behind — they accumulate and pollute the branch list. -""" -needs = ["link-back"] diff --git a/gardener/AGENTS.md b/gardener/AGENTS.md index 2a5dcb3..c9ba3b1 100644 --- a/gardener/AGENTS.md +++ b/gardener/AGENTS.md @@ -1,4 +1,4 @@ - + # Gardener Agent **Role**: Backlog grooming — detect duplicate issues, missing acceptance @@ -22,8 +22,7 @@ directly from cron like the planner, predictor, and supervisor. `PHASE:awaiting_ci` — injects CI results and review feedback, re-signals `PHASE:awaiting_ci` after fixes, signals `PHASE:awaiting_review` on CI pass. Executes pending-actions manifest after PR merge. 
-- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, - agents-update, commit-and-pr +- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, blocked-review, agents-update, commit-and-pr - `gardener/pending-actions.json` — Manifest of deferred repo actions (label changes, closures, comments, issue creation). Written during grooming steps, committed to the PR, reviewed alongside AGENTS.md changes, executed by gardener-run.sh after merge. @@ -35,7 +34,7 @@ directly from cron like the planner, predictor, and supervisor. **Lifecycle**: gardener-run.sh (cron 0,6,12,18) → `check_active gardener` → lock + memory guard → load formula + context → create tmux session → Claude grooms backlog (writes proposed actions to manifest), bundles dust, -updates AGENTS.md, commits manifest + docs to PR → +reviews blocked issues, updates AGENTS.md, commits manifest + docs to PR → `PHASE:awaiting_ci` (stays alive) → CI pass → `PHASE:awaiting_review` → review feedback → address + re-signal → merge → gardener-run.sh executes manifest actions via API → `PHASE:done`. When blocked on external resources diff --git a/gardener/PROMPT.md b/gardener/PROMPT.md new file mode 100644 index 0000000..90cfe5e --- /dev/null +++ b/gardener/PROMPT.md @@ -0,0 +1,50 @@ +# Gardener Prompt — Dust vs Ore + +> **Note:** This is human documentation. The actual LLM prompt is built +> inline in `gardener-poll.sh` (with dynamic context injection). This file +> documents the design rationale for reference. + +## Rule + +Don't promote trivial tech-debt individually. Each promotion costs a full +factory cycle: CI + dev-agent + review + merge. Don't fill minecarts with +dust — put ore inside. + +## What is dust? + +- Comment fix +- Variable rename +- Style-only change (whitespace, formatting) +- Single-line edit +- Trivial cleanup with no behavioral impact + +## What is ore? 
+ +- Multi-file changes +- Behavioral fixes +- Architectural improvements +- Security or correctness issues +- Anything requiring design thought + +## LLM output format + +When a tech-debt issue is dust, the LLM outputs: + +``` +DUST: {"issue": NNN, "group": "", "title": "...", "reason": "..."} +``` + +The `group` field clusters related dust by file or subsystem (e.g. +`"gardener"`, `"lib/env.sh"`, `"dev-poll"`). + +## Bundling + +The script collects dust items into `gardener/dust.jsonl`. When a group +accumulates 3+ items, the script automatically: + +1. Creates one bundled backlog issue referencing all source issues +2. Closes the individual source issues with a cross-reference comment +3. Removes bundled items from the staging file + +This converts N trivial issues into 1 actionable issue, saving N-1 factory +cycles. diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index b524b62..31aa8c0 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -45,7 +45,7 @@ source "$FACTORY_ROOT/lib/agent-sdk.sh" # shellcheck source=../lib/pr-lifecycle.sh source "$FACTORY_ROOT/lib/pr-lifecycle.sh" -LOG_FILE="${DISINTO_LOG_DIR}/gardener/gardener.log" +LOG_FILE="$SCRIPT_DIR/gardener.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -55,30 +55,19 @@ RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" WORKTREE="/tmp/${PROJECT_NAME}-gardener-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="gardener" +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active gardener acquire_cron_lock "/tmp/gardener-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Gardener run start ---" -# ── Resolve forge remote for git 
operations ───────────────────────────── -resolve_forge_remote - -# ── Resolve agent identity for .profile repo ──────────────────────────── -resolve_agent_identity || true - # ── Load formula + context ─────────────────────────────────────────────── -load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1 +load_formula "$FACTORY_ROOT/formulas/run-gardener.toml" build_context_block AGENTS.md -# ── Prepare .profile context (lessons injection) ───────────────────────── -formula_prepare_profile_context - # ── Read scratch file (compaction survival) ─────────────────────────────── SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") @@ -116,7 +105,7 @@ You have full shell access and --dangerously-skip-permissions. Fix what you can. File vault items for what you cannot. Do NOT ask permission — act first, report after. ## Project context -${CONTEXT_BLOCK}$(formula_lessons_block) +${CONTEXT_BLOCK} ${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} } ## Result file @@ -129,7 +118,16 @@ ${SCRATCH_INSTRUCTION} ${PROMPT_FOOTER}" # ── Create worktree ────────────────────────────────────────────────────── -formula_worktree_setup "$WORKTREE" +cd "$PROJECT_REPO_ROOT" +git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true +worktree_cleanup "$WORKTREE" +git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null + +cleanup() { + worktree_cleanup "$WORKTREE" + rm -f "$GARDENER_PR_FILE" +} +trap cleanup EXIT # ── Post-merge manifest execution ──────────────────────────────────────── # Reads gardener/pending-actions.json and executes each action via API. 
@@ -158,21 +156,19 @@ _gardener_execute_manifest() { case "$action" in add_label) - local label label_id http_code resp + local label label_id label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/labels" \ - -d "{\"labels\":[${label_id}]}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + -d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then log "manifest: add_label '${label}' to #${issue}" else - log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}" + log "manifest: FAILED add_label '${label}' to #${issue}" fi else log "manifest: FAILED add_label — label '${label}' not found" @@ -180,19 +176,17 @@ _gardener_execute_manifest() { ;; remove_label) - local label label_id http_code resp + local label label_id label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then log "manifest: remove_label '${label}' from #${issue}" else - log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}" + log 
"manifest: FAILED remove_label '${label}' from #${issue}" fi else log "manifest: FAILED remove_label — label '${label}' not found" @@ -200,38 +194,34 @@ _gardener_execute_manifest() { ;; close) - local reason http_code resp + local reason reason=$(jq -r ".[$i].reason // empty" "$manifest_file") - resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d '{"state":"closed"}' 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + -d '{"state":"closed"}' >/dev/null 2>&1; then log "manifest: closed #${issue} (${reason})" else - log "manifest: FAILED close #${issue}: HTTP ${http_code}" + log "manifest: FAILED close #${issue}" fi ;; comment) - local body escaped_body http_code resp + local body escaped_body body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/comments" \ - -d "{\"body\":${escaped_body}}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then log "manifest: commented on #${issue}" else - log "manifest: FAILED comment on #${issue}: HTTP ${http_code}" + log "manifest: FAILED comment on #${issue}" fi ;; create_issue) - local title body labels escaped_title escaped_body label_ids http_code resp + local title body labels escaped_title escaped_body label_ids title=$(jq -r ".[$i].title" "$manifest_file") body=$(jq -r ".[$i].body" "$manifest_file") labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file") @@ -251,46 +241,40 @@ 
_gardener_execute_manifest() { done <<< "$labels" [ -n "$ids_json" ] && label_ids="[${ids_json}]" fi - resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues" \ - -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then log "manifest: created issue '${title}'" else - log "manifest: FAILED create_issue '${title}': HTTP ${http_code}" + log "manifest: FAILED create_issue '${title}'" fi ;; edit_body) - local body escaped_body http_code resp + local body escaped_body body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d "{\"body\":${escaped_body}}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then log "manifest: edited body of #${issue}" else - log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}" + log "manifest: FAILED edit_body #${issue}" fi ;; close_pr) - local pr http_code resp + local pr pr=$(jq -r ".[$i].pr" "$manifest_file") - resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/pulls/${pr}" \ - -d '{"state":"closed"}' 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] 
|| [ "$http_code" = "204" ]; then + -d '{"state":"closed"}' >/dev/null 2>&1; then log "manifest: closed PR #${pr}" else - log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}" + log "manifest: FAILED close_pr #${pr}" fi ;; @@ -335,9 +319,9 @@ if [ -n "$PR_NUMBER" ]; then if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then # Post-merge: pull primary, mirror push, execute manifest - git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true - git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true mirror_push _gardener_execute_manifest rm -f "$SCRATCH_FILE" @@ -350,8 +334,5 @@ else rm -f "$SCRATCH_FILE" fi -# Write journal entry post-session -profile_write_journal "gardener-run" "Gardener run $(date -u +%Y-%m-%d)" "complete" "" || true - rm -f "$GARDENER_PR_FILE" log "--- Gardener run done ---" diff --git a/gardener/pending-actions.json b/gardener/pending-actions.json index a148369..747973c 100644 --- a/gardener/pending-actions.json +++ b/gardener/pending-actions.json @@ -1,7 +1,32 @@ [ { "action": "edit_body", - "issue": 356, - "body": "## Problem\n\nThe entrypoint hardcodes `REPRODUCE_FORMULA` to `formulas/reproduce.toml` (line 26) and never checks the `DISINTO_FORMULA` environment variable passed by the dispatcher for triage runs.\n\nThe dispatcher sets `-e DISINTO_FORMULA=triage` for triage dispatch, but the entrypoint ignores it — always running the reproduce formula.\n\n## Fix\n\nAt line 26, select the formula based on `DISINTO_FORMULA`:\n\n```bash\ncase \"${DISINTO_FORMULA:-reproduce}\" in\n triage)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/triage.toml\"\n ;;\n *)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/reproduce.toml\"\n 
;;\nesac\n```\n\nThen use `ACTIVE_FORMULA` everywhere `REPRODUCE_FORMULA` is currently used.\n\nAlso update log messages to reflect which formula is running (\"Starting triage-agent\" vs \"Starting reproduce-agent\").\n\n## Affected files\n\n- `docker/reproduce/entrypoint-reproduce.sh` — line 26 and all references to REPRODUCE_FORMULA\n\n## Acceptance criteria\n\n- [ ] `DISINTO_FORMULA=triage` selects `formulas/triage.toml` in the entrypoint\n- [ ] `DISINTO_FORMULA=reproduce` (or unset) still runs `formulas/reproduce.toml`\n- [ ] Log messages reflect which formula is active (\"Starting triage-agent\" / \"Starting reproduce-agent\")\n- [ ] All `REPRODUCE_FORMULA` references replaced with `ACTIVE_FORMULA`\n" + "issue": 765, + "body": "Depends on: none\n\n## Goal\n\nThe disinto website becomes a versioned artifact: built by CI, published to Codeberg's generic package registry, deployed to staging automatically. Version visible in footer.\n\n## Files to add/change\n\n### `site/VERSION`\n```\n0.1.0\n```\n\n### `site/build.sh`\n```bash\n#!/bin/bash\nVERSION=$(cat VERSION)\nmkdir -p dist\ncp *.html *.jpg *.webp *.png *.ico *.xml robots.txt dist/\nsed -i \"s|Built from scrap, powered by a single battery.|v${VERSION} · Built from scrap, powered by a single battery.|\" dist/index.html\necho \"$VERSION\" > dist/VERSION\n```\n\n### `site/index.html`\nNo template placeholder needed — `build.sh` does the sed replacement on the existing footer text.\n\n### `.woodpecker/site.yml`\n```yaml\nwhen:\n path: \"site/**\"\n event: push\n branch: main\n\nsteps:\n - name: build\n image: alpine\n commands:\n - cd site && sh build.sh\n - VERSION=$(cat site/VERSION)\n - tar czf site-${VERSION}.tar.gz -C site/dist .\n\n - name: publish\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n --upload-file site-${VERSION}.tar.gz\n 
\"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n\n - name: deploy-staging\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n -o site.tar.gz\n - rm -rf /srv/staging/*\n - tar xzf site.tar.gz -C /srv/staging/\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n volumes:\n - /home/debian/staging-site:/srv/staging\n```\n\n## Infra setup (manual, before first run)\n- `mkdir -p /home/debian/staging-site`\n- Add to Caddyfile: `staging.disinto.ai { root * /home/debian/staging-site; file_server }`\n- DNS: `staging.disinto.ai` A record → same IP as `disinto.ai`\n- Reload Caddy: `sudo systemctl reload caddy`\n- Add `forge_token` as Woodpecker repo secret for johba/disinto (if not already set)\n- Add `/home/debian/staging-site` to `WOODPECKER_BACKEND_DOCKER_VOLUMES`\n\n## Verification\n- [ ] Merge PR that touches `site/` → CI runs site pipeline\n- [ ] Package appears at `codeberg.org/johba/-/packages/generic/disinto-site/0.1.0`\n- [ ] `staging.disinto.ai` serves the site with `v0.1.0` in footer\n- [ ] `disinto.ai` (production) unchanged\n\n## Related\n- #764 — docker stack edge proxy + staging (future: this moves inside the stack)\n- #755 — vault-gated production promotion (production deploy comes later)\n\n## Affected files\n- `site/VERSION` — new, holds current version string\n- `site/build.sh` — new, builds dist/ with version injected into footer\n- `.woodpecker/site.yml` — new, CI pipeline for build/publish/deploy-staging" + }, + { + "action": "edit_body", + "issue": 764, + "body": "Depends on: none (builds on existing docker-compose generation in `bin/disinto`)\n\n## Design\n\n`disinto init` + `disinto up` starts two additional containers as base factory infrastructure:\n\n### Edge 
proxy (Caddy)\n- Reverse proxies to Forgejo and Woodpecker\n- Serves staging site\n- Runs on ports 80/443\n- At bootstrap: IP-only, self-signed TLS or HTTP\n- Domain + Let's Encrypt added later via vault resource request\n\n### Staging container (Caddy)\n- Static file server for the project's staging artifacts\n- Starts with a default \"Nothing shipped yet\" page\n- CI pipelines write to a shared volume to update staging content\n- No vault approval needed — staging is the factory's sandbox\n\n### docker-compose addition\n```yaml\nservices:\n edge:\n image: caddy:alpine\n ports:\n - \"80:80\"\n - \"443:443\"\n volumes:\n - ./Caddyfile:/etc/caddy/Caddyfile\n - caddy_data:/data\n depends_on:\n - forgejo\n - woodpecker-server\n - staging\n\n staging:\n image: caddy:alpine\n volumes:\n - staging-site:/srv/site\n # Not exposed directly — edge proxies to it\n\nvolumes:\n caddy_data:\n staging-site:\n```\n\n### Caddyfile (generated by `disinto init`)\n```\n# IP-only at bootstrap, domain added later\n:80 {\n handle /forgejo/* {\n reverse_proxy forgejo:3000\n }\n handle /ci/* {\n reverse_proxy woodpecker-server:8000\n }\n handle {\n reverse_proxy staging:80\n }\n}\n```\n\n### Staging update flow\n1. CI builds artifact (site tarball, etc.)\n2. CI step writes to `staging-site` volume\n3. Staging container serves updated content immediately\n4. 
No restart needed — Caddy serves files directly\n\n### Domain lifecycle\n- Bootstrap: no domain, edge serves on IP\n- Later: factory files vault resource request for domain\n- Human buys domain, sets DNS\n- Caddyfile updated with domain, Let's Encrypt auto-provisions TLS\n\n## Affected files\n- `bin/disinto` — `generate_compose()` adds edge + staging services\n- New: default staging page (\"Nothing shipped yet\")\n- New: Caddyfile template in `docker/`\n\n## Related\n- #755 — vault-gated deployment promotion (production comes later)\n- #757 — ops repo (domain is a resource requested through vault)\n\n## Acceptance criteria\n- [ ] `disinto init` generates a `docker-compose.yml` that includes `edge` (Caddy) and `staging` containers\n- [ ] Edge proxy routes `/forgejo/*` → Forgejo, `/ci/*` → Woodpecker, default → staging container\n- [ ] Staging container serves a default \"Nothing shipped yet\" page on first boot\n- [ ] `docker/` directory contains a Caddyfile template generated by `disinto init`\n- [ ] `disinto up` starts all containers including edge and staging without manual steps" + }, + { + "action": "edit_body", + "issue": 761, + "body": "Depends on: #747\n\n## Design\n\nEach agent account on the bundled Forgejo gets a `.profile` repo. This repo holds the agent's formula (copied from disinto at creation time) and its journal.\n\n### Structure\n```\n{agent-bot}/.profile/\n├── formula.toml # snapshot of the formula at agent creation time\n├── journal/ # daily logs of what the agent did\n│ ├── 2026-03-26.md\n│ └── ...\n└── knowledge/ # learned patterns, best-practices (optional, agent can evolve)\n```\n\n### Lifecycle\n1. **Create agent** — `disinto init` or `disinto spawn-agent` creates Forgejo account + `.profile` repo\n2. **Copy formula** — current `formulas/{role}.toml` from disinto repo is copied to `.profile/formula.toml`\n3. **Agent reads its own formula** — at session start, agent reads from its `.profile`, not from the disinto repo\n4. 
**Agent writes journal** — daily entries pushed to `.profile/journal/`\n5. **Agent can evolve knowledge** — best-practices, heuristics, patterns written to `.profile/knowledge/`\n\n### What this enables\n\n**A/B testing formulas:** Create two agents from different formula versions, run both against the same backlog, compare results (cycle time, CI pass rate, review rejection rate).\n\n**Rollback:** New formula worse? Kill agent, spawn from older formula version.\n\n**Audit:** What formula was this agent running when it produced that PR? Check its `.profile` at that git commit.\n\n**Drift tracking:** Diff what an agent learned (`.profile/knowledge/`) vs what it started with. Measure formula evolution over time.\n\n**Portability:** Move agent to different box — `git clone` its `.profile`.\n\n### Disinto repo becomes the template\n\n```\ndisinto repo:\n formulas/dev-agent.toml ← canonical template, evolves\n formulas/review-agent.toml\n formulas/planner.toml\n ...\n\nRunning agents:\n dev-bot-v2/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v2\n dev-bot-v3/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v3\n review-bot/.profile/formula.toml ← snapshot from formulas/review-agent.toml\n```\n\nThe formula in the disinto repo is the template. The `.profile` copy is the instance. 
They can diverge — that's a feature, not a bug.\n\n## Affected files\n- `bin/disinto` — agent creation copies formula to .profile\n- Agent session scripts — read formula from .profile instead of local formulas/ dir\n- Planner/supervisor — can read other agents' journals from their .profile repos\n\n## Related\n- #747 — per-agent Forgejo accounts (prerequisite)\n- #757 — ops repo (shared concerns stay there: vault, portfolio, resources)\n\n## Acceptance criteria\n- [ ] `disinto spawn-agent` (or `disinto init`) creates a Forgejo account + `.profile` repo for each agent bot\n- [ ] Current `formulas/{role}.toml` is copied to `.profile/formula.toml` at agent creation time\n- [ ] Agent session script reads its formula from `.profile/formula.toml`, not from the repo's `formulas/` directory\n- [ ] Agent writes daily journal entries to `.profile/journal/YYYY-MM-DD.md`" + }, + { + "action": "edit_body", + "issue": 742, + "body": "## Problem\n\n`gardener/recipes/*.toml` (4 files: cascade-rebase, chicken-egg-ci, flaky-test, shellcheck-violations) are an older pattern predating `formulas/*.toml`. Two systems for the same thing.\n\n## Fix\n\nMigrate any unique content from recipes to the gardener formula or to new formulas. 
Delete the recipes directory.\n\n## Affected files\n- `gardener/recipes/*.toml` — delete after migration\n- `formulas/run-gardener.toml` — absorb relevant content\n- Gardener scripts that reference recipes/\n\n## Acceptance criteria\n- [ ] Contents of `gardener/recipes/*.toml` are diff'd against `formulas/run-gardener.toml` — any unique content is migrated\n- [ ] `gardener/recipes/` directory is deleted\n- [ ] No scripts in `gardener/` reference the `recipes/` path after migration\n- [ ] ShellCheck passes on all modified scripts" + }, + { + "action": "add_label", + "issue": 742, + "label": "backlog" + }, + { + "action": "add_label", + "issue": 741, + "label": "backlog" } ] diff --git a/gardener/recipes/cascade-rebase.toml b/gardener/recipes/cascade-rebase.toml new file mode 100644 index 0000000..1cd09ee --- /dev/null +++ b/gardener/recipes/cascade-rebase.toml @@ -0,0 +1,16 @@ +# gardener/recipes/cascade-rebase.toml — PR outdated after main moved +# +# Trigger: PR mergeable=false (stale branch or dismissed approval) +# Playbook: rebase only — merge and re-approval happen on subsequent cycles +# after CI reruns on the rebased branch (rebase is async via Gitea API) + +name = "cascade-rebase" +description = "PR outdated after main moved — mergeable=false or stale approval" +priority = 20 + +[trigger] +pr_mergeable = false + +[[playbook]] +action = "rebase-pr" +description = "Rebase PR onto main (async — CI reruns, merge on next cycle)" diff --git a/gardener/recipes/chicken-egg-ci.toml b/gardener/recipes/chicken-egg-ci.toml new file mode 100644 index 0000000..cc71e02 --- /dev/null +++ b/gardener/recipes/chicken-egg-ci.toml @@ -0,0 +1,25 @@ +# gardener/recipes/chicken-egg-ci.toml — PR introduces CI step that fails on pre-existing code +# +# Trigger: New .woodpecker/*.yml in PR + lint/check step + failures on unchanged files +# Playbook: make step non-blocking, create per-file issues, create follow-up to remove bypass + +name = "chicken-egg-ci" +description = "PR 
introduces a CI pipeline/linting step that fails on pre-existing code" +priority = 10 + +[trigger] +pr_files = '\.woodpecker/.*\.yml$' +step_name = '(?i)(lint|shellcheck|check)' +failures_on_unchanged = true + +[[playbook]] +action = "make-step-non-blocking" +description = "Make failing step non-blocking (|| true) in the PR" + +[[playbook]] +action = "lint-per-file" +description = "Create per-file fix issues for pre-existing violations (generic linter support)" + +[[playbook]] +action = "create-followup-remove-bypass" +description = "Create follow-up issue to remove || true once fixes land" diff --git a/gardener/recipes/flaky-test.toml b/gardener/recipes/flaky-test.toml new file mode 100644 index 0000000..5a76940 --- /dev/null +++ b/gardener/recipes/flaky-test.toml @@ -0,0 +1,20 @@ +# gardener/recipes/flaky-test.toml — CI fails intermittently +# +# Trigger: Test step fails + multiple CI attempts (same step, different output) +# Playbook: retrigger CI (max 2x), quarantine test if still failing + +name = "flaky-test" +description = "CI fails intermittently — same step fails across multiple attempts" +priority = 30 + +[trigger] +step_name = '(?i)test' +min_attempts = 2 + +[[playbook]] +action = "retrigger-ci" +description = "Retrigger CI (max 2 retries)" + +[[playbook]] +action = "quarantine-test" +description = "If still failing, quarantine test and create fix issue" diff --git a/gardener/recipes/shellcheck-violations.toml b/gardener/recipes/shellcheck-violations.toml new file mode 100644 index 0000000..0bc9d57 --- /dev/null +++ b/gardener/recipes/shellcheck-violations.toml @@ -0,0 +1,20 @@ +# gardener/recipes/shellcheck-violations.toml — ShellCheck step fails +# +# Trigger: Step named *shellcheck* fails with SC#### codes in output +# Playbook: parse per-file, create one issue per file, label backlog + +name = "shellcheck-violations" +description = "ShellCheck step fails with SC#### codes in output" +priority = 40 + +[trigger] +step_name = '(?i)shellcheck' +output = 
'SC\d{4}' + +[[playbook]] +action = "shellcheck-per-file" +description = "Parse output by file, create one fix issue per file with specific SC codes" + +[[playbook]] +action = "label-backlog" +description = "Label created issues as backlog" diff --git a/lib/AGENTS.md b/lib/AGENTS.md index a70e9a7..520440b 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -1,4 +1,4 @@ - + # Shared Helpers (`lib/`) All agents source `lib/env.sh` as their first action. Additional helpers are @@ -6,29 +6,19 @@ sourced as needed. | File | What it provides | Sourced by | |---|---|---| -| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). 
| Every agent | -| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs [--step ]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr | +| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`, `FORGE_ACTION_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. 
**Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the vault-runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent | +| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this). | dev-poll, review-poll, review-pr, supervisor-poll | | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | -| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. ` [--step ]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). 
| ci-helpers.sh | -| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). | env.sh (when `PROJECT_TOML` is set) | -| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll | -| `lib/formula-session.sh` | `acquire_cron_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven cron agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. 
`cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | -| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points | -| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh | +| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) | +| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. 
| dev-poll, supervisor-poll | +| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh | +| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, action-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points | +| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. 
Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh | | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh | -| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh | -| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula | +| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh | +| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. 
| (available for future use) | | `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) | -| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | -| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. 
| dev-agent.sh (future) | -| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) | -| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request ` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/`, writes `vault/actions/.toml`, creates PR targeting `main` with title `vault: ` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher | -| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. 
| bin/disinto (hire-an-agent) | -| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) | -| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE__PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) | -| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) | -| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence). Exports `_ACTUAL_OPS_SLUG`. | bin/disinto (init) | -| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for project agents. `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. 
`_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) | -| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml, `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) | -| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) | -| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) | +| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). 
| dev-agent.sh, action-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | +| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future), action-agent.sh (future) | +| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future), action-agent.sh (future) | +| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. 
Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). **OAuth flock**: before launching Claude, `create_agent_session` takes an fd-based `flock -w 300` on `~/.claude/session.lock` so concurrent sessions queue instead of racing on OAuth token refresh; callers can drop and retake the lock around idle phases (awaiting_review/awaiting_ci) with `session_lock_release()` / `session_lock_acquire([timeout_secs])`. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract.
| dev-agent.sh, action-agent.sh | diff --git a/lib/agent-sdk.sh b/lib/agent-sdk.sh index 1c1a69c..41879bf 100644 --- a/lib/agent-sdk.sh +++ b/lib/agent-sdk.sh @@ -46,23 +46,9 @@ agent_run() { [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") local run_dir="${worktree_dir:-$(pwd)}" - local lock_file="${HOME}/.claude/session.lock" - mkdir -p "$(dirname "$lock_file")" - local output rc + local output log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" - output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$? - if [ "$rc" -eq 124 ]; then - log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)" - elif [ "$rc" -ne 0 ]; then - log "agent_run: claude exited with code $rc" - # Log last 3 lines of output for diagnostics - if [ -n "$output" ]; then - log "agent_run: last output lines: $(echo "$output" | tail -3)" - fi - fi - if [ -z "$output" ]; then - log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)" - fi + output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true # Extract and persist session_id local new_sid @@ -72,45 +58,4 @@ agent_run() { printf '%s' "$new_sid" > "$SID_FILE" log "agent_run: session_id=${new_sid:0:12}..." fi - - # Save output for diagnostics (no_push, crashes) - _AGENT_LAST_OUTPUT="$output" - local diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json" - printf '%s' "$output" > "$diag_file" 2>/dev/null || true - - # Nudge: if the model stopped without pushing, resume with encouragement. - # Some models emit end_turn prematurely when confused. A nudge often unsticks them. 
- if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then - local has_changes - has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true - local has_pushed - has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true - if [ -z "$has_pushed" ]; then - if [ -n "$has_changes" ]; then - # Nudge: there are uncommitted changes - local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push." - log "agent_run: nudging (uncommitted changes)" - local nudge_rc - output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$? - if [ "$nudge_rc" -eq 124 ]; then - log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)" - elif [ "$nudge_rc" -ne 0 ]; then - log "agent_run: nudge claude exited with code $nudge_rc" - # Log last 3 lines of output for diagnostics - if [ -n "$output" ]; then - log "agent_run: nudge last output lines: $(echo "$output" | tail -3)" - fi - fi - new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true - if [ -n "$new_sid" ]; then - _AGENT_SESSION_ID="$new_sid" - printf '%s' "$new_sid" > "$SID_FILE" - fi - printf '%s' "$output" > "$diag_file" 2>/dev/null || true - _AGENT_LAST_OUTPUT="$output" - else - log "agent_run: no push and no changes — skipping nudge" - fi - fi - fi } diff --git a/lib/agent-session.sh b/lib/agent-session.sh new file mode 100644 index 0000000..dbb1e2a --- /dev/null +++ b/lib/agent-session.sh @@ -0,0 +1,486 @@ +#!/usr/bin/env bash +# agent-session.sh — Shared tmux + Claude interactive session helpers +# +# Source this into agent orchestrator scripts for reusable session management. 
#
# Functions:
#   create_agent_session SESSION_NAME [WORKDIR] [PHASE_FILE]
#   inject_formula SESSION_NAME TEXT
#   agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS]
#   agent_inject_into_session SESSION_NAME TEXT
#   agent_kill_session SESSION_NAME
#   monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
#   write_compact_context PHASE_FILE CONTENT
#   read_phase [PHASE_FILE]
#   session_lock_acquire [TIMEOUT_SECS]
#   session_lock_release

# --- Cooperative session lock (fd-based) ---
# File descriptor for the session lock. Set by create_agent_session().
# Callers can release/re-acquire via session_lock_release/session_lock_acquire
# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci).
SESSION_LOCK_FD=""

# Release the session lock without closing the file descriptor.
# The fd stays open so it can be re-acquired later.
# No-op when no lock fd has been opened yet.
session_lock_release() {
  if [ -n "${SESSION_LOCK_FD:-}" ]; then
    flock -u "$SESSION_LOCK_FD"
  fi
}

# Re-acquire the session lock. Blocks until available or timeout.
# Opens the lock fd on ~/.claude/session.lock if not already open (for use by
# external callers).
# Globals: SESSION_LOCK_FD (read; written when the fd is first opened), HOME
# Args: [timeout_secs] (default 300)
# Returns 0 on success, 1 on timeout/error (including a lock directory or
# lock file that cannot be created/opened).
# shellcheck disable=SC2120 # timeout arg is used by external callers
session_lock_acquire() {
  local timeout="${1:-300}"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    local lock_dir="${HOME}/.claude"
    mkdir -p "$lock_dir" || return 1
    # Without this check a failed open would leave SESSION_LOCK_FD empty and
    # flock would be called with an empty fd (usage error, not the documented 1).
    if ! exec {SESSION_LOCK_FD}>>"${lock_dir}/session.lock"; then
      SESSION_LOCK_FD=""
      return 1
    fi
  fi
  # Normalise every flock failure to the documented return code.
  flock -w "$timeout" "$SESSION_LOCK_FD" || return 1
}

# Wait for the Claude ❯ ready prompt in a tmux pane.
# Polls the pane every 2 seconds.
# Args: session [timeout_secs] (default 120)
# Returns 0 if ready within TIMEOUT_SECS, 1 otherwise.
agent_wait_for_claude_ready() {
  local session="$1"
  local timeout="${2:-120}"
  local elapsed=0
  local pane
  while [ "$elapsed" -lt "$timeout" ]; do
    # Capture first, then match in-shell: piping into `grep -q` can report a
    # false miss under `set -o pipefail` when grep exits before tmux finishes
    # writing (SIGPIPE on the left-hand side).
    pane=$(tmux capture-pane -t "$session" -p 2>/dev/null) || pane=""
    case "$pane" in
      *'❯'*) return 0 ;;
    esac
    sleep 2
    elapsed=$((elapsed + 2))
  done
  return 1
}

# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter.
# Args: session text
# Re-acquires the session lock (best effort), clears the idle marker for the
# session, then pastes TEXT through a tmux buffer so multi-line prompts arrive
# intact.
# Returns 1 if the temporary file cannot be created.
agent_inject_into_session() {
  local session="$1"
  local text="$2"
  local buffer="agent-inject-$$"
  local tmpfile
  # Re-acquire session lock before injecting — Claude will resume working
  # shellcheck disable=SC2119 # using default timeout
  session_lock_acquire || true
  agent_wait_for_claude_ready "$session" 120 || true
  # Clear idle marker — new work incoming
  rm -f "/tmp/claude-idle-${session}.ts"
  tmpfile=$(mktemp /tmp/agent-inject-XXXXXX) || return 1
  printf '%s' "$text" > "$tmpfile"
  tmux load-buffer -b "$buffer" "$tmpfile"
  tmux paste-buffer -t "$session" -b "$buffer"
  sleep 0.5
  tmux send-keys -t "$session" "" Enter
  tmux delete-buffer -b "$buffer" 2>/dev/null || true
  rm -f "$tmpfile"
}

# Register one command hook in a Claude settings.json (idempotent).
# Merges into an existing file, or creates the file when it does not exist.
# The result is written to a temp file and moved into place, so a failed jq
# run never leaves a truncated or empty settings file behind.
# Args: settings_file event matcher command
# Returns 0 on success, 1 (with a message on stderr) on failure.
_agent_install_hook() {
  local settings="$1"
  local event="$2"
  local matcher="$3"
  local cmd="$4"
  local tmp="${settings}.tmp"
  local rc=0

  if [ -f "$settings" ]; then
    # Append unless a hook with the identical command is already registered.
    jq --arg event "$event" --arg matcher "$matcher" --arg cmd "$cmd" '
      if (.hooks[$event] // [] | any(.[]; .hooks[]?.command == $cmd))
      then .
      else .hooks[$event] = (.hooks[$event] // []) + [{
        matcher: $matcher,
        hooks: [{type: "command", command: $cmd}]
      }]
      end
    ' "$settings" > "$tmp" || rc=$?
  else
    jq -n --arg event "$event" --arg matcher "$matcher" --arg cmd "$cmd" '{
      hooks: {
        ($event): [{
          matcher: $matcher,
          hooks: [{type: "command", command: $cmd}]
        }]
      }
    }' > "$tmp" || rc=$?
  fi

  if [ "$rc" -ne 0 ] || ! mv "$tmp" "$settings"; then
    rm -f "$tmp"
    printf 'agent-session: failed to install %s hook in %s\n' \
      "$event" "$settings" >&2
    return 1
  fi
}

# Create a tmux session running Claude in the given workdir.
# Installs a Stop hook for idle detection (see monitor_phase_loop).
# Installs a PreToolUse hook to guard destructive Bash operations.
# Installs a SessionEnd hook that writes a termination marker.
# Optionally installs a PostToolUse hook for phase file write detection.
# Optionally installs a StopFailure hook for immediate phase file update on API error.
# Optionally installs a SessionStart (compact) hook for context re-injection.
# Each hook is installed only when its script under $FACTORY_ROOT/lib/hooks is
# executable; a hook that fails to install is reported on stderr and skipped.
# Globals: FACTORY_ROOT, HOME, PRIMARY_BRANCH, CLAUDE_MODEL (read),
#          SESSION_LOCK_FD (read; written when the lock fd is first opened)
# Args: session workdir [phase_file]
# Returns 0 if session is ready, 1 otherwise. The session lock is held on
# success; it is released again if the tmux session never came up.
create_agent_session() {
  local session="$1"
  local workdir="${2:-.}"
  local phase_file="${3:-}"
  local hooks_dir="${FACTORY_ROOT}/lib/hooks"

  # Prepare settings directory for hooks
  mkdir -p "${workdir}/.claude"
  local settings="${workdir}/.claude/settings.json"

  # Stop hook for idle detection: when Claude finishes a response, the hook
  # writes a timestamp to a marker file. monitor_phase_loop checks this marker
  # instead of fragile tmux pane scraping.
  local idle_marker="/tmp/claude-idle-${session}.ts"
  local hook_script="${hooks_dir}/on-idle-stop.sh"
  if [ -x "$hook_script" ]; then
    local hook_cmd="${hook_script} ${idle_marker}"
    # When a phase file is available, pass it and the session name so the
    # hook can nudge Claude if it returns to the prompt without signalling.
    if [ -n "$phase_file" ]; then
      hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}"
    fi
    _agent_install_hook "$settings" "Stop" "" "$hook_cmd" || true
  fi

  # PostToolUse hook for phase file write detection: when Claude writes to the
  # phase file via Bash or Write, the hook writes a marker so
  # monitor_phase_loop can react immediately instead of waiting for the next
  # mtime-based poll cycle.
  if [ -n "$phase_file" ]; then
    local phase_marker="/tmp/phase-changed-${session}.marker"
    local phase_hook_script="${hooks_dir}/on-phase-change.sh"
    if [ -x "$phase_hook_script" ]; then
      _agent_install_hook "$settings" "PostToolUse" "Bash|Write" \
        "${phase_hook_script} ${phase_file} ${phase_marker}" || true
      rm -f "$phase_marker"
    fi
  fi

  # StopFailure hook for immediate phase file update on API error: when Claude
  # hits a rate limit, server error, billing error, or auth failure, the hook
  # writes PHASE:failed to the phase file and touches the phase-changed marker
  # so monitor_phase_loop picks it up within one poll cycle instead of waiting
  # for idle timeout (up to 2 hours).
  if [ -n "$phase_file" ]; then
    local stop_failure_hook_script="${hooks_dir}/on-stop-failure.sh"
    if [ -x "$stop_failure_hook_script" ]; then
      # Same marker path as the PostToolUse block; declared separately so this
      # block stays self-contained.
      local sf_phase_marker="/tmp/phase-changed-${session}.marker"
      _agent_install_hook "$settings" "StopFailure" \
        "rate_limit|server_error|authentication_failed|billing_error" \
        "${stop_failure_hook_script} ${phase_file} ${sf_phase_marker}" || true
    fi
  fi

  # PreToolUse hook for destructive operation guard: blocks force push to
  # primary branch, rm -rf outside worktree, direct API merge calls, and
  # checkout/switch to primary branch. Claude sees the denial reason on exit 2
  # and can self-correct.
  local guard_hook_script="${hooks_dir}/on-pretooluse-guard.sh"
  if [ -x "$guard_hook_script" ]; then
    local abs_workdir
    abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir"
    _agent_install_hook "$settings" "PreToolUse" "Bash" \
      "${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}" || true
  fi

  # SessionEnd hook for guaranteed cleanup: when the Claude session exits
  # (clean or crash), write a termination marker so monitor_phase_loop detects
  # the exit faster than tmux has-session polling alone.
  local exit_marker="/tmp/claude-exited-${session}.ts"
  local session_end_hook_script="${hooks_dir}/on-session-end.sh"
  if [ -x "$session_end_hook_script" ]; then
    _agent_install_hook "$settings" "SessionEnd" "" \
      "${session_end_hook_script} ${exit_marker}" || true
  fi
  rm -f "$exit_marker"

  # SessionStart hook for context re-injection after compaction: when Claude
  # Code compacts context during long sessions, the phase protocol
  # instructions are lost. This hook fires after each compaction and outputs
  # the content of a context file so Claude retains critical instructions.
  # The context file is written by callers via write_compact_context().
  if [ -n "$phase_file" ]; then
    local compact_hook_script="${hooks_dir}/on-compact-reinject.sh"
    if [ -x "$compact_hook_script" ]; then
      local context_file="${phase_file%.phase}.context"
      _agent_install_hook "$settings" "SessionStart" "compact" \
        "${compact_hook_script} ${context_file}" || true
    fi
  fi

  rm -f "$idle_marker"
  local model_flag=""
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    model_flag="--model ${CLAUDE_MODEL}"
  fi

  # Acquire a session-level mutex via fd-based flock to prevent concurrent
  # Claude sessions from racing on OAuth token refresh. Unlike the previous
  # command-wrapper flock, the fd approach allows callers to release the lock
  # during idle phases (awaiting_review/awaiting_ci) and re-acquire before
  # injecting the next prompt. See #724.
  # Use ~/.claude/session.lock so the lock is shared across containers when
  # the host ~/.claude directory is bind-mounted.
  local lock_dir="${HOME}/.claude"
  mkdir -p "$lock_dir"
  local claude_lock="${lock_dir}/session.lock"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    exec {SESSION_LOCK_FD}>>"${claude_lock}"
  fi
  if ! flock -w 300 "$SESSION_LOCK_FD"; then
    return 1
  fi
  local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"

  tmux new-session -d -s "$session" -c "$workdir" \
    "$claude_cmd" 2>/dev/null
  sleep 1
  if ! tmux has-session -t "$session" 2>/dev/null; then
    # No Claude process exists, so nothing needs the lock; holding it would
    # block every other session for as long as this shell stays alive.
    flock -u "$SESSION_LOCK_FD" 2>/dev/null || true
    return 1
  fi
  # If the session exists but never shows the prompt, Claude may still be
  # running inside it, so the lock is deliberately kept.
  agent_wait_for_claude_ready "$session" 120 || return 1
  return 0
}

# Inject a prompt/formula into a session (alias for agent_inject_into_session).
inject_formula() {
  agent_inject_into_session "$@"
}

# Monitor a phase file, calling a callback on changes and handling idle timeout.
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate).
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
# Args: phase_file idle_timeout_secs callback_fn [session_name]
#   session_name — tmux session to health-check; falls back to $SESSION_NAME global
# Globals: PHASE_POLL_INTERVAL (read, seconds between polls, default 10),
#          _MONITOR_SESSION, _MONITOR_LOOP_EXIT, LAST_PHASE_MTIME (written)
# Returns 1 when the session crashed and the callback did not restart it,
# 0 on every other exit path.
#
# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh)
# to detect when Claude finishes responding without writing a phase signal.
# If the marker exists for 3 consecutive polls with no phase written, the session
# is killed and the callback invoked with "PHASE:failed".
monitor_phase_loop() {
  local phase_file="$1"
  local idle_timeout="$2"
  local callback="$3"
  local _session="${4:-${SESSION_NAME:-}}"
  # Export resolved session name so callbacks can reference it regardless of
  # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT).
  export _MONITOR_SESSION="$_session"
  local poll_interval="${PHASE_POLL_INTERVAL:-10}"
  local last_mtime=0
  local idle_elapsed=0
  local idle_pane_count=0
  # Marker files shared with the hooks installed by create_agent_session.
  local exit_marker="/tmp/claude-exited-${_session}.ts"
  local phase_marker="/tmp/phase-changed-${_session}.marker"
  local idle_marker="/tmp/claude-idle-${_session}.ts"
  local current_phase phase_mtime

  while true; do
    sleep "$poll_interval"
    idle_elapsed=$(( idle_elapsed + poll_interval ))

    # Session health check: SessionEnd hook marker provides fast detection,
    # tmux has-session is the fallback for unclean exits (e.g. tmux crash).
    if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then
      current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
      case "$current_phase" in
        PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
          # Terminal — fall through to the phase handler. The session is gone,
          # so nothing will touch the phase file again: force dispatch even if
          # its mtime is not newer than the last handled phase (two writes in
          # the same second), otherwise the loop would spin until idle timeout.
          last_mtime=0
          ;;
        *)
          # Call callback with "crashed" — let agent-specific code handle recovery
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:crashed"
          fi
          # If callback didn't restart session, break
          if ! tmux has-session -t "${_session}" 2>/dev/null; then
            _MONITOR_LOOP_EXIT="crashed"
            return 1
          fi
          idle_elapsed=0
          idle_pane_count=0
          continue
          ;;
      esac
    fi

    # Check phase-changed marker from PostToolUse hook — if present, the hook
    # detected a phase file write so we reset last_mtime to force processing
    # this cycle instead of waiting for the next mtime change.
    if [ -f "$phase_marker" ]; then
      rm -f "$phase_marker"
      last_mtime=0
    fi

    # Check phase file for changes
    phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0)
    current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)

    if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then
      # No phase change — check idle timeout
      if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
        _MONITOR_LOOP_EXIT="idle_timeout"
        agent_kill_session "${_session}"
        return 0
      fi
      # Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker
      # file when Claude finishes a response. If the marker exists and no phase
      # has been written, Claude returned to the prompt without following the
      # phase protocol. 3 consecutive polls = confirmed idle (not mid-turn).
      if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then
        idle_pane_count=$(( idle_pane_count + 1 ))
        if [ "$idle_pane_count" -ge 3 ]; then
          _MONITOR_LOOP_EXIT="idle_prompt"
          # Session is killed before the callback is invoked.
          # Callbacks that handle PHASE:failed must not assume the session is alive.
          agent_kill_session "${_session}"
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:failed"
          fi
          return 0
        fi
      else
        idle_pane_count=0
      fi
      continue
    fi

    # Phase changed
    last_mtime="$phase_mtime"
    # shellcheck disable=SC2034 # read by phase-handler.sh callback
    LAST_PHASE_MTIME="$phase_mtime"
    idle_elapsed=0
    idle_pane_count=0

    # Terminal phases
    case "$current_phase" in
      PHASE:done|PHASE:merged)
        _MONITOR_LOOP_EXIT="done"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
      PHASE:failed|PHASE:escalate)
        _MONITOR_LOOP_EXIT="$current_phase"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
    esac

    # Non-terminal phase — call callback
    if type "${callback}" &>/dev/null; then
      "$callback" "$current_phase"
    fi
  done
}

# Write context to a file for re-injection after context compaction.
# The SessionStart compact hook reads this file and outputs it to stdout.
# The context file sits next to the phase file: a trailing ".phase" is
# replaced by ".context" (or ".context" is appended when there is none).
# Args: phase_file content
write_compact_context() {
  local phase_file="$1"
  local content="$2"
  local context_file="${phase_file%.phase}.context"
  printf '%s\n' "$content" > "$context_file"
}

# Kill a tmux session gracefully (no-op if not found) and remove the
# per-session marker files under /tmp (idle, phase-changed, exited, nudge count).
# Args: session
agent_kill_session() {
  local session="${1:-}"
  if [ -n "$session" ]; then
    # Best effort: the session may already be gone.
    tmux kill-session -t "$session" 2>/dev/null || true
  fi
  rm -f "/tmp/claude-idle-${session}.ts"
  rm -f "/tmp/phase-changed-${session}.marker"
  rm -f "/tmp/claude-exited-${session}.ts"
  rm -f "/tmp/claude-nudge-${session}.count"
}

# Read the current phase from a phase file, stripped of whitespace.
+# Usage: read_phase [file] — defaults to $PHASE_FILE +read_phase() { + local file="${1:-${PHASE_FILE:-}}" + { cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]' +} diff --git a/lib/branch-protection.sh b/lib/branch-protection.sh deleted file mode 100644 index e972977..0000000 --- a/lib/branch-protection.sh +++ /dev/null @@ -1,591 +0,0 @@ -#!/usr/bin/env bash -# branch-protection.sh — Helper for setting up branch protection on repos -# -# Source after lib/env.sh: -# source "$(dirname "$0")/../lib/env.sh" -# source "$(dirname "$0")/lib/branch-protection.sh" -# -# Required globals: FORGE_TOKEN, FORGE_URL, FORGE_OPS_REPO -# -# Functions: -# setup_vault_branch_protection — Set up admin-only branch protection for main -# verify_branch_protection — Verify protection is configured correctly -# setup_profile_branch_protection — Set up admin-only branch protection for .profile repos -# remove_branch_protection — Remove branch protection (for cleanup/testing) -# -# Branch protection settings: -# - Require 1 approval before merge -# - Restrict merge to admin role (not regular collaborators or bots) -# - Block direct pushes to main (all changes must go through PR) - -set -euo pipefail - -# Internal log helper -_bp_log() { - if declare -f log >/dev/null 2>&1; then - log "branch-protection: $*" - else - printf '[%s] branch-protection: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 - fi -} - -# Get ops repo API URL -_ops_api() { - printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}" -} - -# ----------------------------------------------------------------------------- -# setup_vault_branch_protection — Set up admin-only branch protection for main -# -# Configures the following protection rules: -# - Require 1 approval before merge -# - Restrict merge to admin role (not regular collaborators or bots) -# - Block direct pushes to main (all changes must go through PR) -# -# Returns: 0 on success, 1 on failure -# 
----------------------------------------------------------------------------- -setup_vault_branch_protection() { - local branch="${1:-main}" - local api_url - api_url="$(_ops_api)" - - _bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}" - - # Check if branch exists with retry loop (handles race condition after initial push) - local branch_exists="0" - local max_attempts=3 - local attempt=1 - - while [ "$attempt" -le "$max_attempts" ]; do - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" = "200" ]; then - _bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}" - break - fi - - if [ "$attempt" -lt "$max_attempts" ]; then - _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." - sleep 2 - fi - attempt=$((attempt + 1)) - done - - if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts" - return 1 - fi - - # Check if protection already exists - local protection_exists - protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") - - if [ "$protection_exists" = "200" ]; then - _bp_log "Branch protection already exists for ${branch}" - _bp_log "Updating existing protection rules" - fi - - # Create/update branch protection - # Note: Forgejo API uses "require_signed_commits" and "required_approvals" for approval requirements - # The "admin_enforced" field ensures only admins can merge - local protection_json - protection_json=$(cat </dev/null || true) - - if [ -z "$protection_json" ] || [ "$protection_json" = "null" ]; then - _bp_log "ERROR: No branch protection found for ${branch}" - return 1 - fi - - # Extract and validate settings - local enable_push enable_merge_commit 
required_approvals admin_enforced - enable_push=$(printf '%s' "$protection_json" | jq -r '.enable_push // true') - enable_merge_commit=$(printf '%s' "$protection_json" | jq -r '.enable_merge_commit // false') - required_approvals=$(printf '%s' "$protection_json" | jq -r '.required_approvals // 0') - admin_enforced=$(printf '%s' "$protection_json" | jq -r '.admin_enforced // false') - - local errors=0 - - # Check push is disabled - if [ "$enable_push" = "true" ]; then - _bp_log "ERROR: enable_push should be false" - errors=$((errors + 1)) - else - _bp_log "OK: Pushes are blocked" - fi - - # Check merge commit is enabled - if [ "$enable_merge_commit" != "true" ]; then - _bp_log "ERROR: enable_merge_commit should be true" - errors=$((errors + 1)) - else - _bp_log "OK: Merge commits are allowed" - fi - - # Check required approvals - if [ "$required_approvals" -lt 1 ]; then - _bp_log "ERROR: required_approvals should be at least 1" - errors=$((errors + 1)) - else - _bp_log "OK: Required approvals: ${required_approvals}" - fi - - # Check admin enforced - if [ "$admin_enforced" != "true" ]; then - _bp_log "ERROR: admin_enforced should be true" - errors=$((errors + 1)) - else - _bp_log "OK: Admin enforcement enabled" - fi - - if [ "$errors" -gt 0 ]; then - _bp_log "Verification failed with ${errors} error(s)" - return 1 - fi - - _bp_log "Branch protection verified successfully" - return 0 -} - -# ----------------------------------------------------------------------------- -# setup_profile_branch_protection — Set up admin-only branch protection for .profile repos -# -# Configures the following protection rules: -# - Require 1 approval before merge -# - Restrict merge to admin role (not regular collaborators or bots) -# - Block direct pushes to main (all changes must go through PR) -# -# Also creates a 'journal' branch for direct agent journal pushes -# -# Args: -# $1 - Repo path in format 'owner/repo' (e.g., 'dev-bot/.profile') -# $2 - Branch to protect (default: main) -# 
-# Returns: 0 on success, 1 on failure -# ----------------------------------------------------------------------------- -setup_profile_branch_protection() { - local repo="${1:-}" - local branch="${2:-main}" - - if [ -z "$repo" ]; then - _bp_log "ERROR: repo path required (format: owner/repo)" - return 1 - fi - - _bp_log "Setting up branch protection for ${branch} on ${repo}" - - local api_url - api_url="${FORGE_URL}/api/v1/repos/${repo}" - - # Check if branch exists with retry loop (handles race condition after initial push) - local branch_exists="0" - local max_attempts=3 - local attempt=1 - - while [ "$attempt" -le "$max_attempts" ]; do - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" = "200" ]; then - _bp_log "Branch ${branch} exists on ${repo}" - break - fi - - if [ "$attempt" -lt "$max_attempts" ]; then - _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
- sleep 2 - fi - attempt=$((attempt + 1)) - done - - if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" - return 1 - fi - - # Check if protection already exists - local protection_exists - protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") - - if [ "$protection_exists" = "200" ]; then - _bp_log "Branch protection already exists for ${branch}" - _bp_log "Updating existing protection rules" - fi - - # Create/update branch protection - local protection_json - protection_json=$(cat </dev/null || echo "0") - - if [ "$journal_exists" != "200" ]; then - # Create journal branch from main - # Get the commit hash of main - local main_commit - main_commit=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/refs/heads/${branch}" 2>/dev/null | jq -r '.[0].object.sha' || echo "") - - if [ -n "$main_commit" ]; then - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${api_url}/git/refs" \ - -d "{\"ref\":\"refs/heads/${journal_branch}\",\"sha\":\"${main_commit}\"}" >/dev/null 2>&1 || { - _bp_log "Warning: failed to create journal branch (may already exist)" - } - fi - fi - - _bp_log "Journal branch '${journal_branch}' ready for direct pushes" - - return 0 -} - -# ----------------------------------------------------------------------------- -# remove_branch_protection — Remove branch protection (for cleanup/testing) -# -# Returns: 0 on success, 1 on failure -# ----------------------------------------------------------------------------- -remove_branch_protection() { - local branch="${1:-main}" - local api_url - api_url="$(_ops_api)" - - _bp_log "Removing branch protection for ${branch}" - - # Check if protection exists - local protection_exists - protection_exists=$(curl -s -o /dev/null -w "%{http_code}" 
\ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") - - if [ "$protection_exists" != "200" ]; then - _bp_log "No branch protection found for ${branch}" - return 0 - fi - - # Delete protection - local http_code - http_code=$(curl -s -o /dev/null -w "%{http_code}" \ - -X DELETE \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") - - if [ "$http_code" != "204" ]; then - _bp_log "ERROR: Failed to remove branch protection (HTTP ${http_code})" - return 1 - fi - - _bp_log "Branch protection removed successfully for ${branch}" - return 0 -} - -# ----------------------------------------------------------------------------- -# setup_project_branch_protection — Set up branch protection for project repos -# -# Configures the following protection rules: -# - Block direct pushes to main (all changes must go through PR) -# - Require 1 approval before merge -# - Allow merge only via dev-bot (for auto-merge after review+CI) -# - Allow review-bot to approve PRs -# -# Args: -# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto') -# $2 - Branch to protect (default: main) -# -# Returns: 0 on success, 1 on failure -# ----------------------------------------------------------------------------- -setup_project_branch_protection() { - local repo="${1:-}" - local branch="${2:-main}" - - if [ -z "$repo" ]; then - _bp_log "ERROR: repo path required (format: owner/repo)" - return 1 - fi - - _bp_log "Setting up branch protection for ${branch} on ${repo}" - - local api_url - api_url="${FORGE_URL}/api/v1/repos/${repo}" - - # Check if branch exists with retry loop (handles race condition after initial push) - local branch_exists="0" - local max_attempts=3 - local attempt=1 - - while [ "$attempt" -le "$max_attempts" ]; do - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - 
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" = "200" ]; then - _bp_log "Branch ${branch} exists on ${repo}" - break - fi - - if [ "$attempt" -lt "$max_attempts" ]; then - _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." - sleep 2 - fi - attempt=$((attempt + 1)) - done - - if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" - return 1 - fi - - # Check if protection already exists - local protection_exists - protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") - - if [ "$protection_exists" = "200" ]; then - _bp_log "Branch protection already exists for ${branch}" - _bp_log "Updating existing protection rules" - fi - - # Create/update branch protection - # Forgejo API for branch protection (factory mode): - # - enable_push: false (block direct pushes) - # - enable_merge_whitelist: true (only whitelisted users can merge) - # - merge_whitelist_usernames: ["dev-bot"] (dev-bot merges after CI) - # - required_approvals: 1 (review-bot must approve) - local protection_json - protection_json=$(cat <&2 - exit 1 - fi - - if [ -z "${FORGE_URL:-}" ]; then - echo "ERROR: FORGE_URL is required" >&2 - exit 1 - fi - - if [ -z "${FORGE_OPS_REPO:-}" ]; then - echo "ERROR: FORGE_OPS_REPO is required" >&2 - exit 1 - fi - - # Parse command line args - case "${1:-help}" in - setup) - setup_vault_branch_protection "${2:-main}" - ;; - setup-profile) - if [ -z "${2:-}" ]; then - echo "ERROR: repo path required (format: owner/repo)" >&2 - exit 1 - fi - setup_profile_branch_protection "${2}" "${3:-main}" - ;; - setup-project) - if [ -z "${2:-}" ]; then - echo "ERROR: repo path required (format: owner/repo)" >&2 - exit 1 - fi - setup_project_branch_protection "${2}" "${3:-main}" - ;; - verify) - 
verify_branch_protection "${2:-main}" - ;; - remove) - remove_branch_protection "${2:-main}" - ;; - help|*) - echo "Usage: $0 {setup|setup-profile|setup-project|verify|remove} [args...]" - echo "" - echo "Commands:" - echo " setup [branch] Set up branch protection on ops repo (default: main)" - echo " setup-profile [branch] Set up branch protection on .profile repo" - echo " setup-project [branch] Set up branch protection on project repo" - echo " verify [branch] Verify branch protection is configured correctly" - echo " remove [branch] Remove branch protection (for cleanup/testing)" - echo "" - echo "Required environment variables:" - echo " FORGE_TOKEN Forgejo API token (admin user recommended)" - echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)" - echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)" - exit 0 - ;; - esac -fi diff --git a/lib/ci-debug.sh b/lib/ci-debug.sh index dd8a0a5..4fa15ba 100755 --- a/lib/ci-debug.sh +++ b/lib/ci-debug.sh @@ -17,11 +17,6 @@ REPO="${FORGE_REPO}" API="${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}" api() { - # Validate API URL to prevent URL injection - if ! validate_url "$API"; then - echo "ERROR: API URL validation failed - possible URL injection attempt" >&2 - return 1 - fi curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" "${API}/$1" } diff --git a/lib/ci-helpers.sh b/lib/ci-helpers.sh index 11c668e..23ebce7 100644 --- a/lib/ci-helpers.sh +++ b/lib/ci-helpers.sh @@ -7,6 +7,27 @@ set -euo pipefail # ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh) # classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh) +# ensure_blocked_label_id — look up (or create) the "blocked" label, print its ID. +# Caches the result in _BLOCKED_LABEL_ID to avoid repeated API calls. 
+# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() +ensure_blocked_label_id() { + if [ -n "${_BLOCKED_LABEL_ID:-}" ]; then + printf '%s' "$_BLOCKED_LABEL_ID" + return 0 + fi + _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \ + | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true) + if [ -z "$_BLOCKED_LABEL_ID" ]; then + _BLOCKED_LABEL_ID=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/labels" \ + -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null || true) + fi + printf '%s' "$_BLOCKED_LABEL_ID" +} + # ensure_priority_label — look up (or create) the "priority" label, print its ID. # Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls. # Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() @@ -246,42 +267,3 @@ ci_promote() { echo "$new_num" } - -# ci_get_logs [--step ] -# Reads CI logs from the Woodpecker SQLite database. -# Requires: WOODPECKER_DATA_DIR env var or mounted volume at /woodpecker-data -# Returns: 0 on success, 1 on failure. Outputs log text to stdout. 
-# -# Usage: -# ci_get_logs 346 # Get all failed step logs -# ci_get_logs 346 --step smoke-init # Get logs for specific step -ci_get_logs() { - local pipeline_number="$1" - shift || true - - local step_name="" - while [ $# -gt 0 ]; do - case "$1" in - --step|-s) - step_name="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" >&2 - return 1 - ;; - esac - done - - local log_reader="${FACTORY_ROOT:-/home/agent/disinto}/lib/ci-log-reader.py" - if [ -f "$log_reader" ]; then - if [ -n "$step_name" ]; then - python3 "$log_reader" "$pipeline_number" --step "$step_name" - else - python3 "$log_reader" "$pipeline_number" - fi - else - echo "ERROR: ci-log-reader.py not found at $log_reader" >&2 - return 1 - fi -} diff --git a/lib/ci-log-reader.py b/lib/ci-log-reader.py deleted file mode 100755 index 5786e5a..0000000 --- a/lib/ci-log-reader.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python3 -""" -ci-log-reader.py — Read CI logs from Woodpecker SQLite database. - -Usage: - ci-log-reader.py [--step ] - -Reads log entries from the Woodpecker SQLite database and outputs them to stdout. -If --step is specified, filters to that step only. Otherwise returns logs from -all failed steps, truncated to the last 200 lines to avoid context bloat. - -Environment: - WOODPECKER_DATA_DIR - Path to Woodpecker data directory (default: /woodpecker-data) - -The SQLite database is located at: $WOODPECKER_DATA_DIR/woodpecker.sqlite -""" - -import argparse -import sqlite3 -import sys -import os - -DEFAULT_DB_PATH = "/woodpecker-data/woodpecker.sqlite" -DEFAULT_WOODPECKER_DATA_DIR = "/woodpecker-data" -MAX_OUTPUT_LINES = 200 - - -def get_db_path(): - """Determine the path to the Woodpecker SQLite database.""" - env_dir = os.environ.get("WOODPECKER_DATA_DIR", DEFAULT_WOODPECKER_DATA_DIR) - return os.path.join(env_dir, "woodpecker.sqlite") - - -def query_logs(pipeline_number: int, step_name: str | None = None) -> list[str]: - """ - Query log entries from the Woodpecker database. 
- - Args: - pipeline_number: The pipeline number to query - step_name: Optional step name to filter by - - Returns: - List of log data strings - """ - db_path = get_db_path() - - if not os.path.exists(db_path): - print(f"ERROR: Woodpecker database not found at {db_path}", file=sys.stderr) - print(f"Set WOODPECKER_DATA_DIR or mount volume to {DEFAULT_WOODPECKER_DATA_DIR}", file=sys.stderr) - sys.exit(1) - - conn = sqlite3.connect(db_path) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - - if step_name: - # Query logs for a specific step - query = """ - SELECT le.data - FROM log_entries le - JOIN steps s ON le.step_id = s.id - JOIN pipelines p ON s.pipeline_id = p.id - WHERE p.number = ? AND s.name = ? - ORDER BY le.id - """ - cursor.execute(query, (pipeline_number, step_name)) - else: - # Query logs for all failed steps in the pipeline - query = """ - SELECT le.data - FROM log_entries le - JOIN steps s ON le.step_id = s.id - JOIN pipelines p ON s.pipeline_id = p.id - WHERE p.number = ? 
AND s.state IN ('failure', 'error', 'killed') - ORDER BY le.id - """ - cursor.execute(query, (pipeline_number,)) - - logs = [row["data"] for row in cursor.fetchall()] - conn.close() - return logs - - -def main(): - parser = argparse.ArgumentParser( - description="Read CI logs from Woodpecker SQLite database" - ) - parser.add_argument( - "pipeline_number", - type=int, - help="Pipeline number to query" - ) - parser.add_argument( - "--step", "-s", - dest="step_name", - default=None, - help="Filter to a specific step name" - ) - - args = parser.parse_args() - - logs = query_logs(args.pipeline_number, args.step_name) - - if not logs: - if args.step_name: - print(f"No logs found for pipeline #{args.pipeline_number}, step '{args.step_name}'", file=sys.stderr) - else: - print(f"No failed steps found in pipeline #{args.pipeline_number}", file=sys.stderr) - sys.exit(0) - - # Join all log data and output - full_output = "\n".join(logs) - - # Truncate to last N lines to avoid context bloat - lines = full_output.split("\n") - if len(lines) > MAX_OUTPUT_LINES: - # Keep last N lines - truncated = lines[-MAX_OUTPUT_LINES:] - print("\n".join(truncated)) - else: - print(full_output) - - -if __name__ == "__main__": - main() diff --git a/lib/ci-setup.sh b/lib/ci-setup.sh deleted file mode 100644 index 7c4c5dd..0000000 --- a/lib/ci-setup.sh +++ /dev/null @@ -1,455 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# ci-setup.sh — CI setup functions for Woodpecker and cron configuration -# -# Internal functions (called via _load_ci_context + _*_impl): -# _install_cron_impl() - Install crontab entries for project agents -# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker -# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow -# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker -# -# Globals expected (asserted by _load_ci_context): -# FORGE_URL - Forge 
instance URL (e.g. http://localhost:3000) -# FORGE_TOKEN - Forge API token -# FACTORY_ROOT - Root of the disinto factory -# -# Usage: -# source "${FACTORY_ROOT}/lib/ci-setup.sh" -# ============================================================================= -set -euo pipefail - -# Assert required globals are set before using this module. -_load_ci_context() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -# Generate and optionally install cron entries for the project agents. -# Usage: install_cron -_install_cron_impl() { - local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - - # In compose mode, skip host cron — the agents container runs cron internally - if [ "$bare" = false ]; then - echo "" - echo "Cron: skipped (agents container handles scheduling in compose mode)" - return - fi - - # Bare mode: crontab is required on the host - if ! 
command -v crontab &>/dev/null; then - echo "Error: crontab not found (required for bare-metal mode)" >&2 - echo " Install: apt install cron / brew install cron" >&2 - exit 1 - fi - - # Use absolute path for the TOML in cron entries - local abs_toml - abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" - - local cron_block - cron_block="# disinto: ${name} -2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 -4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 -0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" - - echo "" - echo "Cron entries to install:" - echo "$cron_block" - echo "" - - # Check if cron entries already exist - local current_crontab - current_crontab=$(crontab -l 2>/dev/null || true) - if echo "$current_crontab" | grep -q "# disinto: ${name}"; then - echo "Cron: skipped (entries for ${name} already installed)" - return - fi - - if [ "$auto_yes" = false ] && [ -t 0 ]; then - read -rp "Install these cron entries? [y/N] " confirm - if [[ ! "$confirm" =~ ^[Yy] ]]; then - echo "Skipped cron install. Add manually with: crontab -e" - return - fi - fi - - # Append to existing crontab - if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then - echo "Cron entries installed for ${name}" - else - echo "Error: failed to install cron entries" >&2 - return 1 - fi -} - -# Set up Woodpecker CI to use Forgejo as its forge backend. -# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
-# Usage: create_woodpecker_oauth -_create_woodpecker_oauth_impl() { - local forge_url="$1" - local _repo_slug="$2" # unused but required for signature compatibility - - echo "" - echo "── Woodpecker OAuth2 setup ────────────────────────────" - - # Create OAuth2 application on Forgejo for Woodpecker - local oauth2_name="woodpecker-ci" - local redirect_uri="http://localhost:8000/authorize" - local existing_app client_id client_secret - - # Check if OAuth2 app already exists - existing_app=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ - | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true - - if [ -n "$existing_app" ]; then - echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" - client_id="$existing_app" - else - local oauth2_resp - oauth2_resp=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/applications/oauth2" \ - -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ - 2>/dev/null) || oauth2_resp="" - - if [ -z "$oauth2_resp" ]; then - echo "Warning: failed to create OAuth2 app on Forgejo" >&2 - return - fi - - client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') - client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') - - if [ -z "$client_id" ]; then - echo "Warning: OAuth2 app creation returned no client_id" >&2 - return - fi - - echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" - fi - - # Store Woodpecker forge config in .env - # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references - # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri - local env_file="${FACTORY_ROOT}/.env" - local wp_vars=( - "WOODPECKER_FORGEJO=true" - "WOODPECKER_FORGEJO_URL=${forge_url}" - 
"WOODPECKER_HOST=http://localhost:8000" - ) - if [ -n "${client_id:-}" ]; then - wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") - fi - if [ -n "${client_secret:-}" ]; then - wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") - fi - - for var_line in "${wp_vars[@]}"; do - local var_name="${var_line%%=*}" - if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then - sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" - else - printf '%s\n' "$var_line" >> "$env_file" - fi - done - echo "Config: Woodpecker forge vars written to .env" -} - -# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. -# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). -# Called after compose stack is up, before activate_woodpecker_repo. -# Usage: generate_woodpecker_token -_generate_woodpecker_token_impl() { - local forge_url="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - local env_file="${FACTORY_ROOT}/.env" - local admin_user="disinto-admin" - local admin_pass="${_FORGE_ADMIN_PASS:-}" - - # Skip if already set - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - echo "Config: WOODPECKER_TOKEN already set in .env" - return 0 - fi - - echo "" - echo "── Woodpecker token generation ────────────────────────" - - if [ -z "$admin_pass" ]; then - echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 - echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 - return 1 - fi - - # Wait for Woodpecker to become ready - echo -n "Waiting for Woodpecker" - local retries=0 - while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 30 ]; then - echo "" - echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 - return 1 - fi - echo -n "." 
- sleep 2 - done - echo " ready" - - # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token - local cookie_jar auth_body_file - cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) - auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) - - # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) - local csrf - csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ - | grep -o 'name="_csrf"[^>]*' | head -1 \ - | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || csrf="" - - if [ -z "$csrf" ]; then - echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ - -o /dev/null \ - "${forge_url}/user/login" \ - --data-urlencode "_csrf=${csrf}" \ - --data-urlencode "user_name=${admin_user}" \ - --data-urlencode "password=${admin_pass}" \ - 2>/dev/null || true - - # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) - local wp_redir - wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ - "${wp_server}/authorize" 2>/dev/null) || wp_redir="" - - if [ -z "$wp_redir" ]; then - echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - # Rewrite internal Docker network URLs to host-accessible URLs. - # Handle both plain and URL-encoded forms of the internal hostnames. 
- local forge_url_enc wp_server_enc - forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') - wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') - wp_redir=$(printf '%s' "$wp_redir" \ - | sed "s|http://forgejo:3000|${forge_url}|g" \ - | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ - | sed "s|http://woodpecker:8000|${wp_server}|g" \ - | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") - - # Step 3: Hit Forgejo OAuth authorize endpoint with session - # First time: shows consent page. Already approved: redirects with code. - local auth_headers redirect_loc auth_code - auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o "$auth_body_file" \ - "$wp_redir" 2>/dev/null) || auth_headers="" - - redirect_loc=$(printf '%s' "$auth_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - # Auto-approved: extract code from redirect - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - else - # Consent page: extract CSRF and all form fields, POST grant approval - local consent_csrf form_client_id form_state form_redirect_uri - consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ - | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || consent_csrf="" - form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" - form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" - form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" - - if [ -n "$consent_csrf" ]; then - local grant_headers - grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o /dev/null -X POST \ - "${forge_url}/login/oauth/grant" \ 
- --data-urlencode "_csrf=${consent_csrf}" \ - --data-urlencode "client_id=${form_client_id}" \ - --data-urlencode "state=${form_state}" \ - --data-urlencode "scope=" \ - --data-urlencode "nonce=" \ - --data-urlencode "redirect_uri=${form_redirect_uri}" \ - --data-urlencode "granted=true" \ - 2>/dev/null) || grant_headers="" - - redirect_loc=$(printf '%s' "$grant_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - fi - fi - fi - - rm -f "$auth_body_file" - - if [ -z "${auth_code:-}" ]; then - echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 - rm -f "$cookie_jar" - return 1 - fi - - # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) - local state - state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') - - local wp_headers wp_token - wp_headers=$(curl -sf -c "$cookie_jar" \ - -D - -o /dev/null \ - "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ - 2>/dev/null) || wp_headers="" - - # Extract token from redirect URL (Woodpecker returns ?access_token=...) - redirect_loc=$(printf '%s' "$wp_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - wp_token="" - if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then - wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') - fi - - # Fallback: check for user_sess cookie - if [ -z "$wp_token" ]; then - wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" - fi - - rm -f "$cookie_jar" - - if [ -z "$wp_token" ]; then - echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 - return 1 - fi - - # Step 5: Create persistent personal access token via Woodpecker API - # WP v3 requires CSRF header for POST operations with session tokens. 
- local wp_csrf - wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ - "${wp_server}/web-config.js" 2>/dev/null \ - | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" - - local pat_resp final_token - pat_resp=$(curl -sf -X POST \ - -b "user_sess=${wp_token}" \ - ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ - "${wp_server}/api/user/token" \ - 2>/dev/null) || pat_resp="" - - final_token="" - if [ -n "$pat_resp" ]; then - final_token=$(printf '%s' "$pat_resp" \ - | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ - 2>/dev/null) || final_token="" - fi - - # Use persistent token if available, otherwise use session token - final_token="${final_token:-$wp_token}" - - # Save to .env - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" - else - printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" - fi - export WOODPECKER_TOKEN="$final_token" - echo "Config: WOODPECKER_TOKEN generated and saved to .env" -} - -# Activate a repo in Woodpecker CI. -# Usage: activate_woodpecker_repo -_activate_woodpecker_repo_impl() { - local forge_repo="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - - # Wait for Woodpecker to become ready after stack start - local retries=0 - while [ $retries -lt 10 ]; do - if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then - break - fi - retries=$((retries + 1)) - sleep 2 - done - - if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then - echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 - return - fi - - echo "" - echo "── Woodpecker repo activation ─────────────────────────" - - local wp_token="${WOODPECKER_TOKEN:-}" - if [ -z "$wp_token" ]; then - echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - return - fi - - local wp_repo_id - wp_repo_id=$(curl -sf \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" - else - # Get Forgejo repo numeric ID for WP activation - local forge_repo_id - forge_repo_id=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" - - local activate_resp - activate_resp=$(curl -sf -X POST \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ - 2>/dev/null) || activate_resp="" - - wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" - - # Set pipeline timeout to 5 minutes (default is 60) - if curl -sf -X PATCH \ - -H "Authorization: Bearer ${wp_token}" \ - -H "Content-Type: application/json" \ - "${wp_server}/api/repos/${wp_repo_id}" \ - -d '{"timeout": 5}' >/dev/null 2>&1; then - echo "Config: pipeline timeout set to 5 minutes" - fi - else - echo "Warning: could not activate repo in Woodpecker" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - fi - 
fi - - # Store repo ID for later TOML generation - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - _WP_REPO_ID="$wp_repo_id" - fi -} diff --git a/lib/env.sh b/lib/env.sh index 20e64d0..6bc181e 100755 --- a/lib/env.sh +++ b/lib/env.sh @@ -13,61 +13,33 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" if [ "${DISINTO_CONTAINER:-}" = "1" ]; then DISINTO_DATA_DIR="${HOME}/data" DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs" - mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher} + mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics} else DISINTO_LOG_DIR="${FACTORY_ROOT}" fi export DISINTO_LOG_DIR # Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env. -# Inside containers (DISINTO_CONTAINER=1), compose environment is the source of truth. -# On bare metal, .env/.env.enc is sourced to provide default values. -if [ "${DISINTO_CONTAINER:-}" != "1" ]; then - if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then - set -a - _saved_forge_url="${FORGE_URL:-}" - # Use temp file + validate dotenv format before sourcing (avoids eval injection) - # SOPS -d automatically verifies MAC/GCM authentication tag during decryption - _tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; } - if ! 
sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then - echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2 - rm -f "$_tmpenv" - exit 1 - fi - # Validate: non-empty, non-comment lines must match KEY=value pattern - # Filter out blank lines and comments before validation - _validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true) - if [ -n "$_validated" ]; then - # Write validated content to a second temp file and source it - _validated_env=$(mktemp) - printf '%s\n' "$_validated" > "$_validated_env" - # shellcheck source=/dev/null - source "$_validated_env" - rm -f "$_validated_env" - else - echo "Error: .env.enc decryption output failed format validation" >&2 - rm -f "$_tmpenv" - exit 1 - fi - rm -f "$_tmpenv" - set +a - [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" - elif [ -f "$FACTORY_ROOT/.env" ]; then - # Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker) - _saved_forge_url="${FORGE_URL:-}" - set -a - # shellcheck source=/dev/null - source "$FACTORY_ROOT/.env" - set +a - [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" - fi -fi - -# Allow per-container token override (#375): .env sets the default FORGE_TOKEN -# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a -# different Forgejo identity (e.g. dev-qwen). -if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then - export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE" +# Always source .env — cron jobs inside the container do NOT inherit compose +# env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are +# already set and won't be clobbered since env.sh uses ${VAR:-default} patterns +# for derived values. FORGE_URL from .env (localhost:3000) is overridden below +# by the compose-injected value when running via docker exec. 
+if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then + set -a + _saved_forge_url="${FORGE_URL:-}" + eval "$(sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" 2>/dev/null)" \ + || echo "Warning: failed to decrypt .env.enc — secrets not loaded" >&2 + set +a + [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" +elif [ -f "$FACTORY_ROOT/.env" ]; then + # Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker) + _saved_forge_url="${FORGE_URL:-}" + set -a + # shellcheck source=/dev/null + source "$FACTORY_ROOT/.env" + set +a + [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" fi # PATH: foundry, node, system @@ -79,11 +51,16 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML" fi -# Forge token -export FORGE_TOKEN="${FORGE_TOKEN:-}" +# Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN +if [ -z "${FORGE_TOKEN:-}" ]; then + FORGE_TOKEN="${CODEBERG_TOKEN:-}" +fi +export FORGE_TOKEN +export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat -# Review bot token +# Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}" +export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat # Per-agent tokens (#747): each agent gets its own Forgejo identity. # Falls back to FORGE_TOKEN for backwards compat with single-token setups. 
@@ -92,16 +69,20 @@ export FORGE_GARDENER_TOKEN="${FORGE_GARDENER_TOKEN:-${FORGE_TOKEN}}" export FORGE_VAULT_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}" -export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" +export FORGE_ACTION_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" -# Bot usernames filter -export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}" +# Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES +export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,action-bot}}" +export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat -# Project config -export FORGE_REPO="${FORGE_REPO:-}" +# Project config (FORGE_* preferred, CODEBERG_* fallback) +export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}" +export CODEBERG_REPO="${FORGE_REPO}" # backwards compat export FORGE_URL="${FORGE_URL:-http://localhost:3000}" export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}" export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}" +export CODEBERG_API="${FORGE_API}" # backwards compat +export CODEBERG_WEB="${FORGE_WEB}" # backwards compat # tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo) if [ -z "${TEA_LOGIN:-}" ]; then case "${FORGE_URL}" in @@ -127,7 +108,7 @@ export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}" # Vault-only token guard (#745): external-action tokens (GITHUB_TOKEN, CLAWHUB_TOKEN) # must NEVER be available to agents. They live in .env.vault.enc and are injected -# only into the ephemeral runner container at fire time. Unset them here so +# only into the ephemeral vault-runner container at fire time. 
Unset them here so # even an accidental .env inclusion cannot leak them into agent sessions. unset GITHUB_TOKEN 2>/dev/null || true unset CLAWHUB_TOKEN 2>/dev/null || true @@ -137,75 +118,21 @@ unset CLAWHUB_TOKEN 2>/dev/null || true export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1 # Shared log helper -# Usage: log "message" -# Output: [2026-04-03T14:00:00Z] agent: message -# Where agent is set via LOG_AGENT variable (defaults to caller's context) log() { - local agent="${LOG_AGENT:-agent}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" + printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" } -# ============================================================================= -# URL VALIDATION HELPER -# ============================================================================= -# Validates that a URL variable matches expected patterns to prevent -# URL injection or redirection attacks (OWASP URL Redirection prevention). -# Returns 0 if valid, 1 if invalid. -# ============================================================================= -validate_url() { - local url="$1" - local allowed_hosts="${2:-}" - - # Must start with http:// or https:// - if [[ ! 
"$url" =~ ^https?:// ]]; then - return 1 - fi - - # Extract host and reject if it contains @ (credential injection) - if [[ "$url" =~ ^https?://[^@]+@ ]]; then - return 1 - fi - - # If allowed_hosts is specified, validate against it - if [ -n "$allowed_hosts" ]; then - local host - host=$(echo "$url" | sed -E 's|^https?://([^/:]+).*|\1|') - local valid=false - for allowed in $allowed_hosts; do - if [ "$host" = "$allowed" ]; then - valid=true - break - fi - done - if [ "$valid" = false ]; then - return 1 - fi - fi - - return 0 -} - -# ============================================================================= -# FORGE API HELPER -# ============================================================================= -# Usage: forge_api GET /issues?state=open -# Validates FORGE_API before use to prevent URL injection attacks. -# ============================================================================= +# Forge API helper — usage: forge_api GET /issues?state=open forge_api() { local method="$1" path="$2" shift 2 - - # Validate FORGE_API to prevent URL injection - if ! validate_url "$FORGE_API"; then - echo "ERROR: FORGE_API validation failed - possible URL injection attempt" >&2 - return 1 - fi - curl -sf -X "$method" \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ "${FORGE_API}${path}" "$@" } +# Backwards-compat alias +codeberg_api() { forge_api "$@"; } # Paginate a Forge API GET endpoint and return all items as a merged JSON array. 
# Usage: forge_api_all /path (no existing query params) @@ -222,8 +149,7 @@ forge_api_all() { page=1 while true; do page_items=$(forge_api GET "${path_prefix}${sep}limit=50&page=${page}") - count=$(printf '%s' "$page_items" | jq 'length' 2>/dev/null) || count=0 - [ -z "$count" ] && count=0 + count=$(printf '%s' "$page_items" | jq 'length') [ "$count" -eq 0 ] && break all_items=$(printf '%s\n%s' "$all_items" "$page_items" | jq -s 'add') [ "$count" -lt 50 ] && break @@ -231,23 +157,13 @@ forge_api_all() { done printf '%s' "$all_items" } +# Backwards-compat alias +codeberg_api_all() { forge_api_all "$@"; } -# ============================================================================= -# WOODPECKER API HELPER -# ============================================================================= -# Usage: woodpecker_api /repos/{id}/pipelines -# Validates WOODPECKER_SERVER before use to prevent URL injection attacks. -# ============================================================================= +# Woodpecker API helper woodpecker_api() { local path="$1" shift - - # Validate WOODPECKER_SERVER to prevent URL injection - if ! validate_url "$WOODPECKER_SERVER"; then - echo "ERROR: WOODPECKER_SERVER validation failed - possible URL injection attempt" >&2 - return 1 - fi - curl -sfL \ -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ "${WOODPECKER_SERVER}/api${path}" "$@" diff --git a/lib/file-action-issue.sh b/lib/file-action-issue.sh new file mode 100644 index 0000000..abba4c8 --- /dev/null +++ b/lib/file-action-issue.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# file-action-issue.sh — File an action issue for a formula run +# +# Usage: source this file, then call file_action_issue. +# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh +# +# file_action_issue <body> +# Sets FILED_ISSUE_NUM on success. 
+# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected + +# Load secret scanner +# shellcheck source=secret-scan.sh +source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh" + +file_action_issue() { + local formula_name="$1" title="$2" body="$3" + FILED_ISSUE_NUM="" + + # Secret scan: reject issue bodies containing embedded secrets + if ! scan_for_secrets "$body"; then + echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2 + return 4 + fi + + # Dedup: skip if an open action issue for this formula already exists + local open_actions + open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true) + if [ -n "$open_actions" ] && [ "$open_actions" != "null" ]; then + local existing + existing=$(printf '%s' "$open_actions" | \ + jq --arg f "$formula_name" '[.[] | select(.title | test($f))] | length' 2>/dev/null || echo 0) + if [ "${existing:-0}" -gt 0 ]; then + return 1 + fi + fi + + # Fetch 'action' label ID + local action_label_id + action_label_id=$(forge_api GET "/labels" 2>/dev/null | \ + jq -r '.[] | select(.name == "action") | .id' 2>/dev/null || true) + if [ -z "$action_label_id" ]; then + return 2 + fi + + # Create the issue + local payload result + payload=$(jq -nc \ + --arg title "$title" \ + --arg body "$body" \ + --argjson labels "[$action_label_id]" \ + '{title: $title, body: $body, labels: $labels}') + + result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true) + FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true) + + if [ -z "$FILED_ISSUE_NUM" ]; then + return 3 + fi +} diff --git a/lib/forge-push.sh b/lib/forge-push.sh deleted file mode 100644 index 1da61f7..0000000 --- a/lib/forge-push.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# forge-push.sh — push_to_forge() 
function -# -# Handles pushing a local clone to the Forgejo remote and verifying the push. -# -# Globals expected: -# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) -# FORGE_TOKEN - API token for Forge operations (used for API verification) -# FORGE_PASS - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x) -# FACTORY_ROOT - Root of the disinto factory -# PRIMARY_BRANCH - Primary branch name (e.g. main) -# -# Usage: -# source "${FACTORY_ROOT}/lib/forge-push.sh" -# push_to_forge <repo_root> <forge_url> <repo_slug> -# ============================================================================= -set -euo pipefail - -# Assert required globals are set before using this module. -_assert_forge_push_globals() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FORGE_PASS:-}" ] && missing+=("FORGE_PASS") - [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: forge-push.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -# Push local clone to the Forgejo remote. -push_to_forge() { - local repo_root="$1" forge_url="$2" repo_slug="$3" - - # Build authenticated remote URL: http://dev-bot:<password>@host:port/org/repo.git - # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works. 
- if [ -z "${FORGE_PASS:-}" ]; then - echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2 - return 1 - fi - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_PASS}@|") - local remote_url="${auth_url}/${repo_slug}.git" - # Display URL without token - local display_url="${forge_url}/${repo_slug}.git" - - # Always set the remote URL to ensure credentials are current - if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then - git -C "$repo_root" remote set-url forgejo "$remote_url" - else - git -C "$repo_root" remote add forgejo "$remote_url" - fi - echo "Remote: forgejo -> ${display_url}" - - # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) - if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then - echo "Push: skipped (local repo has no commits)" - return 0 - fi - - # Push all branches and tags - echo "Pushing: branches to forgejo" - if ! git -C "$repo_root" push forgejo --all 2>&1; then - echo "Error: failed to push branches to Forgejo" >&2 - return 1 - fi - echo "Pushing: tags to forgejo" - if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then - echo "Error: failed to push tags to Forgejo" >&2 - return 1 - fi - - # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) - local is_empty="true" - local verify_attempt - for verify_attempt in $(seq 1 5); do - local repo_info - repo_info=$(curl -sf --max-time 10 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" - if [ -z "$repo_info" ]; then - is_empty="skipped" - break # API unreachable, skip verification - fi - is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') - if [ "$is_empty" != "true" ]; then - echo "Verify: repo is not empty (push confirmed)" - break - fi - if [ "$verify_attempt" -lt 5 ]; then - sleep 2 - fi - done - if [ "$is_empty" = "true" ]; then - echo "Warning: Forgejo repo still reports empty after push" >&2 - return 1 - fi -} diff --git a/lib/forge-setup.sh b/lib/forge-setup.sh deleted file mode 100644 index d640755..0000000 --- a/lib/forge-setup.sh +++ /dev/null @@ -1,550 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# forge-setup.sh — setup_forge() and helpers for Forgejo provisioning -# -# Handles admin user creation, bot user creation, token generation, -# password resets, repo creation, and collaborator setup. -# -# Globals expected (asserted by _load_init_context): -# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) -# FACTORY_ROOT - Root of the disinto factory -# PRIMARY_BRANCH - Primary branch name (e.g. main) -# -# Usage: -# source "${FACTORY_ROOT}/lib/forge-setup.sh" -# setup_forge <forge_url> <repo_slug> -# ============================================================================= -set -euo pipefail - -# Assert required globals are set before using this module. 
-_load_init_context() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: forge-setup.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -# Execute a command in the Forgejo container (for admin operations) -_forgejo_exec() { - local use_bare="${DISINTO_BARE:-false}" - if [ "$use_bare" = true ]; then - docker exec -u git disinto-forgejo "$@" - else - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" - fi -} - -# Provision or connect to a local Forgejo instance. -# Creates admin + bot users, generates API tokens, stores in .env. -# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. -setup_forge() { - local forge_url="$1" - local repo_slug="$2" - local use_bare="${DISINTO_BARE:-false}" - - echo "" - echo "── Forge setup ────────────────────────────────────────" - - # Check if Forgejo is already running - if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then - echo "Forgejo: ${forge_url} (already running)" - else - echo "Forgejo not reachable at ${forge_url}" - echo "Starting Forgejo via Docker..." - - if ! 
command -v docker &>/dev/null; then - echo "Error: docker not found — needed to provision Forgejo" >&2 - echo " Install Docker or start Forgejo manually at ${forge_url}" >&2 - exit 1 - fi - - # Extract port from forge_url - local forge_port - forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|') - forge_port="${forge_port:-3000}" - - if [ "$use_bare" = true ]; then - # Bare-metal mode: standalone docker run - mkdir -p "${FORGEJO_DATA_DIR}" - - if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then - docker start disinto-forgejo >/dev/null 2>&1 || true - else - docker run -d \ - --name disinto-forgejo \ - --restart unless-stopped \ - -p "${forge_port}:3000" \ - -p 2222:22 \ - -v "${FORGEJO_DATA_DIR}:/data" \ - -e "FORGEJO__database__DB_TYPE=sqlite3" \ - -e "FORGEJO__server__ROOT_URL=${forge_url}/" \ - -e "FORGEJO__server__HTTP_PORT=3000" \ - -e "FORGEJO__service__DISABLE_REGISTRATION=true" \ - codeberg.org/forgejo/forgejo:11.0 - fi - else - # Compose mode: start Forgejo via docker compose - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo - fi - - # Wait for Forgejo to become healthy - echo -n "Waiting for Forgejo to start" - local retries=0 - while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 60 ]; then - echo "" - echo "Error: Forgejo did not become ready within 60s" >&2 - exit 1 - fi - echo -n "." - sleep 1 - done - echo " ready" - fi - - # Wait for Forgejo database to accept writes (API may be ready before DB is) - echo -n "Waiting for Forgejo database" - local db_ready=false - for _i in $(seq 1 30); do - if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then - db_ready=true - break - fi - echo -n "." 
- sleep 1 - done - echo "" - if [ "$db_ready" != true ]; then - echo "Error: Forgejo database not ready after 30s" >&2 - exit 1 - fi - - # Create admin user if it doesn't exist - local admin_user="disinto-admin" - local admin_pass - local env_file="${FACTORY_ROOT}/.env" - - # Re-read persisted admin password if available (#158) - if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) - fi - # Generate a fresh password only when none was persisted - if [ -z "${admin_pass:-}" ]; then - admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - fi - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Creating admin user: ${admin_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --email "admin@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create admin user '${admin_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - - # Verify admin user was actually created - if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Error: admin user '${admin_user}' not found after creation" >&2 - exit 1 - fi - - # Persist admin password to .env for idempotent re-runs (#158) - if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file" - else - printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file" - fi - else - echo "Admin user: ${admin_user} (already exists)" - # Only reset password if basic auth fails (#158, #267) - # Forgejo 11.x may ignore --must-change-password=false, blocking token creation - if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/user" >/dev/null 2>&1; then - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - fi - fi - # Preserve password for Woodpecker OAuth2 token generation (#779) - _FORGE_ADMIN_PASS="$admin_pass" - - # Create human user (disinto-admin) as site admin if it doesn't exist - local human_user="disinto-admin" - local human_pass - human_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - echo "Creating human user: ${human_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${human_user}" \ - --password "${human_pass}" \ - --email "admin@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create human user '${human_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. 
- _forgejo_exec forgejo admin user change-password \ - --username "${human_user}" \ - --password "${human_pass}" \ - --must-change-password=false - - # Verify human user was actually created - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - echo "Error: human user '${human_user}' not found after creation" >&2 - exit 1 - fi - echo " Human user '${human_user}' created as site admin" - else - echo "Human user: ${human_user} (already exists)" - fi - - # Delete existing admin token if present (token sha1 is only returned at creation time) - local existing_token_id - existing_token_id=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id="" - if [ -n "$existing_token_id" ]; then - curl -sf -X DELETE \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true - fi - - # Create admin token (fresh, so sha1 is returned) - local admin_token - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - exit 1 - fi - - # Get or create human user token - local human_token - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - # Delete existing human token if present (token sha1 is only returned at creation time) - local existing_human_token_id - existing_human_token_id=$(curl -sf \ - -u "${human_user}:${human_pass}" \ - "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \ - | jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id="" - if 
[ -n "$existing_human_token_id" ]; then - curl -sf -X DELETE \ - -u "${human_user}:${human_pass}" \ - "${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true - fi - - # Create human token (fresh, so sha1 is returned) - human_token=$(curl -sf -X POST \ - -u "${human_user}:${human_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${human_user}/tokens" \ - -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || human_token="" - - if [ -n "$human_token" ]; then - # Store human token in .env - if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file" - else - printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file" - fi - export HUMAN_TOKEN="$human_token" - echo " Human token saved (HUMAN_TOKEN)" - fi - fi - - # Create bot users and tokens - # Each agent gets its own Forgejo account for identity and audit trail (#747). - # Map: bot-username -> env-var-name for the token - local -A bot_token_vars=( - [dev-bot]="FORGE_TOKEN" - [review-bot]="FORGE_REVIEW_TOKEN" - [planner-bot]="FORGE_PLANNER_TOKEN" - [gardener-bot]="FORGE_GARDENER_TOKEN" - [vault-bot]="FORGE_VAULT_TOKEN" - [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" - [predictor-bot]="FORGE_PREDICTOR_TOKEN" - [architect-bot]="FORGE_ARCHITECT_TOKEN" - ) - # Map: bot-username -> env-var-name for the password - # Forgejo 11.x API tokens don't work for git HTTP push (#361). - # Store passwords so agents can use password auth for git operations. 
- local -A bot_pass_vars=( - [dev-bot]="FORGE_PASS" - [review-bot]="FORGE_REVIEW_PASS" - [planner-bot]="FORGE_PLANNER_PASS" - [gardener-bot]="FORGE_GARDENER_PASS" - [vault-bot]="FORGE_VAULT_PASS" - [supervisor-bot]="FORGE_SUPERVISOR_PASS" - [predictor-bot]="FORGE_PREDICTOR_PASS" - [architect-bot]="FORGE_ARCHITECT_PASS" - ) - - local bot_user bot_pass token token_var pass_var - - for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do - bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - token_var="${bot_token_vars[$bot_user]}" - - # Check if bot user exists - local user_exists=false - if curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - user_exists=true - fi - - if [ "$user_exists" = false ]; then - echo "Creating bot user: ${bot_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --email "${bot_user}@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create bot user '${bot_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --must-change-password=false - - # Verify bot user was actually created - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - echo "Error: bot user '${bot_user}' not found after creation" >&2 - exit 1 - fi - echo " ${bot_user} user created" - else - echo " ${bot_user} user exists (resetting password for token generation)" - # User exists but may not have a known password. 
- # Use admin API to reset the password so we can generate a new token. - _forgejo_exec forgejo admin user change-password \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --must-change-password=false || { - echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2 - exit 1 - } - fi - - # Generate token via API (basic auth as the bot user — Forgejo requires - # basic auth on POST /users/{username}/tokens, token auth is rejected) - # First, try to delete existing tokens to avoid name collision - # Use bot user's own Basic Auth (we just set the password above) - local existing_token_ids - existing_token_ids=$(curl -sf \ - -u "${bot_user}:${bot_pass}" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" 2>/dev/null \ - | jq -r '.[].id // empty' 2>/dev/null) || existing_token_ids="" - - # Delete any existing tokens for this user - if [ -n "$existing_token_ids" ]; then - while IFS= read -r tid; do - [ -n "$tid" ] && curl -sf -X DELETE \ - -u "${bot_user}:${bot_pass}" \ - "${forge_url}/api/v1/users/${bot_user}/tokens/${tid}" >/dev/null 2>&1 || true - done <<< "$existing_token_ids" - fi - - token=$(curl -sf -X POST \ - -u "${bot_user}:${bot_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" \ - -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || token="" - - if [ -z "$token" ]; then - echo "Error: failed to create API token for '${bot_user}'" >&2 - exit 1 - fi - - # Store token in .env under the per-agent variable name - if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then - sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" - else - printf '%s=%s\n' "$token_var" "$token" >> "$env_file" - fi - export "${token_var}=${token}" - echo " ${bot_user} token generated and saved (${token_var})" - - # Store password in .env for git HTTP push (#361) - # Forgejo 11.x API tokens don't work for git push; password auth does. 
- pass_var="${bot_pass_vars[$bot_user]}" - if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then - sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file" - else - printf '%s=%s\n' "$pass_var" "$bot_pass" >> "$env_file" - fi - export "${pass_var}=${bot_pass}" - echo " ${bot_user} password saved (${pass_var})" - - # Backwards-compat aliases for dev-bot and review-bot - if [ "$bot_user" = "dev-bot" ]; then - export CODEBERG_TOKEN="$token" - elif [ "$bot_user" = "review-bot" ]; then - export REVIEW_BOT_TOKEN="$token" - fi - done - - # Create .profile repos for all bot users (if they don't already exist) - # This runs the same logic as hire-an-agent Step 2-3 for idempotent setup - echo "" - echo "── Setting up .profile repos ────────────────────────────" - - local -a bot_users=(dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot) - local bot_user - - for bot_user in "${bot_users[@]}"; do - # Check if .profile repo already exists - if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then - echo " ${bot_user}/.profile already exists" - continue - fi - - echo "Creating ${bot_user}/.profile repo..." - - # Create the repo using the admin API to ensure it's created in the bot user's namespace - local create_output - create_output=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${bot_user}/repos" \ - -d "{\"name\":\".profile\",\"description\":\"${bot_user}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true - - if echo "$create_output" | grep -q '"id":\|[0-9]'; then - echo " Created ${bot_user}/.profile (via admin API)" - else - echo " Warning: failed to create ${bot_user}/.profile: ${create_output}" >&2 - fi - done - - # Store FORGE_URL in .env if not already present - if ! 
grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then - printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" - fi - - # Create the repo on Forgejo if it doesn't exist - local org_name="${repo_slug%%/*}" - local repo_name="${repo_slug##*/}" - - # Check if repo already exists - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then - - # Try creating org first (ignore if exists) - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs" \ - -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true - - # Create repo under org - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - # Fallback: create under the human user namespace using admin endpoint - if [ -n "${admin_token:-}" ]; then - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2 - exit 1 - fi - elif [ -n "${HUMAN_TOKEN:-}" ]; then - if ! 
curl -sf -X POST \ - -H "Authorization: token ${HUMAN_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2 - exit 1 - fi - else - echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2 - exit 1 - fi - fi - - # Add all bot users as collaborators with appropriate permissions - # dev-bot: write (PR creation via lib/vault.sh) - # review-bot: read (PR review) - # planner-bot: write (prerequisites.md, memory) - # gardener-bot: write (backlog grooming) - # vault-bot: write (vault items) - # supervisor-bot: read (health monitoring) - # predictor-bot: read (pattern detection) - # architect-bot: write (sprint PRs) - local bot_perm - declare -A bot_permissions=( - [dev-bot]="write" - [review-bot]="read" - [planner-bot]="write" - [gardener-bot]="write" - [vault-bot]="write" - [supervisor-bot]="read" - [predictor-bot]="read" - [architect-bot]="write" - ) - for bot_user in "${!bot_permissions[@]}"; do - bot_perm="${bot_permissions[$bot_user]}" - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ - -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true - done - - # Add disinto-admin as admin collaborator - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \ - -d '{"permission":"admin"}' >/dev/null 2>&1 || true - - echo "Repo: ${repo_slug} created on Forgejo" - else - echo "Repo: ${repo_slug} (already exists on Forgejo)" - fi - - echo "Forge: ${forge_url} (ready)" -} diff --git a/lib/formula-session.sh b/lib/formula-session.sh index 
1b2b884..7c52035 100644 --- a/lib/formula-session.sh +++ b/lib/formula-session.sh @@ -1,34 +1,23 @@ #!/usr/bin/env bash # formula-session.sh — Shared helpers for formula-driven cron agents # -# Provides reusable utility functions for the common cron-wrapper pattern -# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. +# Provides reusable functions for the common cron-wrapper + tmux-session +# pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. # # Functions: # acquire_cron_lock LOCK_FILE — PID lock with stale cleanup +# check_memory [MIN_MB] — skip if available RAM too low # load_formula FORMULA_FILE — sets FORMULA_CONTENT # build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK -# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env) -# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode) -# formula_worktree_setup WORKTREE — isolated worktree for formula execution -# formula_prepare_profile_context — load lessons from .profile repo (pre-session) -# formula_lessons_block — return lessons block for prompt -# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal -# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT -# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo -# _profile_has_repo — check if agent has .profile repo -# _count_undigested_journals — count journal entries to digest -# _profile_digest_journals — digest journals into lessons -# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo -# resolve_agent_identity — resolve agent user login from FORGE_TOKEN -# build_graph_section — run build-graph.py and set GRAPH_SECTION -# build_scratch_instruction SCRATCH_FILE — return context scratch instruction -# read_scratch_context SCRATCH_FILE — return scratch file content block -# ensure_ops_repo — clone/pull ops repo -# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo -# 
cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale +# start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude +# build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase) +# run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log +# formula_phase_callback PHASE — standard crash-recovery callback # -# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers. +# Requires: lib/agent-session.sh sourced first (for create_agent_session, +# agent_kill_session, agent_inject_into_session). +# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE, +# PROJECT_REPO_ROOT, PROMPT (set by the calling script). # ── Cron guards ────────────────────────────────────────────────────────── @@ -50,431 +39,16 @@ acquire_cron_lock() { trap 'rm -f "$_CRON_LOCK_FILE"' EXIT } -# ── Agent identity resolution ──────────────────────────────────────────── - -# resolve_agent_identity -# Resolves the agent identity (user login) from the FORGE_TOKEN. -# Exports AGENT_IDENTITY (user login string). -# Returns 0 on success, 1 on failure. -resolve_agent_identity() { - if [ -z "${FORGE_TOKEN:-}" ]; then - log "WARNING: FORGE_TOKEN not set, cannot resolve agent identity" - return 1 +# check_memory [MIN_MB] +# Exits 0 (skip) if available memory is below MIN_MB (default 2000). 
+check_memory() { + local min_mb="${1:-2000}" + local avail_mb + avail_mb=$(free -m | awk '/Mem:/{print $7}') + if [ "${avail_mb:-0}" -lt "$min_mb" ]; then + log "run: skipping — only ${avail_mb}MB available (need ${min_mb})" + exit 0 fi - local forge_url="${FORGE_URL:-http://localhost:3000}" - AGENT_IDENTITY=$(curl -sf --max-time 10 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null) || true - if [ -z "$AGENT_IDENTITY" ]; then - log "WARNING: failed to resolve agent identity from FORGE_TOKEN" - return 1 - fi - log "Resolved agent identity: ${AGENT_IDENTITY}" - return 0 -} - -# ── Forge remote resolution ────────────────────────────────────────────── - -# resolve_forge_remote -# Resolves FORGE_REMOTE by matching FORGE_URL hostname against git remotes. -# Falls back to "origin" if no match found. -# Requires: FORGE_URL, git repo with remotes configured. -# Exports: FORGE_REMOTE (always set). -resolve_forge_remote() { - # Extract hostname from FORGE_URL (e.g., https://codeberg.org/user/repo -> codeberg.org) - _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||; s|:.*||') - # Find git remote whose push URL matches the forge host - FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') - # Fallback to origin if no match found - FORGE_REMOTE="${FORGE_REMOTE:-origin}" - export FORGE_REMOTE - log "forge remote: ${FORGE_REMOTE}" -} - -# ── .profile repo management ────────────────────────────────────────────── - -# ensure_profile_repo [AGENT_IDENTITY] -# Clones or pulls the agent's .profile repo to a local cache dir. -# Requires: FORGE_TOKEN, FORGE_URL. -# Exports PROFILE_REPO_PATH (local cache path) and PROFILE_FORMULA_PATH. -# Returns 0 on success, 1 on failure (falls back gracefully). 
-ensure_profile_repo() { - local agent_identity="${1:-${AGENT_IDENTITY:-}}" - - if [ -z "$agent_identity" ]; then - # Try to resolve from FORGE_TOKEN - if ! resolve_agent_identity; then - log "WARNING: cannot resolve agent identity, skipping .profile repo" - return 1 - fi - agent_identity="$AGENT_IDENTITY" - fi - - # Define cache directory: /home/agent/data/.profile/{agent-name} - PROFILE_REPO_PATH="${HOME:-/home/agent}/data/.profile/${agent_identity}" - - # Build clone URL from FORGE_URL and agent identity - local forge_url="${FORGE_URL:-http://localhost:3000}" - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|") - local clone_url="${auth_url}/${agent_identity}/.profile.git" - - # Check if already cached and up-to-date - if [ -d "${PROFILE_REPO_PATH}/.git" ]; then - log "Pulling .profile repo: ${agent_identity}/.profile" - if git -C "$PROFILE_REPO_PATH" fetch origin --quiet 2>/dev/null; then - git -C "$PROFILE_REPO_PATH" checkout main --quiet 2>/dev/null || \ - git -C "$PROFILE_REPO_PATH" checkout master --quiet 2>/dev/null || true - git -C "$PROFILE_REPO_PATH" pull --ff-only origin main --quiet 2>/dev/null || \ - git -C "$PROFILE_REPO_PATH" pull --ff-only origin master --quiet 2>/dev/null || true - log ".profile repo pulled: ${PROFILE_REPO_PATH}" - else - log "WARNING: failed to pull .profile repo, using cached version" - fi - else - log "Cloning .profile repo: ${agent_identity}/.profile -> ${PROFILE_REPO_PATH}" - if git clone --quiet "$clone_url" "$PROFILE_REPO_PATH" 2>/dev/null; then - log ".profile repo cloned: ${PROFILE_REPO_PATH}" - else - log "WARNING: failed to clone .profile repo ${agent_identity}/.profile — falling back to formulas/" - return 1 - fi - fi - - # Set formula path from .profile - PROFILE_FORMULA_PATH="${PROFILE_REPO_PATH}/formula.toml" - return 0 -} - -# _profile_has_repo -# Checks if the agent has a .profile repo by querying Forgejo API. -# Returns 0 if repo exists, 1 otherwise. 
-_profile_has_repo() { - local agent_identity="${AGENT_IDENTITY:-}" - - if [ -z "$agent_identity" ]; then - if ! resolve_agent_identity; then - return 1 - fi - agent_identity="$AGENT_IDENTITY" - fi - - local forge_url="${FORGE_URL:-http://localhost:3000}" - local api_url="${forge_url}/api/v1/repos/${agent_identity}/.profile" - - # Check if repo exists via API (returns 200 if exists, 404 if not) - if curl -sf -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "$api_url" >/dev/null 2>&1; then - return 0 - fi - return 1 -} - -# _count_undigested_journals -# Counts journal entries in .profile/journal/ excluding archive/ -# Returns count via stdout. -_count_undigested_journals() { - if [ ! -d "${PROFILE_REPO_PATH:-}/journal" ]; then - echo "0" - return - fi - find "${PROFILE_REPO_PATH}/journal" -maxdepth 1 -name "*.md" -type f ! -path "*/archive/*" 2>/dev/null | wc -l -} - -# _profile_digest_journals -# Runs a claude -p one-shot to digest undigested journals into lessons-learned.md -# Returns 0 on success, 1 on failure. -_profile_digest_journals() { - local agent_identity="${AGENT_IDENTITY:-}" - local model="${CLAUDE_MODEL:-opus}" - - if [ -z "$agent_identity" ]; then - if ! 
resolve_agent_identity; then - return 1 - fi - agent_identity="$AGENT_IDENTITY" - fi - - local journal_dir="${PROFILE_REPO_PATH}/journal" - local knowledge_dir="${PROFILE_REPO_PATH}/knowledge" - local lessons_file="${knowledge_dir}/lessons-learned.md" - - # Collect undigested journal entries - local journal_entries="" - if [ -d "$journal_dir" ]; then - for jf in "$journal_dir"/*.md; do - [ -f "$jf" ] || continue - # Skip archived entries - [[ "$jf" == */archive/* ]] && continue - local basename - basename=$(basename "$jf") - journal_entries="${journal_entries} -### ${basename} -$(cat "$jf") -" - done - fi - - if [ -z "$journal_entries" ]; then - log "profile: no undigested journals to digest" - return 0 - fi - - # Read existing lessons if available - local existing_lessons="" - if [ -f "$lessons_file" ]; then - existing_lessons=$(cat "$lessons_file") - fi - - # Build prompt for digestion - local digest_prompt="You are digesting journal entries from a developer agent's work sessions. - -## Task -Condense these journal entries into abstract, transferable lessons. Rewrite lessons-learned.md entirely. - -## Constraints -- Hard cap: 2KB maximum -- Abstract: patterns and heuristics, not specific issues or file paths -- Transferable: must help with future unseen work, not just recall past work -- Drop the least transferable lessons if over limit - -## Existing lessons-learned.md (if any) -${existing_lessons:-<none>} - -## Journal entries to digest -${journal_entries} - -## Output -Write the complete, rewritten lessons-learned.md content below. No preamble, no explanation — just the file content." 
- - # Run claude -p one-shot with same model as agent - local output - output=$(claude -p "$digest_prompt" \ - --output-format json \ - --dangerously-skip-permissions \ - ${model:+--model "$model"} \ - 2>>"$LOGFILE" || echo '{"result":"error"}') - - # Extract content from JSON response - local lessons_content - lessons_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "") - - if [ -z "$lessons_content" ]; then - log "profile: failed to digest journals" - return 1 - fi - - # Ensure knowledge directory exists - mkdir -p "$knowledge_dir" - - # Write the lessons file (full rewrite) - printf '%s\n' "$lessons_content" > "$lessons_file" - log "profile: wrote lessons-learned.md (${#lessons_content} bytes)" - - # Move digested journals to archive (if any were processed) - if [ -d "$journal_dir" ]; then - mkdir -p "${journal_dir}/archive" - local archived=0 - for jf in "$journal_dir"/*.md; do - [ -f "$jf" ] || continue - [[ "$jf" == */archive/* ]] && continue - local basename - basename=$(basename "$jf") - mv "$jf" "${journal_dir}/archive/${basename}" 2>/dev/null && archived=$((archived + 1)) - done - if [ "$archived" -gt 0 ]; then - log "profile: archived ${archived} journal entries" - fi - fi - - return 0 -} - -# _profile_commit_and_push MESSAGE [FILE ...] -# Commits and pushes changes to .profile repo. -_profile_commit_and_push() { - local msg="$1" - shift - local files=("$@") - - if [ ! -d "${PROFILE_REPO_PATH:-}/.git" ]; then - return 1 - fi - - ( - cd "$PROFILE_REPO_PATH" || return 1 - - if [ ${#files[@]} -gt 0 ]; then - git add "${files[@]}" - else - git add -A - fi - - if ! 
git diff --cached --quiet 2>/dev/null; then - git config user.name "${AGENT_IDENTITY}" || true - git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true - git commit -m "$msg" --no-verify 2>/dev/null || true - git push origin main --quiet 2>/dev/null || git push origin master --quiet 2>/dev/null || true - fi - ) -} - -# profile_load_lessons -# Pre-session: loads lessons-learned.md into LESSONS_CONTEXT for prompt injection. -# Lazy digestion: if >10 undigested journals exist, runs claude -p to digest them. -# Returns 0 on success, 1 if agent has no .profile repo (silent no-op). -# Requires: ensure_profile_repo() called, AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, CLAUDE_MODEL. -# Exports: LESSONS_CONTEXT (the lessons file content, hard-capped at 2KB). -profile_load_lessons() { - # Check if agent has .profile repo - if ! _profile_has_repo; then - return 0 # Silent no-op - fi - - # Pull .profile repo - if ! ensure_profile_repo; then - return 0 # Silent no-op - fi - - # Check journal count for lazy digestion trigger - local journal_count - journal_count=$(_count_undigested_journals) - - if [ "${journal_count:-0}" -gt 10 ]; then - log "profile: digesting ${journal_count} undigested journals" - if ! 
_profile_digest_journals; then - log "profile: warning — journal digestion failed" - fi - fi - - # Read lessons-learned.md (hard cap at 2KB) - local lessons_file="${PROFILE_REPO_PATH}/knowledge/lessons-learned.md" - LESSONS_CONTEXT="" - - if [ -f "$lessons_file" ]; then - local lessons_content - lessons_content=$(head -c 2048 "$lessons_file" 2>/dev/null) || lessons_content="" - if [ -n "$lessons_content" ]; then - # shellcheck disable=SC2034 # exported to caller for prompt injection - LESSONS_CONTEXT="## Lessons learned (from .profile/knowledge/lessons-learned.md) -${lessons_content}" - log "profile: loaded lessons-learned.md (${#lessons_content} bytes)" - fi - fi - - return 0 -} - -# formula_prepare_profile_context -# Pre-session: loads lessons from .profile repo and sets LESSONS_CONTEXT for prompt injection. -# Single shared function to avoid duplicate boilerplate across agent scripts. -# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL (via profile_load_lessons). -# Exports: LESSONS_CONTEXT (set by profile_load_lessons). -# Returns 0 on success, 1 if agent has no .profile repo (silent no-op). -formula_prepare_profile_context() { - profile_load_lessons || true - LESSONS_INJECTION="${LESSONS_CONTEXT:-}" -} - -# formula_lessons_block -# Returns a formatted lessons block for prompt injection. -# Usage: LESSONS_BLOCK=$(formula_lessons_block) -# Expects: LESSONS_INJECTION to be set by formula_prepare_profile_context. -# Returns: formatted block or empty string. -formula_lessons_block() { - if [ -n "${LESSONS_INJECTION:-}" ]; then - printf '\n## Lessons learned (from .profile/knowledge/lessons-learned.md)\n%s' "$LESSONS_INJECTION" - fi -} - -# profile_write_journal ISSUE_NUM ISSUE_TITLE OUTCOME [FILES_CHANGED] -# Post-session: writes a reflection journal entry after work completes. -# Returns 0 on success, 1 on failure. -# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, CLAUDE_MODEL. 
-# Args: -# $1 - ISSUE_NUM: The issue number worked on -# $2 - ISSUE_TITLE: The issue title -# $3 - OUTCOME: Session outcome (merged, blocked, failed, etc.) -# $4 - FILES_CHANGED: Optional comma-separated list of files changed -profile_write_journal() { - local issue_num="$1" - local issue_title="$2" - local outcome="$3" - local files_changed="${4:-}" - - # Check if agent has .profile repo - if ! _profile_has_repo; then - return 0 # Silent no-op - fi - - # Pull .profile repo - if ! ensure_profile_repo; then - return 0 # Silent no-op - fi - - # Build session summary - local session_summary="" - if [ -n "$files_changed" ]; then - session_summary="Files changed: ${files_changed} -" - fi - session_summary="${session_summary}Outcome: ${outcome}" - - # Build reflection prompt - local reflection_prompt="You are reflecting on a development session. Write a concise journal entry about transferable lessons learned. - -## Session context -- Issue: #${issue_num} — ${issue_title} -- Outcome: ${outcome} - -${session_summary} - -## Task -Write a journal entry focused on what you learned that would help you do similar work better next time. - -## Constraints -- Be concise (100-200 words) -- Focus on transferable lessons, not a summary of what you did -- Abstract patterns and heuristics, not specific issue/file references -- One concise entry, not a list - -## Output -Write the journal entry below. Use markdown format." 
- - # Run claude -p one-shot with same model as agent - local output - output=$(claude -p "$reflection_prompt" \ - --output-format json \ - --dangerously-skip-permissions \ - ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \ - 2>>"$LOGFILE" || echo '{"result":"error"}') - - # Extract content from JSON response - local journal_content - journal_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "") - - if [ -z "$journal_content" ]; then - log "profile: failed to write journal entry" - return 1 - fi - - # Ensure journal directory exists - local journal_dir="${PROFILE_REPO_PATH}/journal" - mkdir -p "$journal_dir" - - # Write journal entry (append if exists) - local journal_file="${journal_dir}/issue-${issue_num}.md" - if [ -f "$journal_file" ]; then - printf '\n---\n\n' >> "$journal_file" - fi - printf '%s\n' "$journal_content" >> "$journal_file" - log "profile: wrote journal entry for issue #${issue_num}" - - # Commit and push to .profile repo - _profile_commit_and_push "journal: issue #${issue_num} reflection" "journal/issue-${issue_num}.md" - - return 0 } # ── Formula loading ────────────────────────────────────────────────────── @@ -491,60 +65,6 @@ load_formula() { FORMULA_CONTENT=$(cat "$formula_file") } -# load_formula_or_profile [ROLE] [FORMULA_FILE] -# Tries to load formula from .profile repo first, falls back to formulas/<role>.toml. -# Requires: AGENT_IDENTITY, ensure_profile_repo() available. -# Exports: FORMULA_CONTENT, FORMULA_SOURCE (either ".profile" or "formulas/"). -# Returns 0 on success, 1 on failure. 
-load_formula_or_profile() { - local role="${1:-}" - local fallback_formula="${2:-}" - - # Try to load from .profile repo - if [ -n "$AGENT_IDENTITY" ] && ensure_profile_repo "$AGENT_IDENTITY"; then - if [ -f "$PROFILE_FORMULA_PATH" ]; then - log "formula source: .profile (${PROFILE_FORMULA_PATH})" - # shellcheck disable=SC2034 - FORMULA_CONTENT="$(cat "$PROFILE_FORMULA_PATH")" - FORMULA_SOURCE=".profile" - return 0 - else - log "WARNING: .profile repo exists but formula.toml not found at ${PROFILE_FORMULA_PATH}" - fi - fi - - # Fallback to formulas/<role>.toml - if [ -n "$fallback_formula" ]; then - if [ -f "$fallback_formula" ]; then - log "formula source: formulas/ (fallback) — ${fallback_formula}" - # shellcheck disable=SC2034 - FORMULA_CONTENT="$(cat "$fallback_formula")" - FORMULA_SOURCE="formulas/" - return 0 - else - log "ERROR: formula not found in .profile and fallback file not found: $fallback_formula" - return 1 - fi - fi - - # No fallback specified but role provided — construct fallback path - if [ -n "$role" ]; then - fallback_formula="${FACTORY_ROOT}/formulas/${role}.toml" - if [ -f "$fallback_formula" ]; then - log "formula source: formulas/ (fallback) — ${fallback_formula}" - # shellcheck disable=SC2034 - FORMULA_CONTENT="$(cat "$fallback_formula")" - # shellcheck disable=SC2034 - FORMULA_SOURCE="formulas/" - return 0 - fi - fi - - # No fallback specified - log "ERROR: formula not found in .profile and no fallback specified" - return 1 -} - # build_context_block FILE [FILE ...] # Reads each file from $PROJECT_REPO_ROOT and builds CONTEXT_BLOCK. # Files prefixed with "ops:" are read from $OPS_REPO_ROOT instead. @@ -571,7 +91,7 @@ $(cat "$ctx_path") done } -# ── Ops repo helpers ──────────────────────────────────────────────────── +# ── Ops repo helpers ───────────────────────────────────────────────── # ensure_ops_repo # Clones or pulls the ops repo so agents can read/write operational data. 
# ── Session management ───────────────────────────────────────────────────

# start_formula_session SESSION WORKDIR PHASE_FILE
# Kills stale session, resets phase file, creates a per-agent git worktree
# for session isolation, and creates a new tmux + claude session in it.
# Sets _FORMULA_SESSION_WORKDIR to the worktree path (or original workdir
# on fallback). Callers must clean up via remove_formula_worktree after
# the session ends.
# If the session itself cannot be created, the worktree added by this call
# is removed again and _FORMULA_SESSION_WORKDIR is reset to WORKDIR, so a
# failed start leaves nothing behind for the caller to clean up.
# Arguments: $1 - session name (also used to derive the worktree path)
#            $2 - git working directory the worktree is created from
#            $3 - phase file to reset and hand to the new session
# Globals:   _FORMULA_SESSION_WORKDIR (written)
# Returns 0 on success, 1 on failure.
start_formula_session() {
  local session="$1" workdir="$2" phase_file="$3"
  local worktree="/tmp/disinto-${session}"

  agent_kill_session "$session"
  rm -f -- "$phase_file"

  # Create per-agent git worktree for session isolation.
  # Each agent gets its own CWD so Claude Code treats them as separate
  # projects — no resume collisions between sequential formula runs.
  _FORMULA_SESSION_WORKDIR="$worktree"
  # Clean up any stale worktree from a previous run (best-effort: this
  # fails harmlessly when nothing is registered at that path).
  git -C "$workdir" worktree remove "$worktree" --force 2>/dev/null || true
  if git -C "$workdir" worktree add "$worktree" HEAD --detach 2>/dev/null; then
    log "Created worktree: ${worktree}"
  else
    log "WARNING: worktree creation failed — falling back to ${workdir}"
    _FORMULA_SESSION_WORKDIR="$workdir"
  fi

  log "Creating tmux session: ${session}"
  if ! create_agent_session "$session" "$_FORMULA_SESSION_WORKDIR" "$phase_file"; then
    log "ERROR: failed to create tmux session ${session}"
    # No session will ever run in the worktree we just added — remove it
    # instead of leaking it. Skipped on the fallback path, where the
    # session workdir is the caller's own WORKDIR.
    if [ "$_FORMULA_SESSION_WORKDIR" = "$worktree" ]; then
      git -C "$workdir" worktree remove "$worktree" --force 2>/dev/null || true
      _FORMULA_SESSION_WORKDIR="$workdir"
    fi
    return 1
  fi
  return 0
}

# remove_formula_worktree
# Removes the worktree created by start_formula_session if it differs from
# PROJECT_REPO_ROOT. Safe to call multiple times. No-op if no worktree was created.
# Best-effort: a failed removal is ignored, and "Removed worktree" is only
# logged when git actually removed something, so repeated calls stay quiet.
# Globals: _FORMULA_SESSION_WORKDIR (read), PROJECT_REPO_ROOT (read)
# Always returns 0.
remove_formula_worktree() {
  local worktree="${_FORMULA_SESSION_WORKDIR:-}"
  local repo_root="${PROJECT_REPO_ROOT:-}"

  # Nothing to remove when no session workdir was recorded, or when the
  # session ran directly in the project repo (fallback path).
  if [ -z "$worktree" ] || [ "$worktree" = "$repo_root" ]; then
    return 0
  fi

  if git -C "$repo_root" worktree remove "$worktree" --force 2>/dev/null; then
    log "Removed worktree: ${worktree}"
  fi
  return 0
}

# formula_phase_callback PHASE
# Standard crash-recovery phase callback for formula sessions.
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT.
# Uses _FORMULA_CRASH_COUNT (auto-initialized) for single-retry limit.
# The session acted on is _MONITOR_SESSION when set, else SESSION_NAME.
#   PHASE:crashed  - first time: recreate the session in the session workdir
#                    (_FORMULA_SESSION_WORKDIR, else PROJECT_REPO_ROOT) and
#                    re-inject PROMPT; any later crash only logs an error.
#   PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged - kill the session.
#   anything else  - logged only.
# shellcheck disable=SC2154  # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
formula_phase_callback() {
  local phase="$1"
  local session="${_MONITOR_SESSION:-$SESSION_NAME}"

  log "phase: ${phase}"
  case "$phase" in
    PHASE:crashed)
      if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then
        log "ERROR: session crashed again after recovery — giving up"
        return 0
      fi
      _FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 ))
      log "WARNING: tmux session died unexpectedly — attempting recovery"
      if create_agent_session "$session" \
        "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then
        agent_inject_into_session "$session" "$PROMPT"
        log "Recovery session started"
      else
        log "ERROR: could not restart session after crash"
      fi
      ;;
    PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged)
      agent_kill_session "$session"
      ;;
  esac
}

# ── Stale crashed worktree cleanup ─────────────────────────────────────────

# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh.
# Kept for backwards compatibility with existing callers.
# Requires: lib/worktree.sh sourced.
+cleanup_stale_crashed_worktrees() { + worktree_cleanup_stale "${1:-24}" +} + # ── Scratch file helpers (compaction survival) ──────────────────────────── # build_scratch_instruction SCRATCH_FILE @@ -679,14 +283,8 @@ build_graph_section() { --project-root "$PROJECT_REPO_ROOT" \ --output "$report" 2>>"$LOG_FILE"; then # shellcheck disable=SC2034 - local report_content - report_content="$(cat "$report")" - # shellcheck disable=SC2034 - GRAPH_SECTION=" -## Structural analysis -\`\`\`json -${report_content} -\`\`\`" + GRAPH_SECTION=$(printf '\n## Structural analysis\n```json\n%s\n```\n' \ + "$(cat "$report")") log "graph report generated: $(jq -r '.stats | "\(.nodes) nodes, \(.edges) edges"' "$report")" else log "WARN: build-graph.py failed — continuing without structural analysis" @@ -709,26 +307,25 @@ build_sdk_prompt_footer() { # Creates an isolated worktree for synchronous formula execution. # Fetches primary branch, cleans stale worktree, creates new one, and # sets an EXIT trap for cleanup. -# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE. -# Ensure resolve_forge_remote() is called before this function. +# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH. formula_worktree_setup() { local worktree="$1" cd "$PROJECT_REPO_ROOT" || return - git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true + git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true worktree_cleanup "$worktree" - git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null + git worktree add "$worktree" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null # shellcheck disable=SC2064 # expand worktree now, not at trap time trap "worktree_cleanup '$worktree'" EXIT } -# ── Prompt helpers ────────────────────────────────────────────────────── +# ── Prompt + monitor helpers ────────────────────────────────────────────── # build_prompt_footer [EXTRA_API_LINES] -# Assembles the common forge API reference + environment block for formula prompts. 
-# Sets PROMPT_FOOTER. +# Assembles the common forge API reference + environment + phase protocol +# block for formula prompts. Sets PROMPT_FOOTER. # Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1. # Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT, -# PRIMARY_BRANCH. +# PRIMARY_BRANCH, PHASE_FILE. build_prompt_footer() { local extra_api="${1:-}" # shellcheck disable=SC2034 # consumed by the calling script's PROMPT @@ -744,15 +341,66 @@ NEVER echo or include the actual token value in output — always reference \${F FACTORY_ROOT=${FACTORY_ROOT} PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT} OPS_REPO_ROOT=${OPS_REPO_ROOT} -PRIMARY_BRANCH=${PRIMARY_BRANCH}" +PRIMARY_BRANCH=${PRIMARY_BRANCH} +PHASE_FILE=${PHASE_FILE} + +## Phase protocol (REQUIRED) +When all work is done: + echo 'PHASE:done' > '${PHASE_FILE}' +On unrecoverable error: + printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'" } -# ── Stale crashed worktree cleanup ──────────────────────────────────────── +# run_formula_and_monitor AGENT_NAME [TIMEOUT] +# Starts the formula session, injects PROMPT, monitors phase, and logs result. +# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT, +# FORGE_REPO, CLAUDE_MODEL (exported). +# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller +run_formula_and_monitor() { + local agent_name="$1" + local timeout="${2:-7200}" + local callback="${3:-formula_phase_callback}" -# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] -# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. -# Kept for backwards compatibility with existing callers. -# Requires: lib/worktree.sh sourced. -cleanup_stale_crashed_worktrees() { - worktree_cleanup_stale "${1:-24}" + if ! 
start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then + exit 1 + fi + + # Write phase protocol to context file for compaction survival + if [ -n "${PROMPT_FOOTER:-}" ]; then + write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER" + fi + + agent_inject_into_session "$SESSION_NAME" "$PROMPT" + log "Prompt sent to tmux session" + + log "Monitoring phase file: ${PHASE_FILE}" + _FORMULA_CRASH_COUNT=0 + + monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback" + + FINAL_PHASE=$(read_phase "$PHASE_FILE") + log "Final phase: ${FINAL_PHASE:-none}" + + if [ "$FINAL_PHASE" != "PHASE:done" ]; then + case "${_MONITOR_LOOP_EXIT:-}" in + idle_prompt) + log "${agent_name}: Claude returned to prompt without writing phase signal" + ;; + idle_timeout) + log "${agent_name}: timed out with no phase signal" + ;; + *) + log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})" + ;; + esac + fi + + # Preserve worktree on crash for debugging; clean up on success + if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then + worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})" + else + remove_formula_worktree + fi + + log "--- ${agent_name^} run done ---" } diff --git a/lib/generators.sh b/lib/generators.sh deleted file mode 100644 index 80386d2..0000000 --- a/lib/generators.sh +++ /dev/null @@ -1,443 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# generators — template generation functions for disinto init -# -# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and -# deployment pipeline configs. 
-# -# Globals expected (must be set before sourcing): -# FACTORY_ROOT - Root of the disinto factory -# PROJECT_NAME - Project name for the project repo (defaults to 'project') -# PRIMARY_BRANCH - Primary branch name (defaults to 'main') -# -# Usage: -# source "${FACTORY_ROOT}/lib/generators.sh" -# generate_compose "$forge_port" -# generate_caddyfile -# generate_staging_index -# generate_deploy_pipelines "$repo_root" "$project_name" -# ============================================================================= -set -euo pipefail - -# Assert required globals are set -: "${FACTORY_ROOT:?FACTORY_ROOT must be set}" -# PROJECT_NAME defaults to 'project' if not set (env.sh may have set it from FORGE_REPO) -PROJECT_NAME="${PROJECT_NAME:-project}" -# PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master') -PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}" - -# Generate docker-compose.yml in the factory root. -_generate_compose_impl() { - local forge_port="${1:-3000}" - local compose_file="${FACTORY_ROOT}/docker-compose.yml" - - # Check if compose file already exists - if [ -f "$compose_file" ]; then - echo "Compose: ${compose_file} (already exists, skipping)" - return 0 - fi - - cat > "$compose_file" <<'COMPOSEEOF' -# docker-compose.yml — generated by disinto init -# Brings up Forgejo, Woodpecker, and the agent runtime. 
- -services: - forgejo: - image: codeberg.org/forgejo/forgejo:1 - container_name: disinto-forgejo - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - forgejo-data:/data - environment: - FORGEJO__database__DB_TYPE: sqlite3 - FORGEJO__server__ROOT_URL: http://forgejo:3000/ - FORGEJO__server__HTTP_PORT: "3000" - FORGEJO__security__INSTALL_LOCK: "true" - FORGEJO__service__DISABLE_REGISTRATION: "true" - FORGEJO__webhook__ALLOWED_HOST_LIST: "private" - networks: - - disinto-net - - woodpecker: - image: woodpeckerci/woodpecker-server:v3 - container_name: disinto-woodpecker - restart: unless-stopped - security_opt: - - apparmor=unconfined - ports: - - "8000:8000" - - "9000:9000" - volumes: - - woodpecker-data:/var/lib/woodpecker - environment: - WOODPECKER_FORGEJO: "true" - WOODPECKER_FORGEJO_URL: http://forgejo:3000 - WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} - WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} - WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000} - WOODPECKER_OPEN: "true" - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_DATABASE_DRIVER: sqlite3 - WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite - WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}" - depends_on: - - forgejo - networks: - - disinto-net - - woodpecker-agent: - image: woodpeckerci/woodpecker-agent:v3 - container_name: disinto-woodpecker-agent - restart: unless-stopped - network_mode: host - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock - environment: - WOODPECKER_SERVER: localhost:9000 - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_GRPC_SECURE: "false" - WOODPECKER_HEALTHCHECK_ADDR: ":3333" - WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net - WOODPECKER_MAX_WORKFLOWS: 1 - depends_on: - - woodpecker - - agents: - build: - context: . 
- dockerfile: docker/agents/Dockerfile - container_name: disinto-agents - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - - project-repos:/home/agent/repos - - ${HOME}/.claude:/home/agent/.claude - - ${HOME}/.claude.json:/home/agent/.claude.json:ro - - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro - - ${HOME}/.ssh:/home/agent/.ssh:ro - - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro - - woodpecker-data:/woodpecker-data:ro - environment: - FORGE_URL: http://forgejo:3000 - FORGE_TOKEN: ${FORGE_TOKEN:-} - FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-} - FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-} - FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-} - FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-} - FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-} - FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-} - FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-} - FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-} - WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-} - CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200} - CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1} - ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} - FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-} - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - WOODPECKER_DATA_DIR: /woodpecker-data - # IMPORTANT: agents get explicit environment variables (forge tokens, CI tokens, config). - # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in - # .env.vault.enc and are NEVER injected here — only the runner - # container receives them at fire time (AD-006, #745). - depends_on: - - forgejo - - woodpecker - networks: - - disinto-net - - runner: - build: - context: . 
- dockerfile: docker/agents/Dockerfile - profiles: ["vault"] - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - environment: - FORGE_URL: http://forgejo:3000 - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - # Vault redesign in progress (PR-based approval, see #73-#77) - # This container is being replaced — entrypoint will be updated in follow-up - networks: - - disinto-net - - # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging - # Serves on ports 80/443, routes based on path - edge: - build: ./docker/edge - container_name: disinto-edge - ports: - - "80:80" - - "443:443" - environment: - - DISINTO_VERSION=${DISINTO_VERSION:-main} - - FORGE_URL=http://forgejo:3000 - - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto} - - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops} - - FORGE_TOKEN=${FORGE_TOKEN:-} - - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin} - - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-} - - OPS_REPO_ROOT=/opt/disinto-ops - - PROJECT_REPO_ROOT=/opt/disinto - - PRIMARY_BRANCH=main - volumes: - - ./docker/Caddyfile:/etc/caddy/Caddyfile - - caddy_data:/data - - /var/run/docker.sock:/var/run/docker.sock - depends_on: - - forgejo - - woodpecker - - staging - networks: - - disinto-net - - # Staging container — static file server for staging artifacts - # Edge proxy routes to this container for default requests - staging: - image: caddy:alpine - command: ["caddy", "file-server", "--root", "/srv/site"] - volumes: - - ./docker:/srv/site:ro - networks: - - disinto-net - - # Staging deployment slot — activated by Woodpecker staging pipeline (#755). - # Profile-gated: only starts when explicitly targeted by deploy commands. - # Customize image/ports/volumes for your project after init. 
- staging-deploy: - image: alpine:3 - profiles: ["staging"] - security_opt: - - apparmor=unconfined - environment: - DEPLOY_ENV: staging - networks: - - disinto-net - command: ["echo", "staging slot — replace with project image"] - -volumes: - forgejo-data: - woodpecker-data: - agent-data: - project-repos: - caddy_data: - -networks: - disinto-net: - driver: bridge -COMPOSEEOF - - # Patch the Claude CLI binary path — resolve from host PATH at init time. - local claude_bin - claude_bin="$(command -v claude 2>/dev/null || true)" - if [ -n "$claude_bin" ]; then - # Resolve symlinks to get the real binary path - claude_bin="$(readlink -f "$claude_bin")" - sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" - else - echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 - sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" - fi - - # Patch the forgejo port mapping into the file if non-default - if [ "$forge_port" != "3000" ]; then - # Add port mapping to forgejo service so it's reachable from host during init - sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" - else - sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\ ports:\\n - \"3000:3000\"" "$compose_file" - fi - - echo "Created: ${compose_file}" -} - -# Generate docker/agents/ files if they don't already exist. -_generate_agent_docker_impl() { - local docker_dir="${FACTORY_ROOT}/docker/agents" - mkdir -p "$docker_dir" - - if [ ! -f "${docker_dir}/Dockerfile" ]; then - echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 - fi - if [ ! -f "${docker_dir}/entrypoint.sh" ]; then - echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 - fi -} - -# Generate docker/Caddyfile template for edge proxy. 
-_generate_caddyfile_impl() { - local docker_dir="${FACTORY_ROOT}/docker" - local caddyfile="${docker_dir}/Caddyfile" - - if [ -f "$caddyfile" ]; then - echo "Caddyfile: ${caddyfile} (already exists, skipping)" - return - fi - - cat > "$caddyfile" <<'CADDYFILEEOF' -# Caddyfile — edge proxy configuration -# IP-only binding at bootstrap; domain + TLS added later via vault resource request - -:80 { - # Reverse proxy to Forgejo - handle /forgejo/* { - reverse_proxy forgejo:3000 - } - - # Reverse proxy to Woodpecker CI - handle /ci/* { - reverse_proxy woodpecker:8000 - } - - # Default: proxy to staging container - handle { - reverse_proxy staging:80 - } -} -CADDYFILEEOF - - echo "Created: ${caddyfile}" -} - -# Generate docker/index.html default page. -_generate_staging_index_impl() { - local docker_dir="${FACTORY_ROOT}/docker" - local index_file="${docker_dir}/index.html" - - if [ -f "$index_file" ]; then - echo "Staging: ${index_file} (already exists, skipping)" - return - fi - - cat > "$index_file" <<'INDEXEOF' -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Nothing shipped yet - - - -
-

Nothing shipped yet

-

CI pipelines will update this page with your staging artifacts.

-
- - -INDEXEOF - - echo "Created: ${index_file}" -} - -# Generate template .woodpecker/ deployment pipeline configs in a project repo. -# Creates staging.yml and production.yml alongside the project's existing CI config. -# These pipelines trigger on Woodpecker's deployment event with environment filters. -_generate_deploy_pipelines_impl() { - local repo_root="$1" - local project_name="$2" - : "${project_name// /}" # Silence SC2034 - variable used in heredoc - local wp_dir="${repo_root}/.woodpecker" - - mkdir -p "$wp_dir" - - # Skip if deploy pipelines already exist - if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then - echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" - return - fi - - if [ ! -f "${wp_dir}/staging.yml" ]; then - cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' -# .woodpecker/staging.yml — Staging deployment pipeline -# Triggered by runner via Woodpecker promote API. -# Human approves promotion in vault → runner calls promote → this runs. - -when: - event: deployment - environment: staging - -steps: - - name: deploy-staging - image: docker:27 - commands: - - echo "Deploying to staging environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" - # Pull the image built by CI and deploy to staging - # Customize these commands for your project: - # - docker compose -f docker-compose.yml --profile staging up -d - - echo "Staging deployment complete" - - - name: verify-staging - image: alpine:3 - commands: - - echo "Verifying staging deployment..." - # Add health checks, smoke tests, or integration tests here: - # - curl -sf http://staging:8080/health || exit 1 - - echo "Staging verification complete" -STAGINGEOF - echo "Created: ${wp_dir}/staging.yml" - fi - - if [ ! -f "${wp_dir}/production.yml" ]; then - cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' -# .woodpecker/production.yml — Production deployment pipeline -# Triggered by runner via Woodpecker promote API. 
-# Human approves promotion in vault → runner calls promote → this runs. - -when: - event: deployment - environment: production - -steps: - - name: deploy-production - image: docker:27 - commands: - - echo "Deploying to production environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" - # Pull the verified image and deploy to production - # Customize these commands for your project: - # - docker compose -f docker-compose.yml up -d - - echo "Production deployment complete" - - - name: verify-production - image: alpine:3 - commands: - - echo "Verifying production deployment..." - # Add production health checks here: - # - curl -sf http://production:8080/health || exit 1 - - echo "Production verification complete" -PRODUCTIONEOF - echo "Created: ${wp_dir}/production.yml" - fi -} diff --git a/lib/hire-agent.sh b/lib/hire-agent.sh deleted file mode 100644 index b15b2b7..0000000 --- a/lib/hire-agent.sh +++ /dev/null @@ -1,471 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# hire-agent — disinto_hire_an_agent() function -# -# Handles user creation, .profile repo setup, formula copying, branch protection, -# and state marker creation for hiring a new agent. 
-# -# Globals expected: -# FORGE_URL - Forge instance URL -# FORGE_TOKEN - Admin token for Forge operations -# FACTORY_ROOT - Root of the disinto factory -# PROJECT_NAME - Project name for email/domain generation -# -# Usage: -# source "${FACTORY_ROOT}/lib/hire-agent.sh" -# disinto_hire_an_agent [--formula ] [--local-model ] [--poll-interval ] -# ============================================================================= -set -euo pipefail - -disinto_hire_an_agent() { - local agent_name="${1:-}" - local role="${2:-}" - local formula_path="" - local local_model="" - local poll_interval="" - - if [ -z "$agent_name" ] || [ -z "$role" ]; then - echo "Error: agent-name and role required" >&2 - echo "Usage: disinto hire-an-agent [--formula ] [--local-model ] [--poll-interval ]" >&2 - exit 1 - fi - shift 2 - - # Parse flags - while [ $# -gt 0 ]; do - case "$1" in - --formula) - formula_path="$2" - shift 2 - ;; - --local-model) - local_model="$2" - shift 2 - ;; - --poll-interval) - poll_interval="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" >&2 - exit 1 - ;; - esac - done - - # Default formula path — try both naming conventions - if [ -z "$formula_path" ]; then - formula_path="${FACTORY_ROOT}/formulas/${role}.toml" - if [ ! -f "$formula_path" ]; then - formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml" - fi - fi - - # Validate formula exists - if [ ! 
-f "$formula_path" ]; then - echo "Error: formula not found at ${formula_path}" >&2 - exit 1 - fi - - echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────" - echo "Formula: ${formula_path}" - if [ -n "$local_model" ]; then - echo "Local model: ${local_model}" - echo "Poll interval: ${poll_interval:-300}s" - fi - - # Ensure FORGE_TOKEN is set - if [ -z "${FORGE_TOKEN:-}" ]; then - echo "Error: FORGE_TOKEN not set" >&2 - exit 1 - fi - - # Get Forge URL - local forge_url="${FORGE_URL:-http://localhost:3000}" - echo "Forge: ${forge_url}" - - # Step 1: Create user via API (skip if exists) - echo "" - echo "Step 1: Creating user '${agent_name}' (if not exists)..." - - local user_pass="" - local admin_pass="" - - # Read admin password from .env for standalone runs (#184) - local env_file="${FACTORY_ROOT}/.env" - if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) - fi - - # Get admin token early (needed for both user creation and password reset) - local admin_user="disinto-admin" - admin_pass="${admin_pass:-admin}" - local admin_token="" - local admin_token_name - admin_token_name="temp-token-$(date +%s)" - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - if [ -z "$admin_token" ]; then - # Token might already exist — try listing - admin_token=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || admin_token="" - fi - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - echo " Cannot proceed without admin privileges" >&2 - exit 1 - fi - - if curl -sf --max-time 5 
"${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then - echo " User '${agent_name}' already exists" - # Reset user password so we can get a token (#184) - user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - # Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x) - if _forgejo_exec forgejo admin user change-password \ - --username "${agent_name}" \ - --password "${user_pass}" \ - --must-change-password=false >/dev/null 2>&1; then - echo " Reset password for existing user '${agent_name}'" - else - echo " Warning: could not reset password for existing user" >&2 - fi - else - # Create user using basic auth (admin token fallback would poison subsequent calls) - # Create the user - user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - if curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users" \ - -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then - echo " Created user '${agent_name}'" - else - echo " Warning: failed to create user via admin API" >&2 - # Try alternative: user might already exist - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then - echo " User '${agent_name}' exists (confirmed)" - else - echo " Error: failed to create user '${agent_name}'" >&2 - exit 1 - fi - fi - fi - - # Step 1.5: Generate Forge token for the new/existing user - echo "" - echo "Step 1.5: Generating Forge token for '${agent_name}'..." 
- - # Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN) - local role_upper - role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]') - local token_var="FORGE_${role_upper}_TOKEN" - - # Generate token using the user's password (basic auth) - local agent_token="" - agent_token=$(curl -sf -X POST \ - -u "${agent_name}:${user_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${agent_name}/tokens" \ - -d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || agent_token="" - - if [ -z "$agent_token" ]; then - # Token name collision — create with timestamp suffix - agent_token=$(curl -sf -X POST \ - -u "${agent_name}:${user_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${agent_name}/tokens" \ - -d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || agent_token="" - fi - - if [ -z "$agent_token" ]; then - echo " Warning: failed to create API token for '${agent_name}'" >&2 - else - # Store token in .env under the role-specific variable name - if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then - # Use sed with alternative delimiter and proper escaping for special chars in token - local escaped_token - escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g') - sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file" - echo " ${agent_name} token updated (${token_var})" - else - printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file" - echo " ${agent_name} token saved (${token_var})" - fi - export "${token_var}=${agent_token}" - fi - - # Step 2: Create .profile repo on Forgejo - echo "" - echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." 
- - if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then - echo " Repo '${agent_name}/.profile' already exists" - else - # Create the repo using the admin API to ensure it's created in the agent's namespace. - # Using POST /api/v1/user/repos with a user token would create the repo under the - # authenticated user, which could be wrong if the token belongs to a different user. - # The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the - # specified user's namespace. - local create_output - create_output=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${agent_name}/repos" \ - -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true - - if echo "$create_output" | grep -q '"id":\|[0-9]'; then - echo " Created repo '${agent_name}/.profile' (via admin API)" - else - echo " Error: failed to create repo '${agent_name}/.profile'" >&2 - echo " Response: ${create_output}" >&2 - exit 1 - fi - fi - - # Step 3: Clone repo and create initial commit - echo "" - echo "Step 3: Cloning repo and creating initial commit..." - - local clone_dir="/tmp/.profile-clone-${agent_name}" - rm -rf "$clone_dir" - mkdir -p "$clone_dir" - - # Build authenticated clone URL using basic auth (user_pass is always set in Step 1) - if [ -z "${user_pass:-}" ]; then - echo " Error: no user password available for cloning" >&2 - exit 1 - fi - - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|") - auth_url="${auth_url}/${agent_name}/.profile.git" - - # Display unauthenticated URL (auth token only in actual git clone command) - echo " Cloning: ${forge_url}/${agent_name}/.profile.git" - - # Try authenticated clone first (required for private repos) - if ! 
git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then - echo " Error: failed to clone repo with authentication" >&2 - echo " Note: Ensure the user has a valid API token with repository access" >&2 - rm -rf "$clone_dir" - exit 1 - fi - - # Configure git - git -C "$clone_dir" config user.name "disinto-admin" - git -C "$clone_dir" config user.email "disinto-admin@localhost" - - # Create directory structure - echo " Creating directory structure..." - mkdir -p "${clone_dir}/journal" - mkdir -p "${clone_dir}/knowledge" - touch "${clone_dir}/journal/.gitkeep" - touch "${clone_dir}/knowledge/.gitkeep" - - # Copy formula - echo " Copying formula..." - cp "$formula_path" "${clone_dir}/formula.toml" - - # Create README - if [ ! -f "${clone_dir}/README.md" ]; then - cat > "${clone_dir}/README.md" </dev/null; then - git -C "$clone_dir" commit -m "chore: initial .profile setup" -q - git -C "$clone_dir" push origin main >/dev/null 2>&1 || \ - git -C "$clone_dir" push origin master >/dev/null 2>&1 || true - echo " Committed: initial .profile setup" - else - echo " No changes to commit" - fi - - rm -rf "$clone_dir" - - # Step 4: Set up branch protection - echo "" - echo "Step 4: Setting up branch protection..." 
- - # Source branch-protection.sh helper - local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh" - if [ -f "$bp_script" ]; then - # Source required environment - if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then - source "${FACTORY_ROOT}/lib/env.sh" - fi - - # Set up branch protection for .profile repo - if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then - echo " Branch protection configured for main branch" - echo " - Requires 1 approval before merge" - echo " - Admin-only merge enforcement" - echo " - Journal branch created for direct agent pushes" - else - echo " Warning: could not configure branch protection (Forgejo API may not be available)" - echo " Note: Branch protection can be set up manually later" - fi - else - echo " Warning: branch-protection.sh not found at ${bp_script}" - fi - - # Step 5: Create state marker - echo "" - echo "Step 5: Creating state marker..." - - local state_dir="${FACTORY_ROOT}/state" - mkdir -p "$state_dir" - local state_file="${state_dir}/.${role}-active" - - if [ ! -f "$state_file" ]; then - touch "$state_file" - echo " Created: ${state_file}" - else - echo " State marker already exists: ${state_file}" - fi - - # Step 6: Set up local model agent (if --local-model specified) - if [ -n "$local_model" ]; then - echo "" - echo "Step 6: Configuring local model agent..." - - local override_file="${FACTORY_ROOT}/docker-compose.override.yml" - local override_dir - override_dir=$(dirname "$override_file") - mkdir -p "$override_dir" - - # Validate model endpoint is reachable - echo " Validating model endpoint: ${local_model}" - if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then - # Try /v1/chat/completions as fallback endpoint check - if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then - echo " Warning: model endpoint may not be reachable at ${local_model}" - echo " Continuing with configuration..." 
- fi - else - echo " Model endpoint is reachable" - fi - - # Generate service name from agent name (lowercase) - local service_name="agents-${agent_name}" - service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]') - - # Set default poll interval - local interval="${poll_interval:-300}" - - # Generate the override compose file - # Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time - # \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion - cat > "$override_file" </dev/null || true) fi if [ -n "$label_id" ]; then - _ILC_LABEL_IDS["$name"]="$label_id" + eval "${varname}=\"${label_id}\"" fi printf '%s' "$label_id" } -_ilc_backlog_id() { _ilc_ensure_label_id "backlog" "#0075ca"; } -_ilc_in_progress_id() { _ilc_ensure_label_id "in-progress" "#1d76db"; } -_ilc_blocked_id() { _ilc_ensure_label_id "blocked" "#e11d48"; } +_ilc_backlog_id() { _ilc_ensure_label_id _ILC_BACKLOG_ID "backlog" "#0075ca"; } +_ilc_in_progress_id() { _ilc_ensure_label_id _ILC_IN_PROGRESS_ID "in-progress" "#1d76db"; } +_ilc_blocked_id() { _ilc_ensure_label_id _ILC_BLOCKED_ID "blocked" "#e11d48"; } # --------------------------------------------------------------------------- -# issue_claim — assign issue to bot, add "in-progress" label, remove "backlog". +# issue_claim — add "in-progress" label, remove "backlog" label. 
# Args: issue_number -# Returns: 0 on success, 1 if already assigned to another agent # --------------------------------------------------------------------------- issue_claim() { local issue="$1" - - # Get current bot identity - local me - me=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL}/api/v1/user" | jq -r '.login') || return 1 - - # Check current assignee - local current - current=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${issue}" | jq -r '.assignee.login // ""') || return 1 - - if [ -n "$current" ] && [ "$current" != "$me" ]; then - _ilc_log "issue #${issue} already assigned to ${current} — skipping" - return 1 - fi - - # Assign to self (Forgejo rejects if already assigned differently) - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}" \ - -d "{\"assignees\":[\"${me}\"]}" >/dev/null 2>&1 || return 1 - local ip_id bl_id ip_id=$(_ilc_in_progress_id) bl_id=$(_ilc_backlog_id) @@ -125,23 +102,14 @@ issue_claim() { "${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true fi _ilc_log "claimed issue #${issue}" - return 0 } # --------------------------------------------------------------------------- -# issue_release — remove "in-progress" label, add "backlog" label, clear assignee. +# issue_release — remove "in-progress" label, add "backlog" label. 
# Args: issue_number # --------------------------------------------------------------------------- issue_release() { local issue="$1" - - # Clear assignee - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}" \ - -d '{"assignees":[]}' >/dev/null 2>&1 || true - local ip_id bl_id ip_id=$(_ilc_in_progress_id) bl_id=$(_ilc_backlog_id) @@ -160,27 +128,6 @@ issue_release() { _ilc_log "released issue #${issue}" } -# --------------------------------------------------------------------------- -# _ilc_post_comment — Post a comment to an issue (internal helper) -# Args: issue_number body_text -# Uses a temp file to avoid large inline strings. -# --------------------------------------------------------------------------- -_ilc_post_comment() { - local issue="$1" body="$2" - - local tmpfile tmpjson - tmpfile=$(mktemp /tmp/ilc-comment-XXXXXX.md) - tmpjson="${tmpfile}.json" - printf '%s' "$body" > "$tmpfile" - jq -Rs '{body:.}' < "$tmpfile" > "$tmpjson" - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}/comments" \ - --data-binary @"$tmpjson" 2>/dev/null || true - rm -f "$tmpfile" "$tmpjson" -} - # --------------------------------------------------------------------------- # issue_block — add "blocked" label, post diagnostic comment, remove in-progress. 
# Args: issue_number reason [result_text] @@ -207,9 +154,14 @@ issue_block() { fi } > "$tmpfile" - # Post comment using shared helper - _ilc_post_comment "$issue" "$(cat "$tmpfile")" - rm -f "$tmpfile" + # Post comment + jq -Rs '{body:.}' < "$tmpfile" > "${tmpfile}.json" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}/comments" \ + --data-binary @"${tmpfile}.json" 2>/dev/null || true + rm -f "$tmpfile" "${tmpfile}.json" # Remove in-progress, add blocked local ip_id bk_id @@ -232,19 +184,11 @@ issue_block() { } # --------------------------------------------------------------------------- -# issue_close — clear assignee, PATCH state to closed. +# issue_close — PATCH state to closed. # Args: issue_number # --------------------------------------------------------------------------- issue_close() { local issue="$1" - - # Clear assignee before closing - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}" \ - -d '{"assignees":[]}' >/dev/null 2>&1 || true - curl -sf -X PATCH \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ diff --git a/lib/load-project.sh b/lib/load-project.sh index 9d7afaf..dcddc94 100755 --- a/lib/load-project.sh +++ b/lib/load-project.sh @@ -10,6 +10,7 @@ # PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT, # CHECK_PIPELINE_STALL, CI_STALE_MINUTES, # MIRROR_NAMES, MIRROR_URLS, MIRROR_ (per configured mirror) +# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB) # # If no argument given, does nothing (allows poll scripts to work with # plain .env fallback for backwards compatibility). @@ -82,7 +83,7 @@ if mirrors: # Export parsed variables. # Inside the agents container (DISINTO_CONTAINER=1), compose already sets the # correct FORGE_URL (http://forgejo:3000) and path vars for the container -# environment. 
The TOML carries host-perspective values (localhost, /home/admin/…) +# environment. The TOML carries host-perspective values (localhost, /home/johba/…) # that would break container API calls and path resolution. Skip overriding # any env var that is already set when running inside the container. while IFS='=' read -r _key _val; do @@ -99,9 +100,11 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}" if [ -n "$FORGE_REPO" ]; then export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}" export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}" - # Extract repo owner (first path segment of owner/repo) - export FORGE_REPO_OWNER="${FORGE_REPO%%/*}" fi +# Backwards-compat aliases +export CODEBERG_REPO="${FORGE_REPO}" +export CODEBERG_API="${FORGE_API:-}" +export CODEBERG_WEB="${FORGE_WEB:-}" # Derive PROJECT_REPO_ROOT if not explicitly set if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then diff --git a/lib/mirrors.sh b/lib/mirrors.sh index 3ba561d..e6dfba1 100644 --- a/lib/mirrors.sh +++ b/lib/mirrors.sh @@ -13,16 +13,7 @@ mirror_push() { local name url for name in $MIRROR_NAMES; do - # Convert name to uppercase env var name safely (only alphanumeric allowed) - local upper_name - upper_name=$(printf '%s' "$name" | tr '[:lower:]' '[:upper:]') - # Validate: only allow alphanumeric + underscore in var name - if [[ ! 
"$upper_name" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then - continue - fi - # Use indirect expansion safely (no eval) — MIRROR_ prefix required - local varname="MIRROR_${upper_name}" - url="${!varname:-}" + url=$(eval "echo \"\$MIRROR_$(echo "$name" | tr '[:lower:]' '[:upper:]')\"") || true [ -z "$url" ] && continue # Ensure remote exists with correct URL diff --git a/lib/ops-setup.sh b/lib/ops-setup.sh deleted file mode 100644 index ae6b216..0000000 --- a/lib/ops-setup.sh +++ /dev/null @@ -1,236 +0,0 @@ -#!/usr/bin/env bash -# ops-setup.sh — Setup ops repository (disinto-ops) -# -# Source from bin/disinto: -# source "$(dirname "$0")/../lib/ops-setup.sh" -# -# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT -# Optional: admin_token (falls back to FORGE_TOKEN for admin operations) -# -# Functions: -# setup_ops_repo [primary_branch] -# - Create ops repo on Forgejo if it doesn't exist -# - Configure bot collaborators with appropriate permissions -# - Clone or initialize ops repo locally -# - Seed directory structure (vault, knowledge, evidence) -# - Export _ACTUAL_OPS_SLUG for caller to use -# -# Globals modified: -# _ACTUAL_OPS_SLUG - resolved ops repo slug after function completes - -set -euo pipefail - -setup_ops_repo() { - - local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" - local org_name="${ops_slug%%/*}" - local ops_name="${ops_slug##*/}" - - echo "" - echo "── Ops repo setup ─────────────────────────────────────" - - # Determine the actual ops repo location by searching across possible namespaces - # This handles cases where the repo was created under a different namespace - # due to past bugs (e.g., dev-bot/disinto-ops instead of disinto-admin/disinto-ops) - local actual_ops_slug="" - local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" ) - local http_code - - for ns in "${possible_namespaces[@]}"; do - slug="${ns}/${ops_name}" - if curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - 
"${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then - actual_ops_slug="$slug" - echo "Ops repo: ${slug} (found at ${slug})" - break - fi - done - - # If not found, try to create it in the configured namespace - if [ -z "$actual_ops_slug" ]; then - echo "Creating ops repo in namespace: ${org_name}" - # Create org if it doesn't exist - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs" \ - -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true - if curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then - actual_ops_slug="${org_name}/${ops_name}" - echo "Ops repo: ${actual_ops_slug} created on Forgejo" - else - # Fallback: use admin API to create repo under the target namespace - http_code=$(curl -s -o /dev/null -w "%{http_code}" \ - -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${org_name}/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" 2>/dev/null || echo "0") - if [ "$http_code" = "201" ]; then - actual_ops_slug="${org_name}/${ops_name}" - echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)" - else - echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2 - return 1 - fi - fi - fi - - # Configure collaborators on the ops repo - local bot_user bot_perm - declare -A bot_permissions=( - [dev-bot]="write" - [review-bot]="read" - [planner-bot]="write" - 
[gardener-bot]="write" - [vault-bot]="write" - [supervisor-bot]="read" - [predictor-bot]="read" - [architect-bot]="write" - ) - - # Add all bot users as collaborators with appropriate permissions - # vault branch protection (#77) requires: - # - Admin-only merge to main (enforced by admin_enforced: true) - # - Bots can push branches and create PRs, but cannot merge - for bot_user in "${!bot_permissions[@]}"; do - bot_perm="${bot_permissions[$bot_user]}" - if curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \ - -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then - echo " + ${bot_user} = ${bot_perm} collaborator" - else - echo " ! ${bot_user} = ${bot_perm} (already set or failed)" - fi - done - - # Add disinto-admin as admin collaborator - if curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \ - -d '{"permission":"admin"}' >/dev/null 2>&1; then - echo " + disinto-admin = admin collaborator" - else - echo " ! disinto-admin = admin (already set or failed)" - fi - - # Clone ops repo locally if not present - if [ ! 
-d "${ops_root}/.git" ]; then - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") - local clone_url="${auth_url}/${actual_ops_slug}.git" - echo "Cloning: ops repo -> ${ops_root}" - if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then - echo "Ops repo: ${actual_ops_slug} cloned successfully" - else - echo "Initializing: ops repo at ${ops_root}" - mkdir -p "$ops_root" - git -C "$ops_root" init --initial-branch="${primary_branch}" -q - # Set remote to the actual ops repo location - git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git" - echo "Ops repo: ${actual_ops_slug} initialized locally" - fi - else - echo "Ops repo: ${ops_root} (already exists locally)" - # Verify remote is correct - local current_remote - current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true) - local expected_remote="${forge_url}/${actual_ops_slug}.git" - if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then - echo " Fixing: remote URL from ${current_remote} to ${expected_remote}" - git -C "$ops_root" remote set-url origin "$expected_remote" - fi - fi - - # Seed directory structure - local seeded=false - mkdir -p "${ops_root}/vault/pending" - mkdir -p "${ops_root}/vault/approved" - mkdir -p "${ops_root}/vault/fired" - mkdir -p "${ops_root}/vault/rejected" - mkdir -p "${ops_root}/knowledge" - mkdir -p "${ops_root}/evidence/engagement" - mkdir -p "${ops_root}/evidence/red-team" - mkdir -p "${ops_root}/evidence/holdout" - mkdir -p "${ops_root}/evidence/evolution" - mkdir -p "${ops_root}/evidence/user-test" - mkdir -p "${ops_root}/sprints" - [ -f "${ops_root}/sprints/.gitkeep" ] || { touch "${ops_root}/sprints/.gitkeep"; seeded=true; } - [ -f "${ops_root}/evidence/red-team/.gitkeep" ] || { touch "${ops_root}/evidence/red-team/.gitkeep"; seeded=true; } - [ -f "${ops_root}/evidence/holdout/.gitkeep" ] || { touch "${ops_root}/evidence/holdout/.gitkeep"; seeded=true; } - [ -f 
"${ops_root}/evidence/evolution/.gitkeep" ] || { touch "${ops_root}/evidence/evolution/.gitkeep"; seeded=true; } - [ -f "${ops_root}/evidence/user-test/.gitkeep" ] || { touch "${ops_root}/evidence/user-test/.gitkeep"; seeded=true; } - - if [ ! -f "${ops_root}/README.md" ]; then - cat > "${ops_root}/README.md" < **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo. - -## Branch protection - -- \`main\`: 2 reviewers required for vault items -- Journal/evidence commits may use lighter rules -OPSEOF - seeded=true - fi - - # Create stub files if they don't exist - [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; } - [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } - [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } - - # Commit and push seed content - if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then - # Auto-configure repo-local git identity if missing (#778) - if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then - git -C "$ops_root" config user.name "disinto-admin" - fi - if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then - git -C "$ops_root" config user.email "disinto-admin@localhost" - fi - - git -C "$ops_root" add -A - if ! 
git -C "$ops_root" diff --cached --quiet 2>/dev/null; then - git -C "$ops_root" commit -m "chore: seed ops repo structure" -q - # Push if remote exists - if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then - if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then - echo "Seeded: ops repo with initial structure" - else - echo "Warning: failed to push seed content to ops repo" >&2 - fi - fi - fi - fi - - # Export resolved slug for the caller to write back to the project TOML - _ACTUAL_OPS_SLUG="${actual_ops_slug}" -} diff --git a/lib/pr-lifecycle.sh b/lib/pr-lifecycle.sh index e097f34..ad6f0de 100644 --- a/lib/pr-lifecycle.sh +++ b/lib/pr-lifecycle.sh @@ -61,15 +61,13 @@ _prl_log() { # --------------------------------------------------------------------------- # pr_create — Create a PR via forge API. -# Args: branch title body [base_branch] [api_url] +# Args: branch title body [base_branch] # Stdout: PR number # Returns: 0=created (or found existing), 1=failed -# api_url defaults to FORGE_API if not provided # --------------------------------------------------------------------------- pr_create() { local branch="$1" title="$2" body="$3" local base="${4:-${PRIMARY_BRANCH:-main}}" - local api_url="${5:-${FORGE_API}}" local tmpfile resp http_code resp_body pr_num tmpfile=$(mktemp /tmp/prl-create-XXXXXX.json) @@ -79,7 +77,7 @@ pr_create() { resp=$(curl -s -w "\n%{http_code}" -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ - "${api_url}/pulls" \ + "${FORGE_API}/pulls" \ --data-binary @"$tmpfile") || true rm -f "$tmpfile" @@ -94,7 +92,7 @@ pr_create() { return 0 ;; 409) - pr_num=$(pr_find_by_branch "$branch" "$api_url") || true + pr_num=$(pr_find_by_branch "$branch") || true if [ -n "$pr_num" ]; then _prl_log "PR already exists: #${pr_num}" printf '%s' "$pr_num" @@ -112,17 +110,15 @@ pr_create() { # --------------------------------------------------------------------------- # pr_find_by_branch — 
Find an open PR by head branch name. -# Args: branch [api_url] +# Args: branch # Stdout: PR number # Returns: 0=found, 1=not found -# api_url defaults to FORGE_API if not provided # --------------------------------------------------------------------------- pr_find_by_branch() { local branch="$1" - local api_url="${2:-${FORGE_API}}" local pr_num pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/pulls?state=open&limit=20" | \ + "${FORGE_API}/pulls?state=open&limit=20" | \ jq -r --arg b "$branch" '.[] | select(.head.ref == $b) | .number' \ | head -1) || true if [ -n "$pr_num" ]; then @@ -348,29 +344,6 @@ pr_is_merged() { [ "$merged" = "true" ] } -# --------------------------------------------------------------------------- -# pr_close — Close a PR via forge API. -# Args: pr_number -# Returns: 0=closed, 1=error -# --------------------------------------------------------------------------- -pr_close() { - local pr_num="$1" - - _prl_log "closing PR #${pr_num}" - local resp http_code - resp=$(curl -sf -w "\n%{http_code}" -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/pulls/${pr_num}" \ - -d '{"state":"closed"}' 2>/dev/null) || true - http_code=$(printf '%s\n' "$resp" | tail -1) - if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then - _prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}" - return 1 - fi - _prl_log "PR #${pr_num} closed" -} - # --------------------------------------------------------------------------- # pr_walk_to_merge — Walk a PR through CI, review, and merge. 
# @@ -405,18 +378,11 @@ pr_walk_to_merge() { if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then ci_retry_count=$((ci_retry_count + 1)) _prl_log "infra failure — retriggering CI (retry ${ci_retry_count})" - local rebase_output rebase_rc ( cd "$worktree" && \ git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \ git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \ git rebase "${remote}/${PRIMARY_BRANCH}" && \ - git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1 - rebase_rc=$? - rebase_output=$(cat /tmp/rebase-output-$$) - rm -f /tmp/rebase-output-$$ - if [ "$rebase_rc" -ne 0 ]; then - _prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)" - fi + git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true continue fi @@ -428,23 +394,6 @@ pr_walk_to_merge() { fi _prl_log "CI failed — invoking agent (attempt ${ci_fix_count}/${max_ci_fixes})" - - # Get CI logs from SQLite database if available - local ci_logs="" - if [ -n "$_PR_CI_PIPELINE" ] && [ -n "${FACTORY_ROOT:-}" ]; then - ci_logs=$(ci_get_logs "$_PR_CI_PIPELINE" 2>/dev/null | tail -50) || ci_logs="" - fi - - local logs_section="" - if [ -n "$ci_logs" ]; then - logs_section=" -CI Log Output (last 50 lines): -\`\`\` -${ci_logs} -\`\`\` -" - fi - agent_run --resume "$session_id" --worktree "$worktree" \ "CI failed on PR #${pr_num} (attempt ${ci_fix_count}/${max_ci_fixes}). 
@@ -452,7 +401,7 @@ Pipeline: #${_PR_CI_PIPELINE:-?} Failure type: ${_PR_CI_FAILURE_TYPE:-unknown} Error log: -${_PR_CI_ERROR_LOG:-No logs available.}${logs_section} +${_PR_CI_ERROR_LOG:-No logs available.} Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: git fetch ${remote} ${PRIMARY_BRANCH} && git rebase ${remote}/${PRIMARY_BRANCH} @@ -488,7 +437,11 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: _PR_WALK_EXIT_REASON="merged" return 0 fi - # Merge failed (conflict or HTTP 405) — ask agent to rebase + if [ "$rc" -eq 2 ]; then + _PR_WALK_EXIT_REASON="merge_blocked" + return 1 + fi + # Merge failed (conflict) — ask agent to rebase _prl_log "merge failed — invoking agent to rebase" agent_run --resume "$session_id" --worktree "$worktree" \ "PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown} @@ -534,7 +487,8 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push: # build_phase_protocol_prompt — Generate push/commit instructions for Claude. # # For the synchronous agent_run architecture: tells Claude how to commit and -# push (no phase files). +# push (no phase files). For the tmux session architecture, use the +# build_phase_protocol_prompt in dev/phase-handler.sh instead. # # Args: branch [remote] # Stdout: instruction text diff --git a/lib/release.sh b/lib/release.sh deleted file mode 100644 index 6eb03ee..0000000 --- a/lib/release.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# release.sh — disinto_release() function -# -# Handles vault TOML creation, branch setup on ops repo, PR creation, -# and auto-merge request for a versioned release. -# -# Globals expected: -# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) -# FORGE_TOKEN - API token for Forge operations -# FORGE_OPS_REPO - Ops repo slug (e.g. 
disinto-admin/myproject-ops) -# FACTORY_ROOT - Root of the disinto factory -# PRIMARY_BRANCH - Primary branch name (e.g. main) -# -# Usage: -# source "${FACTORY_ROOT}/lib/release.sh" -# disinto_release -# ============================================================================= -set -euo pipefail - -# Source vault.sh for _vault_log helper -source "${FACTORY_ROOT}/lib/vault.sh" - -# Assert required globals are set before using this module. -_assert_release_globals() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") - [ -z "${FORGE_OPS_REPO:-}" ] && missing+=("FORGE_OPS_REPO") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: release.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -disinto_release() { - _assert_release_globals - - local version="${1:-}" - local formula_path="${FACTORY_ROOT}/formulas/release.toml" - - if [ -z "$version" ]; then - echo "Error: version required" >&2 - echo "Usage: disinto release " >&2 - echo "Example: disinto release v1.2.0" >&2 - exit 1 - fi - - # Validate version format (must start with 'v' followed by semver) - if ! echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then - echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2 - exit 1 - fi - - # Load project config to get FORGE_OPS_REPO - if [ -z "${PROJECT_NAME:-}" ]; then - # PROJECT_NAME is unset - detect project TOML from projects/ directory - local found_toml - found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! 
-name "*.example" 2>/dev/null | head -1) - if [ -n "$found_toml" ]; then - source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml" - fi - else - local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml" - if [ -f "$project_toml" ]; then - source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml" - fi - fi - - # Check formula exists - if [ ! -f "$formula_path" ]; then - echo "Error: release formula not found at ${formula_path}" >&2 - exit 1 - fi - - # Get the ops repo root - local ops_root="${FACTORY_ROOT}/../disinto-ops" - if [ ! -d "${ops_root}/.git" ]; then - echo "Error: ops repo not found at ${ops_root}" >&2 - echo " Run 'disinto init' to set up the ops repo first" >&2 - exit 1 - fi - - # Generate a unique ID for the vault item - local id="release-${version//./}" - local vault_toml="${ops_root}/vault/actions/${id}.toml" - - # Create vault TOML with the specific version - cat > "$vault_toml" </dev/null || true - - # Push branch - git push -u origin "$branch_name" 2>/dev/null || { - echo "Error: failed to push branch" >&2 - exit 1 - } - ) - - # Create PR - local pr_response - pr_response=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \ - -d "{\"title\":\"${pr_title}\",\"head\":\"${branch_name}\",\"base\":\"${PRIMARY_BRANCH}\",\"body\":\"$(echo "$pr_body" | sed ':a;N;$!ba;s/\n/\\n/g')\"}" 2>/dev/null) || { - echo "Error: failed to create PR" >&2 - echo "Response: ${pr_response}" >&2 - exit 1 - } - - local pr_number - pr_number=$(echo "$pr_response" | jq -r '.number') - - local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}" - - # Enable auto-merge on the PR — Forgejo will auto-merge after approval - _vault_log "Enabling auto-merge for PR #${pr_number}" - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" 
\ - -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { - echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2 - } - - echo "" - echo "Release PR created: ${pr_url}" - echo "" - echo "Next steps:" - echo " 1. Review the PR" - echo " 2. Approve the PR (auto-merge will trigger after approval)" - echo " 3. The vault runner will execute the release formula" - echo "" - echo "After merge, the release will:" - echo " 1. Tag Forgejo main with ${version}" - echo " 2. Push tag to mirrors (Codeberg, GitHub)" - echo " 3. Build and tag the agents Docker image" - echo " 4. Restart agent containers" -} diff --git a/lib/stack-lock.sh b/lib/stack-lock.sh deleted file mode 100644 index 6c8c1ed..0000000 --- a/lib/stack-lock.sh +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env bash -# stack-lock.sh — File-based lock protocol for singleton project stack access -# -# Prevents CI pipelines and the reproduce-agent from stepping on each other -# when sharing a single project stack (e.g. harb docker compose). -# -# Lock file: /home/agent/data/locks/-stack.lock -# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."} -# -# Protocol: -# 1. stack_lock_check — inspect current lock state -# 2. stack_lock_acquire — wait until lock is free, then claim it -# 3. stack_lock_release — delete lock file when done -# -# Heartbeat: callers must update the heartbeat every 2 minutes while holding -# the lock by calling stack_lock_heartbeat. A heartbeat older than 10 minutes -# is considered stale — the next acquire will break it. -# -# Usage: -# source "$(dirname "$0")/../lib/stack-lock.sh" -# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject" -# trap 'stack_lock_release "myproject"' EXIT -# # ... do work ... 
-# stack_lock_release "myproject" - -set -euo pipefail - -STACK_LOCK_DIR="${HOME}/data/locks" -STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls -STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale -STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds - -# _stack_lock_path -# Print the path of the lock file for the given project. -_stack_lock_path() { - local project="$1" - echo "${STACK_LOCK_DIR}/${project}-stack.lock" -} - -# _stack_lock_now -# Print current UTC timestamp in ISO-8601 format. -_stack_lock_now() { - date -u +"%Y-%m-%dT%H:%M:%SZ" -} - -# _stack_lock_epoch -# Convert an ISO-8601 UTC timestamp to a Unix epoch integer. -_stack_lock_epoch() { - local ts="$1" - # Strip trailing Z, replace T with space for `date -d` - date -u -d "${ts%Z}" +%s 2>/dev/null || date -u -j -f "%Y-%m-%dT%H:%M:%S" "${ts%Z}" +%s 2>/dev/null -} - -# stack_lock_check -# Print lock status to stdout: "free", "held:", or "stale:". -# Returns 0 in all cases (status is in stdout). -stack_lock_check() { - local project="$1" - local lock_file - lock_file="$(_stack_lock_path "$project")" - - if [ ! -f "$lock_file" ]; then - echo "free" - return 0 - fi - - local holder heartbeat - holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$lock_file" 2>/dev/null || echo "unknown") - heartbeat=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$lock_file" 2>/dev/null || echo "") - - if [ -z "$heartbeat" ]; then - echo "stale:${holder}" - return 0 - fi - - local hb_epoch now_epoch age - hb_epoch=$(_stack_lock_epoch "$heartbeat" 2>/dev/null || echo "0") - now_epoch=$(date -u +%s) - age=$(( now_epoch - hb_epoch )) - - if [ "$age" -gt "$STACK_LOCK_STALE_SECONDS" ]; then - echo "stale:${holder}" - else - echo "held:${holder}" - fi -} - -# stack_lock_acquire [max_wait_seconds] -# Acquire the lock for on behalf of . 
-# Polls every STACK_LOCK_POLL_INTERVAL seconds. -# Breaks stale locks automatically. -# Exits non-zero if the lock cannot be acquired within max_wait_seconds. -stack_lock_acquire() { - local holder="$1" - local project="$2" - local max_wait="${3:-$STACK_LOCK_MAX_WAIT}" - local lock_file - lock_file="$(_stack_lock_path "$project")" - local deadline - deadline=$(( $(date -u +%s) + max_wait )) - - mkdir -p "$STACK_LOCK_DIR" - - while true; do - local status - status=$(stack_lock_check "$project") - - case "$status" in - free) - # Write to temp file then rename to avoid partial reads by other processes - local tmp_lock - tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") - local now - now=$(_stack_lock_now) - printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ - "$holder" "$now" "$now" > "$tmp_lock" - mv "$tmp_lock" "$lock_file" - echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2 - return 0 - ;; - stale:*) - local stale_holder="${status#stale:}" - echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2 - rm -f "$lock_file" - # Loop back immediately to re-check and claim - ;; - held:*) - local cur_holder="${status#held:}" - local remaining - remaining=$(( deadline - $(date -u +%s) )) - if [ "$remaining" -le 0 ]; then - echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2 - return 1 - fi - echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2 - sleep "$STACK_LOCK_POLL_INTERVAL" - ;; - *) - echo "[stack-lock] unexpected status '${status}' for ${project}" >&2 - return 1 - ;; - esac - done -} - -# stack_lock_heartbeat -# Update the heartbeat timestamp in the lock file. -# Should be called every 2 minutes while holding the lock. -# No-op if the lock file is absent or held by a different holder. 
-stack_lock_heartbeat() { - local holder="$1" - local project="$2" - local lock_file - lock_file="$(_stack_lock_path "$project")" - - [ -f "$lock_file" ] || return 0 - - local current_holder - current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") - [ "$current_holder" = "$holder" ] || return 0 - - local since - since=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lock_file" 2>/dev/null || echo "") - local now - now=$(_stack_lock_now) - - local tmp_lock - tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") - printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ - "$holder" "$since" "$now" > "$tmp_lock" - mv "$tmp_lock" "$lock_file" -} - -# stack_lock_release [holder_id] -# Release the lock for . -# If holder_id is provided, only releases if the lock is held by that holder -# (prevents accidentally releasing someone else's lock). -stack_lock_release() { - local project="$1" - local holder="${2:-}" - local lock_file - lock_file="$(_stack_lock_path "$project")" - - [ -f "$lock_file" ] || return 0 - - if [ -n "$holder" ]; then - local current_holder - current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") - if [ "$current_holder" != "$holder" ]; then - echo "[stack-lock] refusing to release: lock held by '${current_holder}', not '${holder}'" >&2 - return 1 - fi - fi - - rm -f "$lock_file" - echo "[stack-lock] released lock for ${project}" >&2 -} diff --git a/lib/vault.sh b/lib/vault.sh deleted file mode 100644 index 812d464..0000000 --- a/lib/vault.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env bash -# vault.sh — Helper for agents to create vault PRs on ops repo -# -# Source after lib/env.sh: -# source "$(dirname "$0")/../lib/env.sh" -# source "$(dirname "$0")/lib/vault.sh" -# -# Required globals: FORGE_TOKEN, FORGE_URL, FORGE_REPO, 
FORGE_OPS_REPO -# Optional: OPS_REPO_ROOT (local path for ops repo) -# -# Functions: -# vault_request — Create vault PR, return PR number -# -# The function: -# 1. Validates TOML content using validate_vault_action() from vault/vault-env.sh -# 2. Creates a branch on the ops repo: vault/ -# 3. Writes TOML to vault/actions/.toml on that branch -# 4. Creates PR targeting main with title "vault: " -# 5. Body includes context field from TOML -# 6. Returns PR number (existing or newly created) -# -# Idempotent: if PR for same action-id exists, returns its number -# -# Uses Forgejo REST API (not git push) — works from containers without SSH - -set -euo pipefail - -# Internal log helper -_vault_log() { - if declare -f log >/dev/null 2>&1; then - log "vault: $*" - else - printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 - fi -} - -# Get ops repo API URL -_vault_ops_api() { - printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}" -} - -# ----------------------------------------------------------------------------- -# vault_request — Create a vault PR or return existing one -# Args: action_id toml_content -# Stdout: PR number -# Returns: 0=success, 1=validation failed, 2=API error -# ----------------------------------------------------------------------------- -vault_request() { - local action_id="$1" - local toml_content="$2" - - if [ -z "$action_id" ]; then - echo "ERROR: action_id is required" >&2 - return 1 - fi - - if [ -z "$toml_content" ]; then - echo "ERROR: toml_content is required" >&2 - return 1 - fi - - # Check if PR already exists for this action - local existing_pr - existing_pr=$(pr_find_by_branch "vault/${action_id}" "$(_vault_ops_api)") || true - if [ -n "$existing_pr" ]; then - _vault_log "PR already exists for action $action_id: #${existing_pr}" - printf '%s' "$existing_pr" - return 0 - fi - - # Validate TOML content - local tmp_toml - tmp_toml=$(mktemp /tmp/vault-XXXXXX.toml) - trap 'rm -f "$tmp_toml"' RETURN - - printf '%s' 
"$toml_content" > "$tmp_toml" - - # Source vault-env.sh for validate_vault_action - local vault_env="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/vault-env.sh" - if [ ! -f "$vault_env" ]; then - echo "ERROR: vault-env.sh not found at $vault_env" >&2 - return 1 - fi - - # Save caller's FORGE_TOKEN, source vault-env.sh for validate_vault_action, - # then restore caller's token so PR creation uses agent's identity (not vault-bot) - local _saved_forge_token="${FORGE_TOKEN:-}" - if ! source "$vault_env"; then - FORGE_TOKEN="${_saved_forge_token:-}" - echo "ERROR: failed to source vault-env.sh" >&2 - return 1 - fi - # Restore caller's FORGE_TOKEN after validation - FORGE_TOKEN="${_saved_forge_token:-}" - - # Run validation - if ! validate_vault_action "$tmp_toml"; then - echo "ERROR: TOML validation failed" >&2 - return 1 - fi - - # Extract values for PR creation - local pr_title pr_body - pr_title="vault: ${action_id}" - pr_body="Vault action: ${action_id} - -Context: ${VAULT_ACTION_CONTEXT:-No context provided} - -Formula: ${VAULT_ACTION_FORMULA:-} -Secrets: ${VAULT_ACTION_SECRETS:-} - ---- -This vault action has been created by an agent and requires admin approval -before execution. See the TOML file for details." 
- - # Get ops repo API URL - local ops_api - ops_api="$(_vault_ops_api)" - - # Create branch - local branch="vault/${action_id}" - local branch_exists - - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${ops_api}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" != "200" ]; then - # Branch doesn't exist, create it from main - _vault_log "Creating branch ${branch} on ops repo" - - # Get the commit SHA of main branch - local main_sha - main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ - jq -r '.commit.id // empty' || true) - - if [ -z "$main_sha" ]; then - # Fallback: get from refs - main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ - jq -r '.object.sha // empty' || true) - fi - - if [ -z "$main_sha" ]; then - echo "ERROR: could not get main branch SHA" >&2 - return 1 - fi - - # Create the branch - if ! curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${ops_api}/git/branches" \ - -d "{\"ref\":\"${branch}\",\"sha\":\"${main_sha}\"}" >/dev/null 2>&1; then - echo "ERROR: failed to create branch ${branch}" >&2 - return 1 - fi - else - _vault_log "Branch ${branch} already exists" - fi - - # Write TOML file to branch via API - local file_path="vault/actions/${action_id}.toml" - _vault_log "Writing ${file_path} to branch ${branch}" - - # Encode TOML content as base64 - local encoded_content - encoded_content=$(printf '%s' "$toml_content" | base64 -w 0) - - # Upload file using Forgejo content API - if ! 
curl -sf -X PUT \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${ops_api}/contents/${file_path}" \ - -d "{\"message\":\"vault: add ${action_id}\",\"branch\":\"${branch}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then - echo "ERROR: failed to write ${file_path} to branch ${branch}" >&2 - return 1 - fi - - # Create PR - _vault_log "Creating PR for ${branch}" - - local pr_num - pr_num=$(pr_create "$branch" "$pr_title" "$pr_body" "$PRIMARY_BRANCH" "$ops_api") || { - echo "ERROR: failed to create PR" >&2 - return 1 - } - - # Enable auto-merge on the PR — Forgejo will auto-merge after approval - _vault_log "Enabling auto-merge for PR #${pr_num}" - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${ops_api}/pulls/${pr_num}/merge" \ - -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { - _vault_log "Warning: failed to enable auto-merge (may already be enabled or not supported)" - } - - # Add labels to PR (vault, pending-approval) - _vault_log "PR #${pr_num} created, adding labels" - - # Get label IDs - local vault_label_id pending_label_id - vault_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${ops_api}/labels" 2>/dev/null | \ - jq -r --arg n "vault" '.[] | select(.name == $n) | .id // empty' || true) - - pending_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${ops_api}/labels" 2>/dev/null | \ - jq -r --arg n "pending-approval" '.[] | select(.name == $n) | .id // empty' || true) - - # Add labels if they exist - if [ -n "$vault_label_id" ]; then - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${ops_api}/issues/${pr_num}/labels" \ - -d "[{\"id\":${vault_label_id}}]" >/dev/null 2>&1 || true - fi - - if [ -n "$pending_label_id" ]; then - curl -sf -X 
POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${ops_api}/issues/${pr_num}/labels" \ - -d "[{\"id\":${pending_label_id}}]" >/dev/null 2>&1 || true - fi - - printf '%s' "$pr_num" - return 0 -} diff --git a/planner/AGENTS.md b/planner/AGENTS.md index 7343b7c..9749afd 100644 --- a/planner/AGENTS.md +++ b/planner/AGENTS.md @@ -1,4 +1,4 @@ - + # Planner Agent **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints), @@ -22,13 +22,12 @@ to detect issues ping-ponging between backlog and underspecified. Issues that need human decisions or external resources are filed as vault procurement items (`$OPS_REPO_ROOT/vault/pending/*.md`) instead of being escalated. Phase 3 (file-at-constraints): identify the top 3 unresolved prerequisites that block -the most downstream objectives — file issues using a **template-or-vision gate**: -read issue templates from `.codeberg/ISSUE_TEMPLATE/*.yaml`, attempt to fill -template fields (affected_files ≤3, acceptance_criteria ≤5, single clear approach), -then apply complexity test: if work touches one subsystem with no design forks, -file as `backlog` using matching template (bug/feature/refactor); otherwise -label `vision` with problem statement and why it's vision-sized. **Human-blocked -issues are routed through the vault** — the planner files an actionable procurement +the most downstream objectives — file issues as either `backlog` (code changes, +dev-agent) or `action` (run existing formula, action-agent). **Stuck issues +(detected BOUNCED/LABEL_CHURN) are dispatched to the `groom-backlog` formula +in breakdown mode instead of being re-promoted** — this breaks the ping-pong +loop by splitting them into dev-agent-sized sub-issues. 
**Human-blocked issues +are routed through the vault** — the planner files an actionable procurement item (`$OPS_REPO_ROOT/vault/pending/-.md` with What/Why/Human action/Factory will then sections) and marks the prerequisite as blocked-on-vault in the tree. Deduplication: checks pending/ + approved/ + fired/ before creating. @@ -57,15 +56,15 @@ component, not work. prediction-triage, update-prerequisite-tree, file-at-constraints, journal-and-memory, commit-and-pr) with `needs` dependencies. Claude executes all steps in a single interactive session with tool access -- `formulas/groom-backlog.toml` — Grooming formula for backlog triage and - grooming. (Note: the planner no longer dispatches breakdown mode — complex - issues are labeled `vision` instead.) +- `formulas/groom-backlog.toml` — Dual-mode formula: grooming (default) or + breakdown (dispatched by planner for bounced/stuck issues — splits the issue + into dev-agent-sized sub-issues, removes `underspecified` label) - `$OPS_REPO_ROOT/prerequisites.md` — Prerequisite tree: versioned constraint map linking VISION.md objectives to their prerequisites. Planner owns the tree, humans steer by editing VISION.md. Tree grows organically as the planner discovers new prerequisites during runs - `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo) - +- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo) **Constraint focus**: The planner uses Theory of Constraints to avoid premature issue filing. 
Only the top 3 unresolved prerequisites that block the most diff --git a/planner/planner-run.sh b/planner/planner-run.sh index 3c71d44..313f6ef 100755 --- a/planner/planner-run.sh +++ b/planner/planner-run.sh @@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log" +LOG_FILE="$SCRIPT_DIR/planner.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -43,32 +43,17 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-planner-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="planner" - -# Override log() to append to planner-specific log file -# shellcheck disable=SC2034 -log() { - local agent="${LOG_AGENT:-planner}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" -} +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active planner acquire_cron_lock "/tmp/planner-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Planner run start ---" -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - -# ── Resolve agent identity for .profile repo ──────────────────────────── -resolve_agent_identity || true - # ── Load formula + context ─────────────────────────────────────────────── -load_formula_or_profile "planner" "$FACTORY_ROOT/formulas/run-planner.toml" || exit 1 +load_formula "$FACTORY_ROOT/formulas/run-planner.toml" build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md # ── Build structural analysis graph ────────────────────────────────────── @@ -87,8 +72,24 @@ $(cat 
"$MEMORY_FILE") " fi -# ── Prepare .profile context (lessons injection) ───────────────────────── -formula_prepare_profile_context +# ── Read recent journal files ────────────────────────────────────────── +JOURNAL_BLOCK="" +JOURNAL_DIR="$OPS_REPO_ROOT/journal/planner" +if [ -d "$JOURNAL_DIR" ]; then + # Load last 5 journal files (most recent first) for run history context + JOURNAL_FILES=$(find "$JOURNAL_DIR" -name '*.md' -type f | sort -r | head -5) + if [ -n "$JOURNAL_FILES" ]; then + JOURNAL_BLOCK=" +### Recent journal entries (journal/planner/) +" + while IFS= read -r jf; do + JOURNAL_BLOCK="${JOURNAL_BLOCK} +#### $(basename "$jf") +$(cat "$jf") +" + done <<< "$JOURNAL_FILES" + fi +fi # ── Read scratch file (compaction survival) ─────────────────────────────── SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") @@ -104,7 +105,7 @@ build_sdk_prompt_footer " PROMPT="You are the strategic planner for ${FORGE_REPO}. Work through the formula below. ## Project context -${CONTEXT_BLOCK}${MEMORY_BLOCK}$(formula_lessons_block) +${CONTEXT_BLOCK}${MEMORY_BLOCK}${JOURNAL_BLOCK} ${GRAPH_SECTION} ${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} } @@ -124,8 +125,5 @@ export CLAUDE_MODEL="opus" agent_run --worktree "$WORKTREE" "$PROMPT" log "agent_run complete" -# Write journal entry post-session -profile_write_journal "planner-run" "Planner run $(date -u +%Y-%m-%d)" "complete" "" || true - rm -f "$SCRATCH_FILE" log "--- Planner run done ---" diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md index d0bae51..327a842 100644 --- a/predictor/AGENTS.md +++ b/predictor/AGENTS.md @@ -1,4 +1,4 @@ - + # Predictor Agent **Role**: Abstract adversary (the "goblin"). 
Runs a 2-step formula diff --git a/predictor/predictor-run.sh b/predictor/predictor-run.sh index 889fe1c..fb9bf51 100755 --- a/predictor/predictor-run.sh +++ b/predictor/predictor-run.sh @@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log" +LOG_FILE="$SCRIPT_DIR/predictor.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -44,40 +44,22 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-predictor-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="predictor" - -# Override log() to append to predictor-specific log file -# shellcheck disable=SC2034 -log() { - local agent="${LOG_AGENT:-predictor}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" -} +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active predictor acquire_cron_lock "/tmp/predictor-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Predictor run start ---" -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - -# ── Resolve agent identity for .profile repo ──────────────────────────── -resolve_agent_identity || true - # ── Load formula + context ─────────────────────────────────────────────── -load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1 +load_formula "$FACTORY_ROOT/formulas/run-predictor.toml" build_context_block AGENTS.md ops:RESOURCES.md VISION.md ops:prerequisites.md # ── Build structural analysis graph ────────────────────────────────────── 
build_graph_section -# ── Prepare .profile context (lessons injection) ───────────────────────── -formula_prepare_profile_context - # ── Read scratch file (compaction survival) ─────────────────────────────── SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") @@ -100,10 +82,9 @@ Use WebSearch for external signal scanning — be targeted (project dependencies and tools only, not general news). Limit to 3 web searches per run. ## Project context -${CONTEXT_BLOCK}$(formula_lessons_block) +${CONTEXT_BLOCK} ${GRAPH_SECTION} -${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} -} +${SCRATCH_CONTEXT} ## Formula ${FORMULA_CONTENT} @@ -117,8 +98,5 @@ formula_worktree_setup "$WORKTREE" agent_run --worktree "$WORKTREE" "$PROMPT" log "agent_run complete" -# Write journal entry post-session -profile_write_journal "predictor-run" "Predictor run $(date -u +%Y-%m-%d)" "complete" "" || true - rm -f "$SCRATCH_FILE" log "--- Predictor run done ---" diff --git a/projects/disinto.toml.example b/projects/disinto.toml.example index 61781e5..ea0b8c5 100644 --- a/projects/disinto.toml.example +++ b/projects/disinto.toml.example @@ -5,7 +5,7 @@ name = "disinto" repo = "johba/disinto" -ops_repo = "disinto-admin/disinto-ops" +ops_repo = "johba/disinto-ops" forge_url = "http://localhost:3000" repo_root = "/home/YOU/dark-factory" ops_repo_root = "/home/YOU/disinto-ops" diff --git a/review/AGENTS.md b/review/AGENTS.md index 6976c04..e010ff5 100644 --- a/review/AGENTS.md +++ b/review/AGENTS.md @@ -1,4 +1,4 @@ - + # Review Agent **Role**: AI-powered PR review — post structured findings and formal @@ -9,8 +9,8 @@ whose CI has passed and that lack a review for the current HEAD SHA, then spawns `review-pr.sh `. **Key files**: -- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. 
**Circuit breaker**: counts existing `` comments; skips a PR if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures). -- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids hardcoded 'origin'). Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. +- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. +- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. 
**Environment variables consumed**: - `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN) diff --git a/review/review-poll.sh b/review/review-poll.sh index 72a6e85..57a647c 100755 --- a/review/review-poll.sh +++ b/review/review-poll.sh @@ -23,15 +23,8 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log" MAX_REVIEWS=3 REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="review" - -# Override log() to append to review-specific log file -# shellcheck disable=SC2034 log() { - local agent="${LOG_AGENT:-review}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE" + printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" } # Log rotation @@ -133,11 +126,10 @@ if [ -n "$REVIEW_SIDS" ]; then log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})" - review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$? 
- if [ "$review_rc" -eq 0 ]; then + if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then REVIEWED=$((REVIEWED + 1)) else - log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" + log " #${pr_num} re-review failed" fi [ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break @@ -174,25 +166,10 @@ while IFS= read -r line; do log " #${PR_NUM} needs review (CI=success, SHA=${PR_SHA:0:7})" - # Circuit breaker: count existing review-error comments for this SHA - ERROR_COMMENTS=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API_BASE}/issues/${PR_NUM}/comments" | \ - jq --arg sha "$PR_SHA" \ - '[.[] | select(.body | contains(""))] | length') - - if [ "${ERROR_COMMENTS:-0}" -ge 3 ]; then - log " #${PR_NUM} blocked: ${ERROR_COMMENTS} consecutive error comments for ${PR_SHA:0:7}, skipping" - SKIPPED=$((SKIPPED + 1)) - continue - fi - - log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}" - - review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$? 
- if [ "$review_rc" -eq 0 ]; then + if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then REVIEWED=$((REVIEWED + 1)) else - log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" + log " #${PR_NUM} review failed" fi if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then diff --git a/review/review-pr.sh b/review/review-pr.sh index a0e0ada..0ae0fdb 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -27,18 +27,12 @@ source "$(dirname "$0")/../lib/env.sh" source "$(dirname "$0")/../lib/ci-helpers.sh" source "$(dirname "$0")/../lib/worktree.sh" source "$(dirname "$0")/../lib/agent-sdk.sh" -# shellcheck source=../lib/formula-session.sh -source "$(dirname "$0")/../lib/formula-session.sh" # Auto-pull factory code to pick up merged fixes before any logic runs git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true # --- Config --- PR_NUMBER="${1:?Usage: review-pr.sh [--force]}" - -# Change to project repo early — required before any git commands -# (factory root is not a git repo after image rebuild) -cd "${PROJECT_REPO_ROOT}" FORCE="${2:-}" API="${FORGE_API}" LOGFILE="${DISINTO_LOG_DIR}/review/review.log" @@ -62,16 +56,6 @@ if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 10 mv "$LOGFILE" "$LOGFILE.old" fi -# ============================================================================= -# RESOLVE FORGE REMOTE FOR GIT OPERATIONS -# ============================================================================= -resolve_forge_remote - -# ============================================================================= -# RESOLVE AGENT IDENTITY FOR .PROFILE REPO -# ============================================================================= -resolve_agent_identity || true - # ============================================================================= # MEMORY GUARD # ============================================================================= @@ -137,7 +121,7 @@ PREV_REV=$(printf '%s' "$ALL_COMMENTS" 
| jq -r --arg s "$PR_SHA" \ if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body') PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP '
-

Vault — being redesigned

+

Vault — quality gate

-
Redesign in progress
-

The vault is being redesigned as a PR-based approval workflow on the ops repo. Instead of polling pending files, vault items will be created as PRs that require admin approval before execution.

-

See issues #73-#77 for the design: #75 defines the vault.sh helper for creating vault PRs, #76 rewrites the dispatcher to poll for merged vault PRs, #77 adds branch protection requiring admin approval.

+
How it works
+

The vault sits between agents and dangerous actions. Before an agent can execute a risky operation (force push, deploy, delete), the vault reviews the request.

+

Auto-approve — safe, well-understood operations pass through instantly. Escalate — risky or novel operations get sent to a human via Matrix. Reject — clearly unsafe actions are blocked.

+

You define the boundaries. The vault enforces them. This is what lets you sleep while the factory runs.

@@ -518,7 +524,8 @@ disinto/ ├── predictor/ predictor-run.sh (daily cron executor) ├── planner/ planner-run.sh (weekly cron executor) ├── supervisor/ supervisor-run.sh (health monitoring) -├── vault/ vault-env.sh (vault redesign in progress, see #73-#77) +├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh +├── action/ action-poll.sh, action-agent.sh ├── lib/ env.sh, agent-session.sh, ci-helpers.sh ├── projects/ *.toml per-project config ├── formulas/ TOML specs for multi-step agent tasks diff --git a/skill/SKILL.md b/skill/SKILL.md new file mode 100644 index 0000000..4077ae0 --- /dev/null +++ b/skill/SKILL.md @@ -0,0 +1,350 @@ +--- +name: disinto +description: >- + Operate the disinto autonomous code factory. Use when bootstrapping a new + project with `disinto init`, managing factory agents, filing issues on the + forge, reading agent journals, querying CI pipelines, checking the dependency + graph, or inspecting factory health. +license: AGPL-3.0 +metadata: + author: johba + version: "0.2.0" +env_vars: + required: + - FORGE_TOKEN + - FORGE_API + - PROJECT_REPO_ROOT + optional: + - WOODPECKER_SERVER + - WOODPECKER_TOKEN + - WOODPECKER_REPO_ID +tools: + - bash + - curl + - jq + - git +--- + +# Disinto Factory Skill + +You are the human's assistant for operating the disinto autonomous code factory. +You ask the questions, explain the choices, and run the commands on the human's +behalf. The human makes decisions; you execute. + +Disinto manages eight agents that implement issues, review PRs, plan from a +vision, predict risks, groom the backlog, gate actions, and keep the system +healthy — all driven by cron and Claude. 
+ +## System requirements + +Before bootstrapping, verify the target machine meets these minimums: + +| Requirement | Detail | +|-------------|--------| +| **VPS** | 8 GB+ RAM (4 GB swap recommended) | +| **Docker + Docker Compose** | Required for the default containerized stack | +| **Claude Code CLI** | Authenticated with API access (`claude --version`) | +| **`CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1`** | Set in the factory environment — prevents auto-update pings in production | +| **Disk** | Sufficient for CI images, git mirrors, and agent worktrees (40 GB+ recommended) | +| **tmux** | Required for persistent dev sessions | +| **git, jq, python3, curl** | Used by agents and helper scripts | + +Optional but recommended: + +| Tool | Purpose | +|------|---------| +| **sops + age** | Encrypt secrets at rest (`.env.enc`) | + +## Bootstrapping with `disinto init` + +The primary setup path. Walk the human through each step. + +### Step 1 — Check prerequisites + +Confirm Docker, Claude Code CLI, and required tools are installed: + +```bash +docker --version && docker compose version +claude --version +tmux -V && git --version && jq --version && python3 --version +``` + +### Step 2 — Run `disinto init` + +```bash +disinto init +``` + +Accepts GitHub, Codeberg, or any git URL. Common variations: + +```bash +disinto init https://github.com/org/repo # default (docker compose) +disinto init org/repo --forge-url http://forge:3000 # custom forge URL +disinto init org/repo --bare # bare-metal, no compose +disinto init org/repo --yes # skip confirmation prompts +``` + +### What `disinto init` does + +1. **Generates `docker-compose.yml`** with four services: Forgejo, Woodpecker + server, Woodpecker agent, and the agents container. +2. **Starts a local Forgejo instance** via Docker (at `http://localhost:3000`). +3. **Creates admin + bot users** (dev-bot, review-bot) with API tokens. +4. **Creates the repo** on Forgejo and pushes the code. +5. 
**Sets up Woodpecker CI** — OAuth2 app on Forgejo, activates the repo. +6. **Generates `projects/.toml`** — per-project config with paths, CI IDs, + and forge URL. +7. **Creates standard labels** (backlog, in-progress, blocked, etc.). +8. **Configures git mirror remotes** if `[mirrors]` is set in the TOML. +9. **Encrypts secrets** to `.env.enc` if sops + age are available. +10. **Brings up the full docker compose stack**. + +### Step 3 — Set environment variable + +Ensure `CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1` is set in the factory +environment (`.env` or the agents container). This prevents Claude Code from +making auto-update and telemetry requests in production. + +### Step 4 — Verify + +```bash +disinto status +``` + +## Docker stack architecture + +The default deployment is a docker-compose stack with four services: + +``` +┌──────────────────────────────────────────────────┐ +│ disinto-net │ +│ │ +│ ┌──────────┐ ┌─────────────┐ ┌────────────┐ │ +│ │ Forgejo │ │ Woodpecker │ │ Woodpecker │ │ +│ │ (forge) │◀─│ (CI server)│◀─│ (agent) │ │ +│ │ :3000 │ │ :8000 │ │ │ │ +│ └──────────┘ └─────────────┘ └────────────┘ │ +│ ▲ │ +│ │ │ +│ ┌─────┴──────────────────────────────────────┐ │ +│ │ agents │ │ +│ │ (cron → dev, review, gardener, planner, │ │ +│ │ predictor, supervisor, action, vault) │ │ +│ │ Claude CLI mounted from host │ │ +│ └────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────┘ +``` + +| Service | Image | Purpose | +|---------|-------|---------| +| **forgejo** | `codeberg.org/forgejo/forgejo:11.0` | Git forge, issue tracker, PR reviews | +| **woodpecker** | `woodpeckerci/woodpecker-server:v3` | CI server, triggers on push | +| **woodpecker-agent** | `woodpeckerci/woodpecker-agent:v3` | Runs CI pipelines in Docker | +| **agents** | `./docker/agents` (custom) | All eight factory agents, driven by cron | + +The agents container mounts the Claude CLI binary and `~/.claude` credentials +from the host. 
Secrets are loaded from `.env` (or decrypted from `.env.enc`). + +## Git mirror + +The factory assumes a local git mirror on the Forgejo instance to avoid +rate limits from upstream forges (GitHub, Codeberg). When `disinto init` runs: + +1. The repo is cloned from the upstream URL. +2. A `forgejo` remote is added pointing to the local Forgejo instance. +3. All branches and tags are pushed to Forgejo. +4. If `[mirrors]` is configured in the project TOML, additional remotes + (e.g. GitHub, Codeberg) are set up and synced via `lib/mirrors.sh`. + +All agent work happens against the local Forgejo forge. This means: +- No GitHub/Codeberg API rate limits on polling. +- CI triggers are local (Woodpecker watches Forgejo webhooks). +- Mirror pushes are fire-and-forget background operations after merge. + +To configure mirrors in the project TOML: + +```toml +[mirrors] +github = "git@github.com:user/repo.git" +codeberg = "git@codeberg.org:user/repo.git" +``` + +## Required environment + +| Variable | Purpose | +|----------|---------| +| `FORGE_TOKEN` | Forgejo/Gitea API token with repo scope | +| `FORGE_API` | Base API URL, e.g. 
`https://forge.example/api/v1/repos/owner/repo` | +| `PROJECT_REPO_ROOT` | Absolute path to the checked-out disinto repository | + +Optional: + +| Variable | Purpose | +|----------|---------| +| `WOODPECKER_SERVER` | Woodpecker CI base URL (for pipeline queries) | +| `WOODPECKER_TOKEN` | Woodpecker API bearer token | +| `WOODPECKER_REPO_ID` | Numeric repo ID in Woodpecker | + +## The eight agents + +| Agent | Role | Runs via | +|-------|------|----------| +| **Dev** | Picks backlog issues, implements in worktrees, opens PRs | `dev/dev-poll.sh` (cron) | +| **Review** | Reviews PRs against conventions, approves or requests changes | `review/review-poll.sh` (cron) | +| **Gardener** | Grooms backlog: dedup, quality gates, dust bundling, stale cleanup | `gardener/gardener-run.sh` (cron 0,6,12,18 UTC) | +| **Planner** | Tracks vision progress, maintains prerequisite tree, files constraint issues | `planner/planner-run.sh` (cron daily 07:00 UTC) | +| **Predictor** | Challenges claims, detects structural risks, files predictions | `predictor/predictor-run.sh` (cron daily 06:00 UTC) | +| **Supervisor** | Monitors health (RAM, disk, CI, agents), auto-fixes, escalates | `supervisor/supervisor-run.sh` (cron */20) | +| **Action** | Executes operational tasks dispatched by planner via formulas | `action/action-poll.sh` (cron) | +| **Vault** | Gates dangerous actions, manages resource procurement | `vault/vault-poll.sh` (cron) | + +### How agents interact + +``` +Planner ──creates-issues──▶ Backlog ◀──grooms── Gardener + │ │ + │ ▼ + │ Dev (implements) + │ │ + │ ▼ + │ Review (approves/rejects) + │ │ + │ ▼ + ▼ Merged +Predictor ──challenges──▶ Planner (triages predictions) +Supervisor ──monitors──▶ All agents (health, escalation) +Vault ──gates──▶ Action, Dev (dangerous operations) +``` + +### Issue lifecycle + +`backlog` → `in-progress` → PR → CI → review → merge → closed. 
+ +Key labels: `backlog`, `priority`, `in-progress`, `blocked`, `underspecified`, +`tech-debt`, `vision`, `action`, `prediction/unreviewed`. + +Issues declare dependencies in a `## Dependencies` section listing `#N` +references. Dev-poll only picks issues whose dependencies are all closed. + +## Available scripts + +- **`scripts/factory-status.sh`** — Show agent status, open issues, and CI + pipeline state. Pass `--agents`, `--issues`, or `--ci` for specific sections. +- **`scripts/file-issue.sh`** — Create an issue on the forge with proper labels + and formatting. Pass `--title`, `--body`, and optionally `--labels`. +- **`scripts/read-journal.sh`** — Read agent journal entries. Pass agent name + (`planner`, `supervisor`) and optional `--date YYYY-MM-DD`. + +## Common workflows + +### 1. Bootstrap a new project + +Walk the human through `disinto init`: + +```bash +# 1. Verify prerequisites +docker --version && claude --version + +# 2. Bootstrap +disinto init https://github.com/org/repo + +# 3. Verify +disinto status +``` + +### 2. Check factory health + +```bash +bash scripts/factory-status.sh +``` + +This shows: which agents are active, recent open issues, and CI pipeline +status. Use `--agents` for just the agent status section. + +### 3. Read what the planner decided today + +```bash +bash scripts/read-journal.sh planner +``` + +Returns today's planner journal: predictions triaged, prerequisite tree +updates, top constraints, issues created, and observations. + +### 4. File a new issue + +```bash +bash scripts/file-issue.sh --title "fix: broken auth flow" \ + --body "$(cat scripts/../templates/issue-template.md)" \ + --labels backlog +``` + +Or generate the body inline — the template shows the expected format with +acceptance criteria and affected files sections. + +### 5. 
Check the dependency graph + +```bash +python3 "${PROJECT_REPO_ROOT}/lib/build-graph.py" \ + --project-root "${PROJECT_REPO_ROOT}" \ + --output /tmp/graph-report.json +cat /tmp/graph-report.json | jq '.analyses' +``` + +The graph builder parses VISION.md, the prerequisite tree, formulas, and open +issues. It detects: orphan issues (not referenced), dependency cycles, +disconnected clusters, bottleneck nodes, and thin objectives. + +### 6. Query a specific CI pipeline + +```bash +bash scripts/factory-status.sh --ci +``` + +Or query Woodpecker directly: + +```bash +curl -s -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ + "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines?per_page=5" \ + | jq '.[] | {number, status, commit: .commit[:8], branch}' +``` + +### 7. Manage the docker stack + +```bash +disinto up # start all services +disinto down # stop all services +disinto logs # tail all service logs +disinto logs forgejo # tail specific service +disinto shell # shell into agents container +``` + +### 8. Read and interpret VISION.md progress + +Read `VISION.md` at the repo root for the full vision. Then cross-reference +with the prerequisite tree: + +```bash +cat "${OPS_REPO_ROOT}/prerequisites.md" +``` + +The prerequisite tree maps vision objectives to concrete issues. Items marked +`[x]` are complete; items marked `[ ]` show what blocks progress. The planner +updates this daily. + +## Gotchas + +- **Single-threaded pipeline**: only one issue is in-progress per project at a + time. Don't file issues expecting parallel work. +- **Secrets via env vars only**: never embed secrets in issue bodies, PR + descriptions, or comments. Use `$VAR_NAME` references. +- **Formulas are not skills**: formulas in `formulas/` are TOML issue templates + for multi-step agent tasks. Skills teach assistants; formulas drive agents. +- **Predictor journals**: the predictor does not write journal files. 
Its memory + lives in `prediction/unreviewed` and `prediction/actioned` issues. +- **State files**: agent activity is tracked via `state/.{agent}-active` files. + These are presence files, not logs. +- **ShellCheck required**: all `.sh` files must pass ShellCheck. CI enforces this. +- **Local forge is the source of truth**: all agent work targets the local + Forgejo instance. Upstream mirrors are synced after merge. +- **`CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1`**: must be set in production + to prevent Claude Code from making auto-update requests. diff --git a/skill/scripts/factory-status.sh b/skill/scripts/factory-status.sh new file mode 100755 index 0000000..ee0d683 --- /dev/null +++ b/skill/scripts/factory-status.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +set -euo pipefail + +# factory-status.sh — query agent status, open issues, and CI pipelines +# +# Usage: factory-status.sh [--agents] [--issues] [--ci] [--help] +# No flags: show all sections +# --agents: show only agent activity status +# --issues: show only open issues summary +# --ci: show only CI pipeline status +# +# Required env: FORGE_TOKEN, FORGE_API, PROJECT_REPO_ROOT +# Optional env: WOODPECKER_SERVER, WOODPECKER_TOKEN, WOODPECKER_REPO_ID + +usage() { + sed -n '3,10s/^# //p' "$0" + exit 0 +} + +show_agents=false +show_issues=false +show_ci=false +show_all=true + +while [[ $# -gt 0 ]]; do + case "$1" in + --agents) show_agents=true; show_all=false; shift ;; + --issues) show_issues=true; show_all=false; shift ;; + --ci) show_ci=true; show_all=false; shift ;; + --help|-h) usage ;; + *) echo "Unknown option: $1" >&2; exit 1 ;; + esac +done + +: "${FORGE_TOKEN:?FORGE_TOKEN is required}" +: "${FORGE_API:?FORGE_API is required}" +: "${PROJECT_REPO_ROOT:?PROJECT_REPO_ROOT is required}" + +forge_get() { + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Accept: application/json" \ + "${FORGE_API}$1" +} + +# --- Agent status --- +print_agent_status() { + echo "## Agent Status" + echo "" + local 
state_dir="${PROJECT_REPO_ROOT}/state" + local agents=(dev review gardener supervisor planner predictor action vault) + for agent in "${agents[@]}"; do + local state_file="${state_dir}/.${agent}-active" + if [[ -f "$state_file" ]]; then + echo " ${agent}: ACTIVE (since $(stat -c '%y' "$state_file" 2>/dev/null | cut -d. -f1 || echo 'unknown'))" + else + echo " ${agent}: idle" + fi + done + echo "" +} + +# --- Open issues --- +print_open_issues() { + echo "## Open Issues" + echo "" + local issues + issues=$(forge_get "/issues?state=open&type=issues&limit=50&sort=created&direction=desc" 2>/dev/null) || { + echo " (failed to fetch issues from forge)" + echo "" + return + } + local count + count=$(echo "$issues" | jq 'length') + echo " Total open: ${count}" + echo "" + + # Group by key labels + for label in backlog priority in-progress blocked; do + local labeled + labeled=$(echo "$issues" | jq --arg l "$label" '[.[] | select(.labels[]?.name == $l)]') + local n + n=$(echo "$labeled" | jq 'length') + if [[ "$n" -gt 0 ]]; then + echo " [${label}] (${n}):" + echo "$labeled" | jq -r '.[] | " #\(.number) \(.title)"' | head -10 + echo "" + fi + done +} + +# --- CI pipelines --- +print_ci_status() { + echo "## CI Pipelines" + echo "" + if [[ -z "${WOODPECKER_SERVER:-}" || -z "${WOODPECKER_TOKEN:-}" || -z "${WOODPECKER_REPO_ID:-}" ]]; then + echo " (Woodpecker not configured — set WOODPECKER_SERVER, WOODPECKER_TOKEN, WOODPECKER_REPO_ID)" + echo "" + return + fi + local pipelines + pipelines=$(curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ + "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines?per_page=10" 2>/dev/null) || { + echo " (failed to fetch pipelines from Woodpecker)" + echo "" + return + } + echo "$pipelines" | jq -r '.[] | " #\(.number) [\(.status)] \(.branch) \(.commit[:8]) — \(.message // "" | split("\n")[0])"' | head -10 + echo "" +} + +# --- Output --- +if $show_all || $show_agents; then print_agent_status; fi +if $show_all || $show_issues; 
then print_open_issues; fi +if $show_all || $show_ci; then print_ci_status; fi diff --git a/skill/scripts/file-issue.sh b/skill/scripts/file-issue.sh new file mode 100755 index 0000000..fdcf788 --- /dev/null +++ b/skill/scripts/file-issue.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +set -euo pipefail + +# file-issue.sh — create an issue on the forge with labels +# +# Usage: file-issue.sh --title TITLE --body BODY [--labels LABEL1,LABEL2] [--help] +# +# Required env: FORGE_TOKEN, FORGE_API + +usage() { + sed -n '3,8s/^# //p' "$0" + exit 0 +} + +title="" +body="" +labels="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --title) title="$2"; shift 2 ;; + --body) body="$2"; shift 2 ;; + --labels) labels="$2"; shift 2 ;; + --help|-h) usage ;; + *) printf 'file-issue: unknown option: %s\n' "$1" >&2; exit 1 ;; + esac +done + +: "${FORGE_TOKEN:?FORGE_TOKEN is required}" +: "${FORGE_API:?FORGE_API is required}" + +if [[ -z "$title" ]]; then + echo "Error: --title is required" >&2 + exit 1 +fi +if [[ -z "$body" ]]; then + echo "Error: --body is required" >&2 + exit 1 +fi + +# --- Resolve label names to IDs --- +label_ids="[]" +if [[ -n "$labels" ]]; then + all_labels=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Accept: application/json" \ + "${FORGE_API}/labels?limit=50" 2>/dev/null) || { + echo "Warning: could not fetch labels, creating issue without labels" >&2 + all_labels="[]" + } + label_ids="[" + first=true + IFS=',' read -ra label_arr <<< "$labels" + for lname in "${label_arr[@]}"; do + lname=$(echo "$lname" | xargs) # trim whitespace + lid=$(echo "$all_labels" | jq -r --arg n "$lname" '.[] | select(.name == $n) | .id') + if [[ -n "$lid" ]]; then + if ! 
$first; then label_ids+=","; fi + label_ids+="$lid" + first=false + else + echo "Warning: label '${lname}' not found, skipping" >&2 + fi + done + label_ids+="]" +fi + +# --- Secret scan (refuse to post bodies containing obvious secrets) --- +if echo "$body" | grep -qiE '(sk-[a-zA-Z0-9]{20,}|ghp_[a-zA-Z0-9]{36}|AKIA[A-Z0-9]{16}|-----BEGIN (RSA |EC )?PRIVATE KEY)'; then + echo "Error: body appears to contain a secret — refusing to post" >&2 + exit 1 +fi + +# --- Create the issue --- +payload=$(jq -n \ + --arg t "$title" \ + --arg b "$body" \ + --argjson l "$label_ids" \ + '{title: $t, body: $b, labels: $l}') + +response=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + -d "$payload" \ + "${FORGE_API}/issues") || { + echo "Error: failed to create issue" >&2 + exit 1 +} + +number=$(echo "$response" | jq -r '.number') +url=$(echo "$response" | jq -r '.html_url') +echo "Created issue #${number}: ${url}" diff --git a/skill/scripts/read-journal.sh b/skill/scripts/read-journal.sh new file mode 100755 index 0000000..78bd787 --- /dev/null +++ b/skill/scripts/read-journal.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +set -euo pipefail + +# read-journal.sh — read agent journal entries +# +# Usage: read-journal.sh AGENT [--date YYYY-MM-DD] [--list] [--help] +# AGENT: planner, supervisor, or predictor +# --date: specific date (default: today) +# --list: list available journal dates instead of reading +# +# Required env: OPS_REPO_ROOT + +usage() { + cat <<'USAGE' +read-journal.sh AGENT [--date YYYY-MM-DD] [--list] [--help] + AGENT: planner, supervisor, or predictor + --date: specific date (default: today) + --list: list available journal dates instead of reading +USAGE + exit 0 +} + +agent="" +target_date=$(date +%Y-%m-%d) +list_mode=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --date) target_date="$2"; shift 2 ;; + --list) list_mode=true; shift ;; + --help|-h) usage ;; + -*) echo "Unknown option: $1" >&2; exit 1 ;; + 
*) + if [[ -z "$agent" ]]; then + agent="$1" + else + echo "Unexpected argument: $1" >&2; exit 1 + fi + shift + ;; + esac +done + +: "${OPS_REPO_ROOT:?OPS_REPO_ROOT is required}" + +if [[ -z "$agent" ]]; then + echo "Error: agent name is required (planner, supervisor, predictor)" >&2 + echo "" >&2 + usage +fi + +# --- Resolve journal directory --- +case "$agent" in + planner) journal_dir="${OPS_REPO_ROOT}/journal/planner" ;; + supervisor) journal_dir="${OPS_REPO_ROOT}/journal/supervisor" ;; + predictor) + echo "The predictor does not write journal files." + echo "Its memory lives in forge issues labeled 'prediction/unreviewed' and 'prediction/actioned'." + echo "" + echo "Query predictions with:" + echo " curl -sH 'Authorization: token \${FORGE_TOKEN}' '\${FORGE_API}/issues?state=open&labels=prediction%2Funreviewed'" + exit 0 + ;; + *) + echo "Error: unknown agent '${agent}'" >&2 + echo "Available: planner, supervisor, predictor" >&2 + exit 1 + ;; +esac + +if [[ ! -d "$journal_dir" ]]; then + echo "No journal directory found at ${journal_dir}" >&2 + exit 1 +fi + +# --- List mode --- +if $list_mode; then + echo "Available journal dates for ${agent}:" + find "$journal_dir" -maxdepth 1 -name '*.md' -printf '%f\n' 2>/dev/null | sed 's|\.md$||' | sort -r | head -20 + exit 0 +fi + +# --- Read specific date --- +journal_file="${journal_dir}/${target_date}.md" +if [[ -f "$journal_file" ]]; then + cat "$journal_file" +else + echo "No journal entry for ${agent} on ${target_date}" >&2 + echo "" >&2 + echo "Recent entries:" >&2 + find "$journal_dir" -maxdepth 1 -name '*.md' -printf '%f\n' 2>/dev/null | sed 's|\.md$||' | sort -r | head -5 >&2 + exit 1 +fi diff --git a/skill/templates/issue-template.md b/skill/templates/issue-template.md new file mode 100644 index 0000000..2399bc7 --- /dev/null +++ b/skill/templates/issue-template.md @@ -0,0 +1,21 @@ +## Summary + + + +## Acceptance criteria + +- [ ] +- [ ] +- [ ] + +## Affected files + + + +- `path/to/file.sh` + +## 
Dependencies + + + +None diff --git a/state/.gitignore b/state/.gitignore index eb205a2..0a0c1e8 100644 --- a/state/.gitignore +++ b/state/.gitignore @@ -1,4 +1,2 @@ # Active-state files are runtime state, not committed .*-active -# Supervisor is always active in the edge container — committed guard file -!.supervisor-active diff --git a/supervisor/AGENTS.md b/supervisor/AGENTS.md index 3348c86..322ab4b 100644 --- a/supervisor/AGENTS.md +++ b/supervisor/AGENTS.md @@ -1,4 +1,4 @@ - + # Supervisor Agent **Role**: Health monitoring and auto-remediation, executed as a formula-driven @@ -9,17 +9,19 @@ resources or human decisions, files vault items instead of escalating directly. **Trigger**: `supervisor-run.sh` runs every 20 min via cron. Sources `lib/guard.sh` and calls `check_active supervisor` first — skips if -`$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` -via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with -pre-collected metrics as context, and cleans up on completion or timeout (20 min max session). -No action issues — the supervisor runs directly from cron like the planner and predictor. +`$FACTORY_ROOT/state/.supervisor-active` is absent. Then creates a tmux session +with `claude --model sonnet`, injects `formulas/run-supervisor.toml` with +pre-collected metrics as context, monitors the phase file, and cleans up on +completion or timeout (20 min max session). No action issues — the supervisor +runs directly from cron like the planner and predictor. 
**Key files**: - `supervisor/supervisor-run.sh` — Cron wrapper + orchestrator: lock, memory guard, - runs preflight.sh, sources disinto project config, runs claude -p via agent-sdk.sh, - injects formula prompt with metrics, handles crash recovery + runs preflight.sh, sources disinto project config, creates tmux session, injects + formula prompt with metrics, monitors phase file, handles crash recovery via + `run_formula_and_monitor` - `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap, - load), Docker status, active sessions + phase files, lock files, agent log + load), Docker status, active tmux sessions + phase files, lock files, agent log tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase` files for `PHASE:escalate` entries and auto-removes any whose linked issue @@ -29,8 +31,12 @@ No action issues — the supervisor runs directly from cron like the planner and - `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review, health-assessment, decide-actions, report, journal) with `needs` dependencies. Claude evaluates all metrics and takes actions in a single interactive session +- `$OPS_REPO_ROOT/journal/supervisor/*.md` — Daily health logs from each supervisor run +- `supervisor/PROMPT.md` — Best-practices reference for remediation actions - `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory, disk, CI, git, dev-agent, review-agent, forge) +- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by + supervisor-run.sh + formula) **Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled), P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). @@ -41,5 +47,5 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). 
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries **Lifecycle**: supervisor-run.sh (cron */20) → lock + memory guard → run -preflight.sh (collect metrics) → load formula + context → run claude -p via agent-sdk.sh -→ Claude assesses health, auto-fixes, writes journal → `PHASE:done`. +preflight.sh (collect metrics) → load formula + context → create tmux +session → Claude assesses health, auto-fixes, writes journal → `PHASE:done`. diff --git a/supervisor/PROMPT.md b/supervisor/PROMPT.md new file mode 100644 index 0000000..7381785 --- /dev/null +++ b/supervisor/PROMPT.md @@ -0,0 +1,118 @@ +# Supervisor Agent + +You are the supervisor agent for `$FORGE_REPO`. You were called because +`supervisor-poll.sh` detected an issue it couldn't auto-fix. + +## Priority Order + +1. **P0 — Memory crisis:** RAM <500MB or swap >3GB +2. **P1 — Disk pressure:** Disk >80% +3. **P2 — Factory stopped:** Dev-agent dead, CI down, git broken, all backlog dep-blocked +4. **P3 — Factory degraded:** Derailed PR, stuck pipeline, unreviewed PRs, circular deps, stale deps +5. **P4 — Housekeeping:** Stale processes, log rotation + +## What You Can Do + +Fix the issue yourself. You have full shell access and `--dangerously-skip-permissions`. + +Before acting, read the relevant knowledge file from the ops repo: +- Memory issues → `cat ${OPS_REPO_ROOT}/knowledge/memory.md` +- Disk issues → `cat ${OPS_REPO_ROOT}/knowledge/disk.md` +- CI issues → `cat ${OPS_REPO_ROOT}/knowledge/ci.md` +- forge / rate limits → `cat ${OPS_REPO_ROOT}/knowledge/forge.md` +- Dev-agent issues → `cat ${OPS_REPO_ROOT}/knowledge/dev-agent.md` +- Review-agent issues → `cat ${OPS_REPO_ROOT}/knowledge/review-agent.md` +- Git issues → `cat ${OPS_REPO_ROOT}/knowledge/git.md` + +## Credentials & API Access + +Environment variables are set. 
Source the helper library for convenience functions: +```bash +source ${FACTORY_ROOT}/lib/env.sh +``` + +This gives you: +- `forge_api GET "/pulls?state=open"` — forge API (uses $FORGE_TOKEN) +- `wpdb -c "SELECT ..."` — Woodpecker Postgres (uses $WOODPECKER_DB_PASSWORD) +- `woodpecker_api "/repos/$WOODPECKER_REPO_ID/pipelines"` — Woodpecker REST API (uses $WOODPECKER_TOKEN) +- `$FORGE_REVIEW_TOKEN` — for posting reviews as the review_bot account +- `$PROJECT_REPO_ROOT` — path to the target project repo +- `$PROJECT_NAME` — short project name (for worktree prefixes, container names) +- `$PRIMARY_BRANCH` — main branch (master or main) +- `$FACTORY_ROOT` — path to the disinto repo + +## Handling Dependency Alerts + +### Circular dependencies (P3) +When you see "Circular dependency deadlock: #A -> #B -> #A", the backlog is permanently +stuck. Your job: figure out the correct dependency direction and fix the wrong one. + +1. Read both issue bodies: `forge_api GET "/issues/A"`, `forge_api GET "/issues/B"` +2. Read the referenced source files in `$PROJECT_REPO_ROOT` to understand which change + actually depends on which +3. Edit the issue that has the incorrect dep to remove the `#NNN` reference from its + `## Dependencies` section (replace with `- None` if it was the only dep) +4. If the correct direction is unclear from code, file a vault item with both issue summaries + +Use the forge API to edit issue bodies: +```bash +# Read current body +BODY=$(forge_api GET "/issues/NNN" | jq -r '.body') +# Edit (remove the circular ref, keep other deps) +NEW_BODY=$(echo "$BODY" | sed 's/- #XXX/- None/') +forge_api PATCH "/issues/NNN" -d "$(jq -nc --arg b "$NEW_BODY" '{body:$b}')" +``` + +### Stale dependencies (P3) +When you see "Stale dependency: #A blocked by #B (open N days)", the dep may be +obsolete or misprioritized. Investigate: + +1. Check if dep #B is still relevant (read its body, check if the code it targets changed) +2. 
If the dep is obsolete → remove it from #A's `## Dependencies` section +3. If the dep is still needed → file a vault item, suggesting to prioritize #B or split #A + +### Dev-agent blocked (P2) +When you see "Dev-agent blocked: last N polls all report 'no ready issues'": + +1. Check if circular deps exist (they'll appear as separate P3 alerts) +2. Check if all backlog issues depend on a single unmerged issue — if so, file a vault + item to prioritize that blocker +3. If no clear blocker, file a vault item with the list of blocked issues and their deps + +## When you cannot fix it + +File a vault procurement item so the human is notified through the vault: +```bash +cat > "${OPS_REPO_ROOT}/vault/pending/supervisor-$(date -u +%Y%m%d-%H%M)-issue.md" <<'VAULT_EOF' +# +## What + +## Why + +## Unblocks +- Factory health: +VAULT_EOF +``` + +The vault-poll will notify the human and track the request. + +Do NOT talk to the human directly. The vault is the factory's only interface +to the human for resources and approvals. Fix first, report after. + +## Output + +``` +FIXED: +``` +or +``` +VAULT: filed $OPS_REPO_ROOT/vault/pending/.md — +``` + +## Learning + +If you discover something new, append it to the relevant knowledge file in the ops repo: +```bash +echo "### Lesson title +Description of what you learned." 
>> "${OPS_REPO_ROOT}/knowledge/.md" +``` diff --git a/supervisor/preflight.sh b/supervisor/preflight.sh index e9e4de2..ba740b7 100755 --- a/supervisor/preflight.sh +++ b/supervisor/preflight.sh @@ -132,7 +132,8 @@ echo "" echo "## Recent Agent Logs" for _log in supervisor/supervisor.log dev/dev-agent.log review/review.log \ - gardener/gardener.log planner/planner.log predictor/predictor.log; do + gardener/gardener.log planner/planner.log predictor/predictor.log \ + action/action.log; do _logpath="${FACTORY_ROOT}/${_log}" if [ -f "$_logpath" ]; then _log_age_min=$(( ($(date +%s) - $(stat -c %Y "$_logpath" 2>/dev/null || echo 0)) / 60 )) diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh new file mode 100755 index 0000000..1e83966 --- /dev/null +++ b/supervisor/supervisor-poll.sh @@ -0,0 +1,808 @@ +#!/usr/bin/env bash +set -euo pipefail +# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes +# +# Two-layer architecture: +# 1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes +# 2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml +# +# Runs every 10min via cron. +# +# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh +# +# Peek: cat /tmp/supervisor-status +# Log: tail -f /path/to/disinto/supervisor/supervisor.log + +source "$(dirname "$0")/../lib/env.sh" +source "$(dirname "$0")/../lib/ci-helpers.sh" + +LOGFILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log" +STATUSFILE="/tmp/supervisor-status" +LOCKFILE="/tmp/supervisor-poll.lock" +PROMPT_FILE="${FACTORY_ROOT}/supervisor/PROMPT.md" +PROJECTS_DIR="${FACTORY_ROOT}/projects" + +METRICS_FILE="${DISINTO_LOG_DIR}/metrics/supervisor-metrics.jsonl" + +emit_metric() { + printf '%s\n' "$1" >> "$METRICS_FILE" +} + +# Count all matching items from a paginated forge API endpoint. 
# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues"
# Returns total count across all pages (max 20 pages = 1000 items).
#
# Arguments: $1 - endpoint path that already carries a query string; the
#                 paging parameters are appended with '&'.
# Outputs:   total item count on stdout. A failed, non-array or non-numeric
#            page counts as 0, which also ends pagination.
codeberg_count_paginated() {
  local endpoint="$1" total=0 page=1 count
  while true; do
    # Use the pipeline status directly instead of `$(... || echo 0)`: under
    # `set -o pipefail` the old form appended "0" to whatever jq had already
    # printed when forge_api failed, producing a multi-line value that broke
    # the arithmetic below.
    if ! count=$(forge_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null \
        | jq 'if type == "array" then length else 0 end' 2>/dev/null); then
      count=0
    fi
    # Anything that is not a plain non-negative integer is treated as 0.
    [[ "$count" =~ ^[0-9]+$ ]] || count=0
    total=$((total + count))
    [ "$count" -lt 50 ] && break
    page=$((page + 1))
    [ "$page" -gt 20 ] && break
  done
  echo "$total"
}

# Drop entries older than 30 days from $METRICS_FILE (JSON lines with a .ts
# field). Rotation is best-effort: any failure leaves the file untouched.
#
# Globals: METRICS_FILE (read; rewritten in place through "${METRICS_FILE}.tmp")
# Returns: 0 always, so a bad metrics line can never abort the poll under
#          `set -e`.
rotate_metrics() {
  [ -f "$METRICS_FILE" ] || return 0
  local cutoff tmpfile
  # `-d '30 days ago'` is GNU date syntax; skip rotation if it is unsupported.
  cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M 2>/dev/null) || return 0
  tmpfile="${METRICS_FILE}.tmp"
  # Replace the file only when jq succeeded. A failed run may have written
  # partial output, which must not overwrite the real data.
  if jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \
      "$METRICS_FILE" > "$tmpfile" 2>/dev/null; then
    mv "$tmpfile" "$METRICS_FILE" || rm -f "$tmpfile"
  else
    rm -f "$tmpfile"
  fi
  return 0
}

# Prevent overlapping runs
if [ -f "$LOCKFILE" ]; then
  # `|| true`: the lock can disappear between the -f test and the read; under
  # `set -e` a failing cat inside the assignment would abort the script.
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || true)
  # Only trust a positive integer PID. `kill -0 0` would signal our own
  # process group and always succeed, wedging the lock forever.
  if [[ "$LOCK_PID" =~ ^[1-9][0-9]*$ ]] && kill -0 "$LOCK_PID" 2>/dev/null; then
    exit 0
  fi
  rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT
# Create both output directories up front: flog appends to $LOGFILE and would
# fail (fatally under `set -e`) on a fresh install where the directory is absent.
mkdir -p "$(dirname "$METRICS_FILE")" "$(dirname "$LOGFILE")"
rotate_metrics

# Append a UTC-timestamped line to $LOGFILE.
flog() {
  printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
}

# Overwrite $STATUSFILE with the current activity and mirror it to the log.
status() {
  printf '[%s] supervisor: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE"
  flog "$*"
}

# Alerts by priority
# Each accumulator holds bullet lines separated by a literal backslash-n
# sequence (not a newline).
P0_ALERTS=""
P1_ALERTS=""
P2_ALERTS=""
P3_ALERTS=""
P4_ALERTS=""

# pN "message": append the message to the matching accumulator and log it.
p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; }
p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; }
p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; }
p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; }
p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; }

# fixed "message": record an automatic remediation and log it.
FIXES=""
fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; }

#
############################################################################# +# LAYER 1: FACTORY INFRASTRUCTURE +# (project-agnostic, runs once) +# ############################################################################# + +# ============================================================================= +# P0: MEMORY — check first, fix first +# ============================================================================= +status "P0: checking memory" + +AVAIL_MB=$(free -m | awk '/Mem:/{print $7}') +SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}') + +if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then + flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing" + + # Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions + STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) + if [ -n "$STALE_CLAUDES" ]; then + echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true + fixed "Killed stale claude processes: ${STALE_CLAUDES}" + fi + + # Drop filesystem caches + sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 + fixed "Dropped filesystem caches" + + # Re-check after fixes + AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}') + SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}') + + if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then + p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" + else + flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" + fi +fi + +# P0 alerts already logged — clear so they are not duplicated in the final consolidated log +if [ -n "$P0_ALERTS" ]; then + P0_ALERTS="" +fi + +# ============================================================================= +# P1: DISK +# ============================================================================= +status "P1: checking disk" + +DISK_PERCENT=$(df -h / | awk 
'NR==2{print $5}' | tr -d '%') + +if [ "${DISK_PERCENT:-0}" -gt 80 ]; then + flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning" + + # Docker cleanup (safe — keeps images) + sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune" + + # Truncate logs >10MB + for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do + if [ -f "$logfile" ]; then + SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) + if [ "${SIZE_KB:-0}" -gt 10240 ]; then + truncate -s 0 "$logfile" + fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)" + fi + fi + done + + # Woodpecker log_entries cleanup + LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs) + if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then + SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+') + SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)') + if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then + wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null + fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})" + fi + fi + + DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%') + if [ "${DISK_AFTER:-0}" -gt 80 ]; then + p1 "Disk still ${DISK_AFTER}% after auto-clean" + else + flog "Disk recovered: ${DISK_AFTER}%" + fi +fi + +# P1 alerts already logged — clear so they are not duplicated in the final consolidated log +if [ -n "$P1_ALERTS" ]; then + P1_ALERTS="" +fi + +# Emit infra metric +_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}') +_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? 
(${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 )) +emit_metric "$(jq -nc \ + --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ + --argjson ram "${_RAM_USED_PCT:-0}" \ + --argjson disk "${DISK_PERCENT:-0}" \ + --argjson swap "${SWAP_USED_MB:-0}" \ + '{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true + +# ============================================================================= +# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic) +# ============================================================================= +status "P4: infra housekeeping" + +# Stale agent-spawned claude processes (>3h) — skip interactive sessions +STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) +if [ -n "$STALE_CLAUDES" ]; then + echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true + fixed "Killed stale claude processes: $(echo $STALE_CLAUDES | wc -w) procs" +fi + +# Rotate logs >5MB +for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do + if [ -f "$logfile" ]; then + SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) + if [ "${SIZE_KB:-0}" -gt 5120 ]; then + mv "$logfile" "${logfile}.old" 2>/dev/null + fixed "Rotated $(basename "$logfile")" + fi + fi +done + +# ############################################################################# +# LAYER 2: PER-PROJECT CHECKS +# (iterated over projects/*.toml, config-driven) +# ############################################################################# + +# Infra retry tracking (shared across projects, created once) +_RETRY_DIR="/tmp/supervisor-infra-retries" +mkdir -p "$_RETRY_DIR" + +# Function: run all per-project checks for the currently loaded project config +check_project() { + local proj_name="${PROJECT_NAME:-unknown}" + flog "── checking project: ${proj_name} (${FORGE_REPO}) ──" + + # =========================================================================== + # P2: FACTORY STOPPED — CI, dev-agent, git 
+ # =========================================================================== + status "P2: ${proj_name}: checking pipeline" + + # CI stuck + STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true) + [ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null && p2 "${proj_name}: CI: ${STUCK_CI} pipeline(s) running >20min" + + PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true) + [ "${PENDING_CI:-0}" -gt 0 ] && p2 "${proj_name}: CI: ${PENDING_CI} pipeline(s) pending >30min" + + # Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI) + _CI_ROW=$(wpdb -A -F ',' -c "SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1;" 2>/dev/null | grep -E '^[0-9]' | head -1 || true) + if [ -n "$_CI_ROW" ]; then + _CI_ID=$(echo "$_CI_ROW" | cut -d',' -f1 | tr -d ' ') + _CI_DUR=$(echo "$_CI_ROW" | cut -d',' -f2 | tr -d ' ') + _CI_STAT=$(echo "$_CI_ROW" | cut -d',' -f3 | tr -d ' ') + emit_metric "$(jq -nc \ + --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ + --arg proj "$proj_name" \ + --argjson pipeline "${_CI_ID:-0}" \ + --argjson duration "${_CI_DUR:-0}" \ + --arg status "${_CI_STAT:-unknown}" \ + '{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null)" 2>/dev/null || true + fi + + # =========================================================================== + # P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures + # 
=========================================================================== + if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then + status "P2e: ${proj_name}: checking infra failures" + + # Recent failed pipelines (last 6h) + _failed_nums=$(wpdb -A -c " + SELECT number FROM pipelines + WHERE repo_id = ${WOODPECKER_REPO_ID} + AND status IN ('failure', 'error') + AND finished > 0 + AND to_timestamp(finished) > now() - interval '6 hours' + ORDER BY number DESC LIMIT 5;" 2>/dev/null \ + | tr -d ' ' | grep -E '^[0-9]+$' || true) + + # shellcheck disable=SC2086 + for _pip_num in $_failed_nums; do + [ -z "$_pip_num" ] && continue + + # Check retry count; alert if retries exhausted + _retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}" + _retries=0 + [ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0) + if [ "${_retries:-0}" -ge 2 ]; then + p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation" + continue + fi + + # Classify failure type via shared helper + _classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code") + + if [[ "$_classification" == infra* ]]; then + _infra_reason="${_classification#infra }" + _new_retries=$(( _retries + 1 )) + if woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \ + -X POST >/dev/null 2>&1; then + echo "$_new_retries" > "$_retry_file" + fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)" + else + p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed" + flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error" + fi + fi + done + + # Clean up stale retry tracking files (>24h) + find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true + fi + + # Dev-agent health (only if monitoring enabled) + if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then + 
DEV_LOCK="/tmp/dev-agent-${proj_name}.lock" + if [ -f "$DEV_LOCK" ]; then + DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null) + if ! kill -0 "$DEV_PID" 2>/dev/null; then + rm -f "$DEV_LOCK" + fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID} dead)" + else + DEV_STATUS_AGE=$(stat -c %Y "/tmp/dev-agent-status-${proj_name}" 2>/dev/null || echo 0) + NOW_EPOCH=$(date +%s) + STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 )) + if [ "$STATUS_AGE_MIN" -gt 30 ]; then + p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN}min" + fi + fi + fi + fi + + # Git repo health + if [ -d "${PROJECT_REPO_ROOT}" ]; then + cd "${PROJECT_REPO_ROOT}" 2>/dev/null || true + GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") + GIT_REBASE=$([ -d .git/rebase-merge ] || [ -d .git/rebase-apply ] && echo "yes" || echo "no") + + if [ "$GIT_REBASE" = "yes" ]; then + git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ + fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}" || \ + p2 "${proj_name}: Git: stale rebase, auto-abort failed" + fi + if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then + git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ + fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}" || \ + p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}" + fi + fi + + # =========================================================================== + # P2b: FACTORY STALLED — backlog exists but no agent running + # =========================================================================== + if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then + status "P2: ${proj_name}: checking pipeline stall" + + BACKLOG_COUNT=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") + IN_PROGRESS=$(forge_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 
2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") + + if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then + DEV_LOG="${DISINTO_LOG_DIR}/dev/dev-agent.log" + if [ -f "$DEV_LOG" ]; then + LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0) + else + LAST_LOG_EPOCH=0 + fi + NOW_EPOCH=$(date +%s) + IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 )) + + if [ "$IDLE_MIN" -gt 20 ]; then + p2 "${proj_name}: Pipeline stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN}min" + fi + fi + fi + + # =========================================================================== + # P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long + # =========================================================================== + if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then + status "P2: ${proj_name}: checking dev-agent productivity" + + DEV_LOG_FILE="${DISINTO_LOG_DIR}/dev/dev-agent.log" + if [ -f "$DEV_LOG_FILE" ]; then + RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6) + TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." 
|| true) + BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true) + if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then + p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'" + fi + fi + fi + + # =========================================================================== + # P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs + # =========================================================================== + if [ "${CHECK_PRS:-true}" = "true" ]; then + status "P3: ${proj_name}: checking PRs" + + OPEN_PRS=$(forge_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true) + for pr in $OPEN_PRS; do + PR_JSON=$(forge_api GET "/pulls/${pr}" 2>/dev/null || true) + [ -z "$PR_JSON" ] && continue + PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""') + [ -z "$PR_SHA" ] && continue + + CI_STATE=$(ci_commit_status "$PR_SHA" 2>/dev/null || true) + + MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true') + if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then + p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase" + elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""') + if [ -n "$UPDATED" ]; then + UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0) + NOW_EPOCH=$(date +%s) + AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 )) + [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min" + fi + elif ci_passed "$CI_STATE"; then + HAS_REVIEW=$(forge_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \ + jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains(" - -## What was expected - - - -## Steps to reproduce - - -1. -2. -3. 
- -## Environment - - -- Browser/Client: -- Wallet (if applicable): -- Network (if applicable): -- Version: diff --git a/tests/mock-forgejo.py b/tests/mock-forgejo.py deleted file mode 100755 index c65b522..0000000 --- a/tests/mock-forgejo.py +++ /dev/null @@ -1,834 +0,0 @@ -#!/usr/bin/env python3 -"""Mock Forgejo API server for CI smoke tests. - -Implements 16 Forgejo API endpoints that disinto init calls. -State stored in-memory (dicts), responds instantly. -""" - -import base64 -import hashlib -import json -import os -import re -import signal -import socket -import sys -import threading -import uuid -from http.server import HTTPServer, BaseHTTPRequestHandler -from socketserver import ThreadingMixIn -from urllib.parse import parse_qs, urlparse - -# Global state -state = { - "users": {}, # key: username -> user object - "tokens": {}, # key: token_sha1 -> token object - "repos": {}, # key: "owner/repo" -> repo object - "orgs": {}, # key: orgname -> org object - "labels": {}, # key: "owner/repo" -> list of labels - "collaborators": {}, # key: "owner/repo" -> set of usernames - "protections": {}, # key: "owner/repo" -> list of protections - "oauth2_apps": [], # list of oauth2 app objects -} - -next_ids = {"users": 1, "tokens": 1, "repos": 1, "orgs": 1, "labels": 1, "oauth2_apps": 1} - -SHUTDOWN_REQUESTED = False - - -def log_request(handler, method, path, status): - """Log request details.""" - print(f"[{handler.log_date_time_string()}] {method} {path} {status}", file=sys.stderr) - - -def json_response(handler, status, data): - """Send JSON response.""" - body = json.dumps(data).encode("utf-8") - handler.send_response(status) - handler.send_header("Content-Type", "application/json") - handler.send_header("Content-Length", len(body)) - handler.end_headers() - handler.wfile.write(body) - - -def basic_auth_user(handler): - """Extract username from Basic auth header. 
Returns None if invalid.""" - auth_header = handler.headers.get("Authorization", "") - if not auth_header.startswith("Basic "): - return None - try: - decoded = base64.b64decode(auth_header[6:]).decode("utf-8") - username, _ = decoded.split(":", 1) - return username - except Exception: - return None - - -def token_auth_valid(handler): - """Check if Authorization header contains token. Doesn't validate value.""" - auth_header = handler.headers.get("Authorization", "") - return auth_header.startswith("token ") - - -def require_token(handler): - """Require token auth. Return user or None if invalid.""" - if not token_auth_valid(handler): - return None - return True # Any token is valid for mock purposes - - -def require_basic_auth(handler, required_user=None): - """Require basic auth. Return username or None if invalid.""" - username = basic_auth_user(handler) - if username is None: - return None - # Check user exists in state - if username not in state["users"]: - return None - if required_user and username != required_user: - return None - return username - - -class ForgejoHandler(BaseHTTPRequestHandler): - """HTTP request handler for mock Forgejo API.""" - - def log_message(self, format, *args): - """Override to use our logging.""" - pass # We log in do_request - - def do_request(self, method): - """Route request to appropriate handler.""" - parsed = urlparse(self.path) - path = parsed.path - query = parse_qs(parsed.query) - - log_request(self, method, self.path, "PENDING") - - # Strip /api/v1/ prefix for routing (or leading slash for other routes) - route_path = path - if route_path.startswith("/api/v1/"): - route_path = route_path[8:] - elif route_path.startswith("/"): - route_path = route_path.lstrip("/") - - # Route to handler - try: - # First try exact match (with / replaced by _) - handler_path = route_path.replace("/", "_") - handler_name = f"handle_{method}_{handler_path}" - handler = getattr(self, handler_name, None) - - if handler: - handler(query) - 
else: - # Try pattern matching for routes with dynamic segments - self._handle_patterned_route(method, route_path, query) - except Exception as e: - log_request(self, method, self.path, 500) - json_response(self, 500, {"message": str(e)}) - - def _handle_patterned_route(self, method, route_path, query): - """Handle routes with dynamic segments using pattern matching.""" - # Define patterns: (regex, handler_name) - patterns = [ - # Users patterns - (r"^users/([^/]+)$", f"handle_{method}_users_username"), - (r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"), - (r"^users/([^/]+)/tokens/([^/]+)$", f"handle_{method}_users_username_tokens_token_id"), - (r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"), - # Repos patterns - (r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"), - (r"^repos/([^/]+)/([^/]+)/labels$", f"handle_{method}_repos_owner_repo_labels"), - (r"^repos/([^/]+)/([^/]+)/branch_protections$", f"handle_{method}_repos_owner_repo_branch_protections"), - (r"^repos/([^/]+)/([^/]+)/collaborators/([^/]+)$", f"handle_{method}_repos_owner_repo_collaborators_collaborator"), - # Org patterns - (r"^orgs/([^/]+)/repos$", f"handle_{method}_orgs_org_repos"), - # User patterns - (r"^user/repos$", f"handle_{method}_user_repos"), - (r"^user/applications/oauth2$", f"handle_{method}_user_applications_oauth2"), - # Admin patterns - (r"^admin/users$", f"handle_{method}_admin_users"), - (r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"), - (r"^admin/users/([^/]+)/repos$", f"handle_{method}_admin_users_username_repos"), - # Org patterns - (r"^orgs$", f"handle_{method}_orgs"), - ] - - for pattern, handler_name in patterns: - if re.match(pattern, route_path): - handler = getattr(self, handler_name, None) - if handler: - handler(query) - return - - self.handle_404() - - def do_GET(self): - self.do_request("GET") - - def do_POST(self): - self.do_request("POST") - - def do_PATCH(self): - self.do_request("PATCH") - - def 
do_PUT(self): - self.do_request("PUT") - - def handle_GET_version(self, query): - """GET /api/v1/version""" - json_response(self, 200, {"version": "11.0.0-mock"}) - - def handle_GET_users_username(self, query): - """GET /api/v1/users/{username}""" - # Extract username from path - parts = self.path.split("/") - if len(parts) >= 5: - username = parts[4] - else: - json_response(self, 404, {"message": "user does not exist"}) - return - - if username in state["users"]: - json_response(self, 200, state["users"][username]) - else: - json_response(self, 404, {"message": "user does not exist"}) - - def handle_GET_users_username_repos(self, query): - """GET /api/v1/users/{username}/repos""" - if not require_token(self): - json_response(self, 401, {"message": "invalid authentication"}) - return - - parts = self.path.split("/") - if len(parts) >= 5: - username = parts[4] - else: - json_response(self, 404, {"message": "user not found"}) - return - - if username not in state["users"]: - json_response(self, 404, {"message": "user not found"}) - return - - # Return repos owned by this user - user_repos = [r for r in state["repos"].values() if r["owner"]["login"] == username] - json_response(self, 200, user_repos) - - def handle_GET_repos_owner_repo(self, query): - """GET /api/v1/repos/{owner}/{repo}""" - parts = self.path.split("/") - if len(parts) >= 6: - owner = parts[4] - repo = parts[5] - else: - json_response(self, 404, {"message": "repository not found"}) - return - - key = f"{owner}/{repo}" - if key in state["repos"]: - json_response(self, 200, state["repos"][key]) - else: - json_response(self, 404, {"message": "repository not found"}) - - def handle_GET_repos_owner_repo_labels(self, query): - """GET /api/v1/repos/{owner}/{repo}/labels""" - parts = self.path.split("/") - if len(parts) >= 6: - owner = parts[4] - repo = parts[5] - else: - json_response(self, 404, {"message": "repository not found"}) - return - - require_token(self) - - key = f"{owner}/{repo}" - if key in 
state["labels"]: - json_response(self, 200, state["labels"][key]) - else: - json_response(self, 200, []) - - def handle_GET_user_applications_oauth2(self, query): - """GET /api/v1/user/applications/oauth2""" - require_token(self) - json_response(self, 200, state["oauth2_apps"]) - - def handle_GET_mock_shutdown(self, query): - """GET /mock/shutdown""" - global SHUTDOWN_REQUESTED - SHUTDOWN_REQUESTED = True - json_response(self, 200, {"status": "shutdown"}) - - def handle_POST_admin_users(self, query): - """POST /api/v1/admin/users""" - require_token(self) - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - username = data.get("username") - email = data.get("email") - - if not username or not email: - json_response(self, 400, {"message": "username and email are required"}) - return - - user_id = next_ids["users"] - next_ids["users"] += 1 - - user = { - "id": user_id, - "login": username, - "email": email, - "full_name": data.get("full_name", ""), - "is_admin": data.get("admin", False), - "must_change_password": data.get("must_change_password", False), - "login_name": data.get("login_name", username), - "visibility": data.get("visibility", "public"), - "avatar_url": f"https://seccdn.libravatar.org/avatar/{hashlib.md5(email.encode()).hexdigest()}", - } - - state["users"][username] = user - json_response(self, 201, user) - - def handle_GET_users_username_tokens(self, query): - """GET /api/v1/users/{username}/tokens""" - # Support both token auth (for listing own tokens) and basic auth (for admin listing) - username = require_token(self) - if not username: - username = require_basic_auth(self) - if not username: - json_response(self, 401, {"message": "invalid authentication"}) - return - - # Return list of tokens for this user - tokens = [t for t in state["tokens"].values() if t.get("username") == username] - json_response(self, 200, tokens) - - def 
handle_DELETE_users_username_tokens_token_id(self, query): - """DELETE /api/v1/users/{username}/tokens/{id}""" - # Support both token auth and basic auth - username = require_token(self) - if not username: - username = require_basic_auth(self) - if not username: - json_response(self, 401, {"message": "invalid authentication"}) - return - - parts = self.path.split("/") - if len(parts) >= 8: - token_id_str = parts[7] - else: - json_response(self, 404, {"message": "token not found"}) - return - - # Find and delete token by ID - deleted = False - for tok_sha1, tok in list(state["tokens"].items()): - if tok.get("id") == int(token_id_str) and tok.get("username") == username: - del state["tokens"][tok_sha1] - deleted = True - break - - if deleted: - self.send_response(204) - self.send_header("Content-Length", 0) - self.end_headers() - else: - json_response(self, 404, {"message": "token not found"}) - - def handle_POST_users_username_tokens(self, query): - """POST /api/v1/users/{username}/tokens""" - username = require_basic_auth(self) - if not username: - json_response(self, 401, {"message": "invalid authentication"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - token_name = data.get("name") - if not token_name: - json_response(self, 400, {"message": "name is required"}) - return - - token_id = next_ids["tokens"] - next_ids["tokens"] += 1 - - # Deterministic token: sha256(username + name)[:40] - token_str = hashlib.sha256(f"{username}{token_name}".encode()).hexdigest()[:40] - - token = { - "id": token_id, - "name": token_name, - "sha1": token_str, - "scopes": data.get("scopes", ["all"]), - "created_at": "2026-04-01T00:00:00Z", - "expires_at": None, - "username": username, # Store username for lookup - } - - state["tokens"][token_str] = token - json_response(self, 201, token) - - def handle_GET_orgs(self, query): - """GET /api/v1/orgs""" - if 
not require_token(self): - json_response(self, 401, {"message": "invalid authentication"}) - return - json_response(self, 200, list(state["orgs"].values())) - - def handle_POST_orgs(self, query): - """POST /api/v1/orgs""" - require_token(self) - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - username = data.get("username") - if not username: - json_response(self, 400, {"message": "username is required"}) - return - - org_id = next_ids["orgs"] - next_ids["orgs"] += 1 - - org = { - "id": org_id, - "username": username, - "full_name": username, - "avatar_url": f"https://seccdn.libravatar.org/avatar/{hashlib.md5(username.encode()).hexdigest()}", - "visibility": data.get("visibility", "public"), - } - - state["orgs"][username] = org - json_response(self, 201, org) - - def handle_POST_orgs_org_repos(self, query): - """POST /api/v1/orgs/{org}/repos""" - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 6: - org = parts[4] - else: - json_response(self, 404, {"message": "organization not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - repo_name = data.get("name") - if not repo_name: - json_response(self, 400, {"message": "name is required"}) - return - - repo_id = next_ids["repos"] - next_ids["repos"] += 1 - - key = f"{org}/{repo_name}" - repo = { - "id": repo_id, - "full_name": key, - "name": repo_name, - "owner": {"id": state["orgs"][org]["id"], "login": org}, - "empty": False, - "default_branch": data.get("default_branch", "main"), - "description": data.get("description", ""), - "private": data.get("private", False), - "html_url": f"https://example.com/{key}", - "ssh_url": f"git@example.com:{key}.git", - "clone_url": f"https://example.com/{key}.git", - "created_at": "2026-04-01T00:00:00Z", - } - - 
state["repos"][key] = repo - json_response(self, 201, repo) - - def handle_POST_users_username_repos(self, query): - """POST /api/v1/users/{username}/repos""" - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 5: - username = parts[4] - else: - json_response(self, 400, {"message": "username required"}) - return - - if username not in state["users"]: - json_response(self, 404, {"message": "user not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - repo_name = data.get("name") - if not repo_name: - json_response(self, 400, {"message": "name is required"}) - return - - repo_id = next_ids["repos"] - next_ids["repos"] += 1 - - key = f"{username}/{repo_name}" - repo = { - "id": repo_id, - "full_name": key, - "name": repo_name, - "owner": {"id": state["users"][username]["id"], "login": username}, - "empty": not data.get("auto_init", False), - "default_branch": data.get("default_branch", "main"), - "description": data.get("description", ""), - "private": data.get("private", False), - "html_url": f"https://example.com/{key}", - "ssh_url": f"git@example.com:{key}.git", - "clone_url": f"https://example.com/{key}.git", - "created_at": "2026-04-01T00:00:00Z", - } - - state["repos"][key] = repo - json_response(self, 201, repo) - - def handle_POST_admin_users_username_repos(self, query): - """POST /api/v1/admin/users/{username}/repos - Admin API to create a repo under a specific user namespace. - This allows creating repos in any user's namespace when authenticated as admin. 
- """ - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 6: - target_user = parts[4] - else: - json_response(self, 400, {"message": "username required"}) - return - - if target_user not in state["users"]: - json_response(self, 404, {"message": "user not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - repo_name = data.get("name") - if not repo_name: - json_response(self, 400, {"message": "name is required"}) - return - - repo_id = next_ids["repos"] - next_ids["repos"] += 1 - - key = f"{target_user}/{repo_name}" - repo = { - "id": repo_id, - "full_name": key, - "name": repo_name, - "owner": {"id": state["users"][target_user]["id"], "login": target_user}, - "empty": not data.get("auto_init", False), - "default_branch": data.get("default_branch", "main"), - "description": data.get("description", ""), - "private": data.get("private", False), - "html_url": f"https://example.com/{key}", - "ssh_url": f"git@example.com:{key}.git", - "clone_url": f"https://example.com/{key}.git", - "created_at": "2026-04-01T00:00:00Z", - } - - state["repos"][key] = repo - json_response(self, 201, repo) - - def handle_POST_user_repos(self, query): - """POST /api/v1/user/repos""" - require_token(self) - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - repo_name = data.get("name") - if not repo_name: - json_response(self, 400, {"message": "name is required"}) - return - - # Get authenticated user from token - auth_header = self.headers.get("Authorization", "") - token = auth_header.split(" ", 1)[1] if " " in auth_header else "" - - # Find user by token (use stored username field) - owner = None - for tok_sha1, tok in state["tokens"].items(): - if tok_sha1 == token: - owner = tok.get("username") - break - - if not owner: - 
json_response(self, 401, {"message": "invalid token"}) - return - - repo_id = next_ids["repos"] - next_ids["repos"] += 1 - - key = f"{owner}/{repo_name}" - repo = { - "id": repo_id, - "full_name": key, - "name": repo_name, - "owner": {"id": state["users"].get(owner, {}).get("id", 0), "login": owner}, - "empty": False, - "default_branch": data.get("default_branch", "main"), - "description": data.get("description", ""), - "private": data.get("private", False), - "html_url": f"https://example.com/{key}", - "ssh_url": f"git@example.com:{key}.git", - "clone_url": f"https://example.com/{key}.git", - "created_at": "2026-04-01T00:00:00Z", - } - - state["repos"][key] = repo - json_response(self, 201, repo) - - def handle_POST_repos_owner_repo_labels(self, query): - """POST /api/v1/repos/{owner}/{repo}/labels""" - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 6: - owner = parts[4] - repo = parts[5] - else: - json_response(self, 404, {"message": "repository not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - label_name = data.get("name") - label_color = data.get("color") - - if not label_name or not label_color: - json_response(self, 400, {"message": "name and color are required"}) - return - - label_id = next_ids["labels"] - next_ids["labels"] += 1 - - key = f"{owner}/{repo}" - label = { - "id": label_id, - "name": label_name, - "color": label_color, - "description": data.get("description", ""), - "url": f"https://example.com/api/v1/repos/{key}/labels/{label_id}", - } - - if key not in state["labels"]: - state["labels"][key] = [] - state["labels"][key].append(label) - json_response(self, 201, label) - - def handle_POST_repos_owner_repo_branch_protections(self, query): - """POST /api/v1/repos/{owner}/{repo}/branch_protections""" - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 6: - owner = 
parts[4] - repo = parts[5] - else: - json_response(self, 404, {"message": "repository not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - branch_name = data.get("branch_name", "main") - key = f"{owner}/{repo}" - - # Generate unique ID for protection - if key in state["protections"]: - protection_id = len(state["protections"][key]) + 1 - else: - protection_id = 1 - - protection = { - "id": protection_id, - "repo_id": state["repos"].get(key, {}).get("id", 0), - "branch_name": branch_name, - "rule_name": data.get("rule_name", branch_name), - "enable_push": data.get("enable_push", False), - "enable_merge_whitelist": data.get("enable_merge_whitelist", True), - "merge_whitelist_usernames": data.get("merge_whitelist_usernames", ["admin"]), - "required_approvals": data.get("required_approvals", 1), - "apply_to_admins": data.get("apply_to_admins", True), - } - - if key not in state["protections"]: - state["protections"][key] = [] - state["protections"][key].append(protection) - json_response(self, 201, protection) - - def handle_POST_user_applications_oauth2(self, query): - """POST /api/v1/user/applications/oauth2""" - require_token(self) - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - app_name = data.get("name") - if not app_name: - json_response(self, 400, {"message": "name is required"}) - return - - app_id = next_ids["oauth2_apps"] - next_ids["oauth2_apps"] += 1 - - app = { - "id": app_id, - "name": app_name, - "client_id": str(uuid.uuid4()), - "client_secret": hashlib.sha256(str(uuid.uuid4()).encode()).hexdigest(), - "redirect_uris": data.get("redirect_uris", []), - "confidential_client": data.get("confidential_client", True), - "created_at": "2026-04-01T00:00:00Z", - } - - state["oauth2_apps"].append(app) - 
json_response(self, 201, app) - - def handle_PATCH_admin_users_username(self, query): - """PATCH /api/v1/admin/users/{username}""" - if not require_token(self): - json_response(self, 401, {"message": "invalid authentication"}) - return - - parts = self.path.split("/") - if len(parts) >= 6: - username = parts[5] - else: - json_response(self, 404, {"message": "user does not exist"}) - return - - if username not in state["users"]: - json_response(self, 404, {"message": "user does not exist"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - user = state["users"][username] - for key, value in data.items(): - # Map 'admin' to 'is_admin' for consistency - update_key = 'is_admin' if key == 'admin' else key - if update_key in user: - user[update_key] = value - - json_response(self, 200, user) - - def handle_PUT_repos_owner_repo_collaborators_collaborator(self, query): - """PUT /api/v1/repos/{owner}/{repo}/collaborators/{collaborator}""" - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 8: - owner = parts[4] - repo = parts[5] - collaborator = parts[7] - else: - json_response(self, 404, {"message": "repository not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - key = f"{owner}/{repo}" - if key not in state["collaborators"]: - state["collaborators"][key] = set() - state["collaborators"][key].add(collaborator) - - self.send_response(204) - self.send_header("Content-Length", 0) - self.end_headers() - - def handle_GET_repos_owner_repo_collaborators_collaborator(self, query): - """GET /api/v1/repos/{owner}/{repo}/collaborators/{collaborator}""" - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 8: - owner = parts[4] - repo = parts[5] - collaborator = parts[7] - else: - 
json_response(self, 404, {"message": "repository not found"}) - return - - key = f"{owner}/{repo}" - if key in state["collaborators"] and collaborator in state["collaborators"][key]: - self.send_response(204) - self.send_header("Content-Length", 0) - self.end_headers() - else: - json_response(self, 404, {"message": "collaborator not found"}) - - def handle_404(self): - """Return 404 for unknown routes.""" - json_response(self, 404, {"message": "route not found"}) - - -class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): - """Threaded HTTP server for handling concurrent requests.""" - daemon_threads = True - - -def main(): - """Start the mock server.""" - global SHUTDOWN_REQUESTED - - port = int(os.environ.get("MOCK_FORGE_PORT", 3000)) - try: - server = ThreadingHTTPServer(("0.0.0.0", port), ForgejoHandler) - try: - server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - except OSError: - pass # Not all platforms support this - except OSError as e: - print(f"Error: Failed to start server on port {port}: {e}", file=sys.stderr) - sys.exit(1) - - print(f"Mock Forgejo server starting on port {port}", file=sys.stderr) - sys.stderr.flush() - - def shutdown_handler(signum, frame): - global SHUTDOWN_REQUESTED - SHUTDOWN_REQUESTED = True - # Can't call server.shutdown() directly from signal handler in threaded server - threading.Thread(target=server.shutdown, daemon=True).start() - - signal.signal(signal.SIGTERM, shutdown_handler) - signal.signal(signal.SIGINT, shutdown_handler) - - try: - server.serve_forever() - except KeyboardInterrupt: - pass - finally: - server.shutdown() - print("Mock Forgejo server stopped", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/tests/smoke-init.sh b/tests/smoke-init.sh index a8371bd..b0a6cf0 100644 --- a/tests/smoke-init.sh +++ b/tests/smoke-init.sh @@ -1,34 +1,32 @@ #!/usr/bin/env bash -# tests/smoke-init.sh — End-to-end smoke test for disinto init with mock Forgejo +# tests/smoke-init.sh — End-to-end 
smoke test for disinto init # -# Validates the full init flow using mock Forgejo server: -# 1. Verify mock Forgejo is ready -# 2. Set up mock binaries (docker, claude, tmux) -# 3. Run disinto init -# 4. Verify Forgejo state (users, repo) -# 5. Verify local state (TOML, .env, repo clone) -# 6. Verify cron setup +# Expects a running Forgejo at SMOKE_FORGE_URL with a bootstrap admin +# user already created (see .woodpecker/smoke-init.yml for CI setup). +# Validates the full init flow: Forgejo API, user/token creation, +# repo setup, labels, TOML generation, and cron installation. # -# Required env: FORGE_URL (default: http://localhost:3000) +# Required env: SMOKE_FORGE_URL (default: http://localhost:3000) # Required tools: bash, curl, jq, python3, git set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)" -# Always use localhost for mock Forgejo (in case FORGE_URL is set from docker-compose) -export FORGE_URL="http://localhost:3000" -MOCK_BIN="/tmp/smoke-mock-bin" +FORGE_URL="${SMOKE_FORGE_URL:-http://localhost:3000}" +SETUP_ADMIN="setup-admin" +SETUP_PASS="SetupPass-789xyz" TEST_SLUG="smoke-org/smoke-repo" +MOCK_BIN="/tmp/smoke-mock-bin" +MOCK_STATE="/tmp/smoke-mock-state" FAILED=0 fail() { printf 'FAIL: %s\n' "$*" >&2; FAILED=1; } pass() { printf 'PASS: %s\n' "$*"; } cleanup() { - # Kill any leftover mock-forgejo.py processes by name - pkill -f "mock-forgejo.py" 2>/dev/null || true - rm -rf "$MOCK_BIN" /tmp/smoke-test-repo \ - "${FACTORY_ROOT}/projects/smoke-repo.toml" + rm -rf "$MOCK_BIN" "$MOCK_STATE" /tmp/smoke-test-repo \ + "${FACTORY_ROOT}/projects/smoke-repo.toml" \ + "${FACTORY_ROOT}/docker-compose.yml" # Restore .env only if we created the backup if [ -f "${FACTORY_ROOT}/.env.smoke-backup" ]; then mv "${FACTORY_ROOT}/.env.smoke-backup" "${FACTORY_ROOT}/.env" @@ -42,11 +40,11 @@ trap cleanup EXIT if [ -f "${FACTORY_ROOT}/.env" ]; then cp "${FACTORY_ROOT}/.env" "${FACTORY_ROOT}/.env.smoke-backup" fi -# Start with a clean .env +# Start with a clean 
.env (setup_forge writes tokens here) printf '' > "${FACTORY_ROOT}/.env" -# ── 1. Verify mock Forgejo is ready ───────────────────────────────────────── -echo "=== 1/6 Verifying mock Forgejo at ${FORGE_URL} ===" +# ── 1. Verify Forgejo is ready ────────────────────────────────────────────── +echo "=== 1/6 Verifying Forgejo at ${FORGE_URL} ===" retries=0 api_version="" while true; do @@ -57,64 +55,163 @@ while true; do fi retries=$((retries + 1)) if [ "$retries" -gt 30 ]; then - fail "Mock Forgejo API not responding after 30s" + fail "Forgejo API not responding after 30s" exit 1 fi sleep 1 done -pass "Mock Forgejo API v${api_version} (${retries}s)" +pass "Forgejo API v${api_version} (${retries}s)" + +# Verify bootstrap admin user exists +if curl -sf --max-time 5 "${FORGE_URL}/api/v1/users/${SETUP_ADMIN}" >/dev/null 2>&1; then + pass "Bootstrap admin '${SETUP_ADMIN}' exists" +else + fail "Bootstrap admin '${SETUP_ADMIN}' not found — was Forgejo set up?" + exit 1 +fi # ── 2. Set up mock binaries ───────────────────────────────────────────────── echo "=== 2/6 Setting up mock binaries ===" -mkdir -p "$MOCK_BIN" +mkdir -p "$MOCK_BIN" "$MOCK_STATE" + +# Store bootstrap admin credentials for the docker mock +printf '%s:%s' "${SETUP_ADMIN}" "${SETUP_PASS}" > "$MOCK_STATE/bootstrap_creds" # ── Mock: docker ── -# Intercepts docker exec calls that disinto init --bare makes to Forgejo CLI +# Routes 'docker exec' user-creation calls to the Forgejo admin API, +# using the bootstrap admin's credentials. cat > "$MOCK_BIN/docker" << 'DOCKERMOCK' #!/usr/bin/env bash set -euo pipefail -FORGE_URL="${SMOKE_FORGE_URL:-${FORGE_URL:-http://localhost:3000}}" -if [ "${1:-}" = "ps" ]; then exit 0; fi + +FORGE_URL="${SMOKE_FORGE_URL:-http://localhost:3000}" +MOCK_STATE="/tmp/smoke-mock-state" + +if [ ! 
-f "$MOCK_STATE/bootstrap_creds" ]; then + echo "mock-docker: bootstrap credentials not found" >&2 + exit 1 +fi +BOOTSTRAP_CREDS="$(cat "$MOCK_STATE/bootstrap_creds")" + +# docker ps — return empty (no containers running) +if [ "${1:-}" = "ps" ]; then + exit 0 +fi + +# docker exec — route to Forgejo API if [ "${1:-}" = "exec" ]; then - shift + shift # remove 'exec' + + # Skip docker exec flags (-u VALUE, -T, -i, etc.) while [ $# -gt 0 ] && [ "${1#-}" != "$1" ]; do - case "$1" in -u|-w|-e) shift 2 ;; *) shift ;; esac + case "$1" in + -u|-w|-e) shift 2 ;; + *) shift ;; + esac done - shift # container name + shift # remove container name (e.g. disinto-forgejo) + + # $@ is now: forgejo admin user list|create [flags] if [ "${1:-}" = "forgejo" ] && [ "${2:-}" = "admin" ] && [ "${3:-}" = "user" ]; then subcmd="${4:-}" - if [ "$subcmd" = "list" ]; then echo "ID Username Email"; exit 0; fi + + if [ "$subcmd" = "list" ]; then + echo "ID Username Email" + exit 0 + fi + if [ "$subcmd" = "create" ]; then - shift 4; username="" password="" email="" is_admin="false" + shift 4 # skip 'forgejo admin user create' + username="" password="" email="" is_admin="false" while [ $# -gt 0 ]; do case "$1" in - --admin) is_admin="true"; shift ;; --username) username="$2"; shift 2 ;; - --password) password="$2"; shift 2 ;; --email) email="$2"; shift 2 ;; - --must-change-password*) shift ;; *) shift ;; + --admin) is_admin="true"; shift ;; + --username) username="$2"; shift 2 ;; + --password) password="$2"; shift 2 ;; + --email) email="$2"; shift 2 ;; + --must-change-password*) shift ;; + *) shift ;; esac done - curl -sf -X POST -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/admin/users" \ - -d "{\"username\":\"${username}\",\"password\":\"${password}\",\"email\":\"${email}\",\"must_change_password\":false}" >/dev/null 2>&1 - if [ "$is_admin" = "true" ]; then - curl -sf -X PATCH -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/admin/users/${username}" \ - -d 
"{\"admin\":true,\"must_change_password\":false}" >/dev/null 2>&1 || true + + if [ -z "$username" ] || [ -z "$password" ] || [ -z "$email" ]; then + echo "mock-docker: missing required args" >&2 + exit 1 fi - echo "New user '${username}' has been successfully created!"; exit 0 - fi - if [ "$subcmd" = "change-password" ]; then - shift 4; username="" - while [ $# -gt 0 ]; do - case "$1" in --username) username="$2"; shift 2 ;; --password) shift 2 ;; --must-change-password*|--config*) shift ;; *) shift ;; esac - done - curl -sf -X PATCH -H "Content-Type: application/json" \ + + # Create user via Forgejo admin API + if ! curl -sf -X POST \ + -u "$BOOTSTRAP_CREDS" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/admin/users" \ + -d "{\"username\":\"${username}\",\"password\":\"${password}\",\"email\":\"${email}\",\"must_change_password\":false,\"login_name\":\"${username}\",\"source_id\":0}" \ + >/dev/null 2>&1; then + echo "mock-docker: failed to create user '${username}'" >&2 + exit 1 + fi + + # Patch user: ensure must_change_password is false (Forgejo admin + # API POST may ignore it) and promote to admin if requested + patch_body="{\"must_change_password\":false,\"login_name\":\"${username}\",\"source_id\":0" + if [ "$is_admin" = "true" ]; then + patch_body="${patch_body},\"admin\":true" + fi + patch_body="${patch_body}}" + + curl -sf -X PATCH \ + -u "$BOOTSTRAP_CREDS" \ + -H "Content-Type: application/json" \ "${FORGE_URL}/api/v1/admin/users/${username}" \ - -d "{\"must_change_password\":false}" >/dev/null 2>&1 || true + -d "${patch_body}" \ + >/dev/null 2>&1 || true + + echo "New user '${username}' has been successfully created!" 
+ exit 0 + fi + + if [ "$subcmd" = "change-password" ]; then + shift 4 # skip 'forgejo admin user change-password' + username="" password="" + while [ $# -gt 0 ]; do + case "$1" in + --username) username="$2"; shift 2 ;; + --password) password="$2"; shift 2 ;; + --must-change-password*) shift ;; + --config*) shift ;; + *) shift ;; + esac + done + + if [ -z "$username" ]; then + echo "mock-docker: change-password missing --username" >&2 + exit 1 + fi + + # PATCH user via Forgejo admin API to clear must_change_password + patch_body="{\"must_change_password\":false,\"login_name\":\"${username}\",\"source_id\":0" + if [ -n "$password" ]; then + patch_body="${patch_body},\"password\":\"${password}\"" + fi + patch_body="${patch_body}}" + + if ! curl -sf -X PATCH \ + -u "$BOOTSTRAP_CREDS" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/admin/users/${username}" \ + -d "${patch_body}" \ + >/dev/null 2>&1; then + echo "mock-docker: failed to change-password for '${username}'" >&2 + exit 1 + fi exit 0 fi fi + + echo "mock-docker: unhandled exec: $*" >&2 + exit 1 fi + +echo "mock-docker: unhandled command: $*" >&2 exit 1 DOCKERMOCK chmod +x "$MOCK_BIN/docker" @@ -134,8 +231,11 @@ chmod +x "$MOCK_BIN/claude" printf '#!/usr/bin/env bash\nexit 0\n' > "$MOCK_BIN/tmux" chmod +x "$MOCK_BIN/tmux" +# No crontab mock — use real BusyBox crontab (available in the Forgejo +# Alpine image). Cron entries are verified via 'crontab -l' in step 6. + export PATH="$MOCK_BIN:$PATH" -pass "Mock binaries installed" +pass "Mock binaries installed (docker, claude, tmux)" # ── 3. 
Run disinto init ───────────────────────────────────────────────────── echo "=== 3/6 Running disinto init ===" @@ -145,26 +245,9 @@ rm -f "${FACTORY_ROOT}/projects/smoke-repo.toml" git config --global user.email "smoke@test.local" git config --global user.name "Smoke Test" -# USER needs to be set twice: assignment then export (SC2155) -USER=$(whoami) -export USER - -# Create mock git repo to avoid clone failure (mock server has no git support) -mkdir -p "/tmp/smoke-test-repo" -cd "/tmp/smoke-test-repo" -git init --quiet -git config user.email "smoke@test.local" -git config user.name "Smoke Test" -echo "# smoke-repo" > README.md -git add README.md -git commit --quiet -m "Initial commit" - export SMOKE_FORGE_URL="$FORGE_URL" export FORGE_URL -# Skip push to mock server (no git support) -export SKIP_PUSH=true - if bash "${FACTORY_ROOT}/bin/disinto" init \ "${TEST_SLUG}" \ --bare --yes \ @@ -175,18 +258,6 @@ else fail "disinto init exited non-zero" fi -# ── Idempotency test: run init again ─────────────────────────────────────── -echo "=== Idempotency test: running disinto init again ===" -if bash "${FACTORY_ROOT}/bin/disinto" init \ - "${TEST_SLUG}" \ - --bare --yes \ - --forge-url "$FORGE_URL" \ - --repo-root "/tmp/smoke-test-repo"; then - pass "disinto init (re-run) completed successfully" -else - fail "disinto init (re-run) exited non-zero" -fi - # ── 4. 
Verify Forgejo state ───────────────────────────────────────────────── echo "=== 4/6 Verifying Forgejo state ===" @@ -219,6 +290,35 @@ if [ "$repo_found" = false ]; then fail "Repo not found on Forgejo under any expected path" fi +# Labels exist on repo — use bootstrap admin to check +setup_token=$(curl -sf -X POST \ + -u "${SETUP_ADMIN}:${SETUP_PASS}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/users/${SETUP_ADMIN}/tokens" \ + -d '{"name":"smoke-verify","scopes":["all"]}' 2>/dev/null \ + | jq -r '.sha1 // empty') || setup_token="" + +if [ -n "$setup_token" ]; then + label_count=0 + for repo_path in "${TEST_SLUG}" "dev-bot/smoke-repo" "disinto-admin/smoke-repo"; do + label_count=$(curl -sf \ + -H "Authorization: token ${setup_token}" \ + "${FORGE_URL}/api/v1/repos/${repo_path}/labels?limit=50" 2>/dev/null \ + | jq 'length' 2>/dev/null) || label_count=0 + if [ "$label_count" -gt 0 ]; then + break + fi + done + + if [ "$label_count" -ge 5 ]; then + pass "Labels created on repo (${label_count} labels)" + else + fail "Expected >= 5 labels, found ${label_count}" + fi +else + fail "Could not obtain verification token from bootstrap admin" +fi + # ── 5. Verify local state ─────────────────────────────────────────────────── echo "=== 5/6 Verifying local state ===" @@ -257,7 +357,7 @@ else fail ".env not found" fi -# Repo was cloned (mock git repo created before disinto init) +# Repo was cloned if [ -d "/tmp/smoke-test-repo/.git" ]; then pass "Repo cloned to /tmp/smoke-test-repo" else diff --git a/state/.supervisor-active b/vault/.locks/.gitkeep similarity index 100% rename from state/.supervisor-active rename to vault/.locks/.gitkeep diff --git a/vault/AGENTS.md b/vault/AGENTS.md new file mode 100644 index 0000000..5b010ec --- /dev/null +++ b/vault/AGENTS.md @@ -0,0 +1,45 @@ + +# Vault Agent + +**Role**: Three-pipeline gate — action safety classification, resource procurement, and human-action drafting. 
+ +**Pipeline A — Action Gating (*.json)**: Actions enter a pending queue and are +classified by Claude via `vault-agent.sh`, which can auto-approve (call +`vault-fire.sh` directly), auto-reject (call `vault-reject.sh`), or escalate +to a human by writing `PHASE:escalate` to a phase file — using the same +unified escalation path as dev/action agents. + +**Pipeline B — Procurement (*.md)**: The planner files resource requests as +markdown files in `$OPS_REPO_ROOT/vault/pending/`. `vault-poll.sh` notifies the human via +vault/forge. The human fulfills the request (creates accounts, provisions infra, +adds secrets to `.env`) and moves the file to `$OPS_REPO_ROOT/vault/approved/`. +`vault-fire.sh` then extracts the proposed entry and appends it to +`$OPS_REPO_ROOT/RESOURCES.md`. + +**Pipeline C — Rent-a-Human (outreach drafts)**: Any agent can dispatch the +`run-rent-a-human` formula (via an `action` issue) when a task requires a human +touch — posting on Reddit, commenting on HN, signing up for a service, etc. +Claude drafts copy-paste-ready content to `vault/outreach/{platform}/drafts/` +and notifies the human via vault/forge for one-click execution. No vault approval +needed — the human reviews and publishes directly. + +**Trigger**: `vault-poll.sh` runs every 30 min via cron. 
+ +**Key files**: +- `vault/vault-poll.sh` — Processes pending items: retry approved, auto-reject after 48h timeout, invoke vault-agent for JSON actions, notify human for procurement requests +- `vault/vault-agent.sh` — Classifies and routes pending JSON actions via `claude -p`: auto-approve, auto-reject, or escalate to human +- `vault/vault-env.sh` — Shared env setup for vault sub-scripts: sources `lib/env.sh`, overrides `FORGE_TOKEN` with `FORGE_VAULT_TOKEN`, sets `VAULT_TOKEN` for vault-runner container +- `vault/PROMPT.md` — System prompt for the vault agent's Claude invocation +- `vault/vault-fire.sh` — Executes an approved action (JSON) in an **ephemeral Docker container** with vault-only secrets injected (GITHUB_TOKEN, CLAWHUB_TOKEN — never exposed to agents). For deployment actions, calls `lib/ci-helpers.sh:ci_promote()` to gate production promotes via Woodpecker environments. Writes `$OPS_REPO_ROOT/RESOURCES.md` entry for procurement MD approvals. +- `vault/vault-reject.sh` — Marks a JSON action as rejected +- `formulas/run-rent-a-human.toml` — Formula for human-action drafts: Claude researches target platform norms, drafts copy-paste content, writes to `vault/outreach/{platform}/drafts/`, notifies human via vault/forge + +**Procurement flow** (all vault items live in `$OPS_REPO_ROOT/vault/`): +1. Planner drops `$OPS_REPO_ROOT/vault/pending/.md` with what/why/proposed RESOURCES.md entry +2. `vault-poll.sh` notifies human via vault/forge +3. Human fulfills: creates account, adds secrets to `.env`, moves file to `approved/` +4. `vault-fire.sh` extracts proposed entry, appends to `$OPS_REPO_ROOT/RESOURCES.md`, moves to `fired/` +5. 
Next planner run reads RESOURCES.md → new capability available → unblocks prerequisite tree + +**Environment variables consumed**: +- All from `lib/env.sh` diff --git a/vault/PROMPT.md b/vault/PROMPT.md new file mode 100644 index 0000000..3f93ee5 --- /dev/null +++ b/vault/PROMPT.md @@ -0,0 +1,122 @@ +# Vault Agent + +You are the vault agent for `$FORGE_REPO`. You were called by +`vault-poll.sh` because one or more actions in `$OPS_REPO_ROOT/vault/pending/` need +classification and routing. + +## Two Pipelines + +The vault handles two kinds of items: + +### A. Action Gating (*.json) +Actions from agents that need safety classification before execution. +You classify and route these: auto-approve, escalate, or reject. + +### B. Procurement Requests (*.md) +Resource requests from the planner. These always escalate to the human — +you do NOT auto-approve or reject procurement requests. The human fulfills +the request (creates accounts, provisions infra, adds secrets to .env) +and moves the file from `$OPS_REPO_ROOT/vault/pending/` to `$OPS_REPO_ROOT/vault/approved/`. +`vault-fire.sh` then writes the RESOURCES.md entry. + +## Your Job (Action Gating only) + +For each pending JSON action, decide: **auto-approve**, **escalate**, or **reject**. + +## Routing Table (risk × reversibility) + +| Risk | Reversible | Route | +|----------|------------|---------------------------------------------| +| low | true | auto-approve → fire immediately | +| low | false | auto-approve → fire, log prominently | +| medium | true | auto-approve → fire, notify via vault/forge | +| medium | false | escalate via vault/forge → wait for human reply | +| high | any | always escalate → wait for human reply | + +## Rules + +1. **Never lower risk.** You may override the source agent's self-assessed + risk *upward*, never downward. If a `blog-post` looks like it contains + pricing claims, bump it to `medium` or `high`. +2. **`requires_human: true` always escalates.** Regardless of risk level. +3. 
**Unknown action types → reject** with reason `unknown_type`. +4. **Malformed JSON → reject** with reason `malformed`. +5. **Payload validation:** Check that the payload has the minimum required + fields for the action type. Missing fields → reject with reason. +6. **Procurement requests (*.md) → skip.** These are handled by the human + directly. Do not attempt to classify, approve, or reject them. + +## Action Type Defaults + +| Type | Default Risk | Default Reversible | +|------------------|-------------|-------------------| +| `blog-post` | low | yes | +| `social-post` | medium | yes | +| `email-blast` | high | no | +| `pricing-change` | high | partial | +| `dns-change` | high | partial | +| `webhook-call` | medium | depends | +| `stripe-charge` | high | no | + +## Procurement Request Format (reference only) + +Procurement requests dropped by the planner look like: + +```markdown +# Procurement Request: + +## What + + +## Why + + +## Unblocks + + +## Proposed RESOURCES.md Entry +## +- type: +- capability: +- env: +``` + +## Available Tools + +You have shell access. Use these for routing decisions: + +```bash +source ${FACTORY_ROOT}/lib/env.sh +``` + +### Auto-approve and fire +```bash +bash ${FACTORY_ROOT}/vault/vault-fire.sh +``` + +### Escalate +```bash +echo "PHASE:escalate" > "$PHASE_FILE" +``` + +### Reject +```bash +bash ${FACTORY_ROOT}/vault/vault-reject.sh "" +``` + +## Output Format + +After processing each action, print exactly: + +``` +ROUTE: +``` + +## Important + +- Process ALL pending JSON actions in the batch. Never skip silently. +- For auto-approved actions, fire them immediately via `vault-fire.sh`. +- For escalated actions, move to `$OPS_REPO_ROOT/vault/approved/` only AFTER human approval. +- Read the action JSON carefully. Check the payload, not just the metadata. +- Ignore `.md` files in pending/ — those are procurement requests handled + separately by vault-poll.sh and the human. 
diff --git a/vault/SCHEMA.md b/vault/SCHEMA.md deleted file mode 100644 index 0a465c3..0000000 --- a/vault/SCHEMA.md +++ /dev/null @@ -1,81 +0,0 @@ -# Vault Action TOML Schema - -This document defines the schema for vault action TOML files used in the PR-based approval workflow (issue #74). - -## File Location - -Vault actions are stored in `vault/actions/.toml` on the ops repo. - -## Schema Definition - -```toml -# Required -id = "publish-skill-20260331" -formula = "clawhub-publish" -context = "SKILL.md bumped to 0.3.0" - -# Required secrets to inject -secrets = ["CLAWHUB_TOKEN"] - -# Optional -model = "sonnet" -tools = ["clawhub"] -timeout_minutes = 30 -``` - -## Field Specifications - -### Required Fields - -| Field | Type | Description | -|-------|------|-------------| -| `id` | string | Unique identifier for the vault action. Format: `-` (e.g., `publish-skill-20260331`) | -| `formula` | string | Formula name from `formulas/` directory that defines the operational task to execute | -| `context` | string | Human-readable explanation of why this action is needed. Used in PR description | -| `secrets` | array of strings | List of secret names to inject into the execution environment. Only these secrets are passed to the container | - -### Optional Fields - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `model` | string | `sonnet` | Override the default Claude model for this action | -| `tools` | array of strings | `[]` | MCP tools to enable during execution | -| `timeout_minutes` | integer | `60` | Maximum execution time in minutes | - -## Secret Names - -Secret names must be defined in `.env.vault.enc` on the ops repo. The vault validates that requested secrets exist in the allowlist before execution. - -Common secret names: -- `CLAWHUB_TOKEN` - Token for ClawHub skill publishing -- `GITHUB_TOKEN` - GitHub API token for repository operations -- `DEPLOY_KEY` - Infrastructure deployment key - -## Validation Rules - -1. 
**Required fields**: `id`, `formula`, `context`, and `secrets` must be present -2. **Formula validation**: The formula must exist in the `formulas/` directory -3. **Secret validation**: All secrets in the `secrets` array must be in the allowlist -4. **No unknown fields**: The TOML must not contain fields outside the schema -5. **ID uniqueness**: The `id` must be unique across all vault actions - -## Example Files - -See `vault/examples/` for complete examples: -- `webhook-call.toml` - Example of calling an external webhook -- `promote.toml` - Example of promoting a build/artifact -- `publish.toml` - Example of publishing a skill to ClawHub - -## Usage - -Validate a vault action file: - -```bash -./vault/validate.sh vault/actions/.toml -``` - -The validator will check: -- All required fields are present -- Secret names are in the allowlist -- No unknown fields are present -- Formula exists in the formulas directory diff --git a/vault/examples/promote.toml b/vault/examples/promote.toml deleted file mode 100644 index b956c9f..0000000 --- a/vault/examples/promote.toml +++ /dev/null @@ -1,21 +0,0 @@ -# vault/examples/promote.toml -# Example: Promote a build/artifact to production -# -# This vault action demonstrates promoting a built artifact to a -# production environment with proper authentication. 
- -id = "promote-20260331" -formula = "run-supervisor" -context = "Promote build v1.2.3 to production environment" - -# Secrets to inject for deployment authentication -secrets = ["DEPLOY_KEY", "DOCKER_HUB_TOKEN"] - -# Optional: use larger model for complex deployment logic -model = "sonnet" - -# Optional: enable MCP tools for container operations -tools = ["docker"] - -# Optional: deployments may take longer -timeout_minutes = 45 diff --git a/vault/examples/publish.toml b/vault/examples/publish.toml deleted file mode 100644 index 2373b00..0000000 --- a/vault/examples/publish.toml +++ /dev/null @@ -1,21 +0,0 @@ -# vault/examples/publish.toml -# Example: Publish a skill to ClawHub -# -# This vault action demonstrates publishing a skill to ClawHub -# using the clawhub-publish formula. - -id = "publish-site-20260331" -formula = "run-publish-site" -context = "Publish updated site to production" - -# Secrets to inject (only these get passed to the container) -secrets = ["DEPLOY_KEY"] - -# Optional: use sonnet model -model = "sonnet" - -# Optional: enable MCP tools -tools = [] - -# Optional: 30 minute timeout -timeout_minutes = 30 diff --git a/vault/examples/release.toml b/vault/examples/release.toml deleted file mode 100644 index f8af6d1..0000000 --- a/vault/examples/release.toml +++ /dev/null @@ -1,35 +0,0 @@ -# vault/examples/release.toml -# Example: Release vault item schema -# -# This example demonstrates the release vault item schema for creating -# versioned releases with vault-gated approval. -# -# The release formula tags Forgejo main, pushes to mirrors, builds and -# tags the agents Docker image, and restarts agent containers. -# -# Example vault item (auto-generated by `disinto release v1.2.0`): -# -# id = "release-v120" -# formula = "release" -# context = "Release v1.2.0" -# secrets = [] -# -# Steps executed by the release formula: -# 1. preflight - Validate prerequisites (version, FORGE_TOKEN, Docker) -# 2. tag-main - Create tag on Forgejo main via API -# 3. 
push-mirrors - Push tag to Codeberg and GitHub mirrors -# 4. build-image - Build agents Docker image with --no-cache -# 5. tag-image - Tag image with version (disinto-agents:v1.2.0) -# 6. restart-agents - Restart agent containers with new image -# 7. commit-result - Write release result to tracking file - -id = "release-v120" -formula = "release" -context = "Release v1.2.0 — includes vault redesign, .profile system, architect agent" -secrets = [] - -# Optional: specify a larger model for complex release logic -# model = "sonnet" - -# Optional: releases may take longer due to Docker builds -# timeout_minutes = 60 diff --git a/vault/examples/webhook-call.toml b/vault/examples/webhook-call.toml deleted file mode 100644 index 27b3f25..0000000 --- a/vault/examples/webhook-call.toml +++ /dev/null @@ -1,21 +0,0 @@ -# vault/examples/webhook-call.toml -# Example: Call an external webhook with authentication -# -# This vault action demonstrates calling an external webhook endpoint -# with proper authentication via injected secrets. - -id = "webhook-call-20260331" -formula = "run-rent-a-human" -context = "Notify Slack channel about deployment completion" - -# Secrets to inject (only these get passed to the container) -secrets = ["DEPLOY_KEY"] - -# Optional: use sonnet model for this action -model = "sonnet" - -# Optional: enable MCP tools -tools = [] - -# Optional: 30 minute timeout -timeout_minutes = 30 diff --git a/vault/validate.sh b/vault/validate.sh deleted file mode 100755 index f01ea63..0000000 --- a/vault/validate.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# vault/validate.sh — Validate vault action TOML files -# -# Usage: ./vault/validate.sh -# -# Validates a vault action TOML file according to the schema defined in -# vault/SCHEMA.md. 
Checks: -# - Required fields are present -# - Secret names are in the allowlist -# - No unknown fields are present -# - Formula exists in formulas/ - -set -euo pipefail - -# Get script directory -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Source vault environment -source "$SCRIPT_DIR/vault-env.sh" - -# Get the TOML file to validate -TOML_FILE="${1:-}" - -if [ -z "$TOML_FILE" ]; then - echo "Usage: $0 " >&2 - echo "Example: $0 vault/examples/publish.toml" >&2 - exit 1 -fi - -# Resolve relative paths -if [[ "$TOML_FILE" != /* ]]; then - TOML_FILE="$(cd "$(dirname "$TOML_FILE")" && pwd)/$(basename "$TOML_FILE")" -fi - -# Run validation -if validate_vault_action "$TOML_FILE"; then - echo "VALID: $TOML_FILE" - echo " ID: $VAULT_ACTION_ID" - echo " Formula: $VAULT_ACTION_FORMULA" - echo " Context: $VAULT_ACTION_CONTEXT" - echo " Secrets: $VAULT_ACTION_SECRETS" - exit 0 -else - echo "INVALID: $TOML_FILE" >&2 - exit 1 -fi diff --git a/vault/vault-agent.sh b/vault/vault-agent.sh new file mode 100755 index 0000000..4436982 --- /dev/null +++ b/vault/vault-agent.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# vault-agent.sh — Invoke claude -p to classify and route pending vault actions +# +# Called by vault-poll.sh when pending actions exist. Reads all pending/*.json, +# builds a prompt with action summaries, and lets the LLM decide routing. +# +# The LLM can call vault-fire.sh (auto-approve) or vault-reject.sh (reject) +# directly. For escalations, it writes a PHASE:escalate file and marks the +# action as "escalated" in pending/ so vault-poll skips it on future runs. 
+ +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/vault-env.sh" + +VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault" +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +PROMPT_FILE="${VAULT_SCRIPT_DIR}/PROMPT.md" +LOGFILE="${VAULT_SCRIPT_DIR}/vault.log" +CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}" + +log() { + printf '[%s] vault-agent: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +# Collect all pending actions (skip already-escalated) +ACTIONS_BATCH="" +ACTION_COUNT=0 + +for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do + [ -f "$action_file" ] || continue + + ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) + [ "$ACTION_STATUS" = "escalated" ] && continue + + # Validate JSON + if ! jq empty < "$action_file" 2>/dev/null; then + ACTION_ID=$(basename "$action_file" .json) + log "malformed JSON: $action_file — rejecting" + bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "malformed JSON" 2>/dev/null || true + continue + fi + + ACTION_JSON=$(cat "$action_file") + ACTIONS_BATCH="${ACTIONS_BATCH} +--- ACTION --- +$(echo "$ACTION_JSON" | jq '.') +--- END ACTION --- +" + ACTION_COUNT=$((ACTION_COUNT + 1)) +done + +if [ "$ACTION_COUNT" -eq 0 ]; then + log "no actionable pending items" + exit 0 +fi + +log "processing $ACTION_COUNT pending action(s) via claude -p" + +# Build the prompt +SYSTEM_PROMPT=$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a vault agent. Classify and route actions.") + +PROMPT="${SYSTEM_PROMPT} + +## Pending Actions (${ACTION_COUNT} total) +${ACTIONS_BATCH} + +## Environment +- FACTORY_ROOT=${FACTORY_ROOT} +- OPS_REPO_ROOT=${OPS_REPO_ROOT} +- Vault data: ${OPS_VAULT_DIR} +- vault-fire.sh: bash ${VAULT_SCRIPT_DIR}/vault-fire.sh +- vault-reject.sh: bash ${VAULT_SCRIPT_DIR}/vault-reject.sh \"\" + +Process each action now. For auto-approve, fire immediately. For reject, call vault-reject.sh. 
+ +For actions that need human approval (escalate), write a PHASE:escalate file +to signal the unified escalation path: + printf 'PHASE:escalate\nReason: vault procurement — %s\n' '' \\ + > /tmp/vault-escalate-.phase +Then STOP and wait — a human will review via the forge." + +CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ + --model sonnet \ + --dangerously-skip-permissions \ + --max-turns 20 \ + 2>/dev/null) || true + +log "claude finished ($(echo "$CLAUDE_OUTPUT" | wc -c) bytes)" + +# Log routing decisions +ROUTES=$(echo "$CLAUDE_OUTPUT" | grep "^ROUTE:" || true) +if [ -n "$ROUTES" ]; then + echo "$ROUTES" | while read -r line; do + log " $line" + done +fi diff --git a/vault/vault-env.sh b/vault/vault-env.sh index 8e7f7c6..79e4176 100644 --- a/vault/vault-env.sh +++ b/vault/vault-env.sh @@ -7,148 +7,3 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/lib/env.sh" # Use vault-bot's own Forgejo identity FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" - -# Vault redesign in progress (PR-based approval workflow) -# This file is kept for shared env setup; scripts being replaced by #73 - -# ============================================================================= -# VAULT ACTION VALIDATION -# ============================================================================= - -# Allowed secret names - must match keys in .env.vault.enc -VAULT_ALLOWED_SECRETS="CLAWHUB_TOKEN GITHUB_TOKEN DEPLOY_KEY NPM_TOKEN DOCKER_HUB_TOKEN" - -# Validate a vault action TOML file -# Usage: validate_vault_action -# Returns: 0 if valid, 1 if invalid -# Sets: VAULT_ACTION_ID, VAULT_ACTION_FORMULA, VAULT_ACTION_CONTEXT on success -validate_vault_action() { - local toml_file="$1" - - if [ -z "$toml_file" ]; then - echo "ERROR: No TOML file specified" >&2 - return 1 - fi - - if [ ! 
-f "$toml_file" ]; then - echo "ERROR: File not found: $toml_file" >&2 - return 1 - fi - - log "Validating vault action: $toml_file" - - # Get script directory for relative path resolution - # FACTORY_ROOT is set by lib/env.sh which is sourced above - local formulas_dir="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}/formulas" - - # Extract TOML values using grep/sed (basic TOML parsing) - local toml_content - toml_content=$(cat "$toml_file") - - # Extract string values (id, formula, context) - local id formula context - id=$(echo "$toml_content" | grep -E '^id\s*=' | sed -E 's/^id\s*=\s*"(.*)"/\1/' | tr -d '\r') - formula=$(echo "$toml_content" | grep -E '^formula\s*=' | sed -E 's/^formula\s*=\s*"(.*)"/\1/' | tr -d '\r') - context=$(echo "$toml_content" | grep -E '^context\s*=' | sed -E 's/^context\s*=\s*"(.*)"/\1/' | tr -d '\r') - - # Extract secrets array - local secrets_line secrets_array - secrets_line=$(echo "$toml_content" | grep -E '^secrets\s*=' | tr -d '\r') - secrets_array=$(echo "$secrets_line" | sed -E 's/^secrets\s*=\s*\[(.*)\]/\1/' | tr -d '[]"' | tr ',' ' ' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - # Check for unknown fields (any top-level key not in allowed list) - local unknown_fields - unknown_fields=$(echo "$toml_content" | grep -E '^[a-zA-Z_][a-zA-Z0-9_]*\s*=' | sed -E 's/^([a-zA-Z_][a-zA-Z0-9_]*)\s*=.*/\1/' | sort -u | while read -r field; do - case "$field" in - id|formula|context|secrets|model|tools|timeout_minutes) ;; - *) echo "$field" ;; - esac - done) - - if [ -n "$unknown_fields" ]; then - echo "ERROR: Unknown fields in TOML: $(echo "$unknown_fields" | tr '\n' ', ' | sed 's/,$//')" >&2 - return 1 - fi - - # Validate required fields - if [ -z "$id" ]; then - echo "ERROR: Missing required field: id" >&2 - return 1 - fi - - if [ -z "$formula" ]; then - echo "ERROR: Missing required field: formula" >&2 - return 1 - fi - - if [ -z "$context" ]; then - echo "ERROR: Missing required field: context" >&2 - return 1 - 
fi - - # Validate formula exists in formulas/ - if [ ! -f "$formulas_dir/${formula}.toml" ]; then - echo "ERROR: Formula not found: $formula" >&2 - return 1 - fi - - # Validate secrets field exists and is not empty - if [ -z "$secrets_line" ]; then - echo "ERROR: Missing required field: secrets" >&2 - return 1 - fi - - # Validate each secret is in the allowlist - for secret in $secrets_array; do - secret=$(echo "$secret" | tr -d '"' | xargs) # trim whitespace and quotes - if [ -n "$secret" ]; then - if ! echo " $VAULT_ALLOWED_SECRETS " | grep -q " $secret "; then - echo "ERROR: Unknown secret (not in allowlist): $secret" >&2 - return 1 - fi - fi - done - - # Validate optional fields if present - # model - if echo "$toml_content" | grep -qE '^model\s*='; then - local model_value - model_value=$(echo "$toml_content" | grep -E '^model\s*=' | sed -E 's/^model\s*=\s*"(.*)"/\1/' | tr -d '\r') - if [ -z "$model_value" ]; then - echo "ERROR: 'model' must be a non-empty string" >&2 - return 1 - fi - fi - - # tools - if echo "$toml_content" | grep -qE '^tools\s*='; then - local tools_line - tools_line=$(echo "$toml_content" | grep -E '^tools\s*=' | tr -d '\r') - if ! 
echo "$tools_line" | grep -q '\['; then - echo "ERROR: 'tools' must be an array" >&2 - return 1 - fi - fi - - # timeout_minutes - if echo "$toml_content" | grep -qE '^timeout_minutes\s*='; then - local timeout_value - timeout_value=$(echo "$toml_content" | grep -E '^timeout_minutes\s*=' | sed -E 's/^timeout_minutes\s*=\s*([0-9]+)/\1/' | tr -d '\r') - if [ -z "$timeout_value" ] || [ "$timeout_value" -le 0 ] 2>/dev/null; then - echo "ERROR: 'timeout_minutes' must be a positive integer" >&2 - return 1 - fi - fi - - # Export validated values (for use by caller script) - export VAULT_ACTION_ID="$id" - export VAULT_ACTION_FORMULA="$formula" - export VAULT_ACTION_CONTEXT="$context" - export VAULT_ACTION_SECRETS="$secrets_array" - - log "VAULT_ACTION_ID=$VAULT_ACTION_ID" - log "VAULT_ACTION_FORMULA=$VAULT_ACTION_FORMULA" - log "VAULT_ACTION_SECRETS=$VAULT_ACTION_SECRETS" - - return 0 -} diff --git a/vault/vault-fire.sh b/vault/vault-fire.sh new file mode 100755 index 0000000..ad57022 --- /dev/null +++ b/vault/vault-fire.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# vault-fire.sh — Execute an approved vault item by ID +# +# Handles two pipelines: +# A. Action gating (*.json): pending/ → approved/ → fired/ +# Execution delegated to ephemeral vault-runner container via disinto vault-run. +# The vault-runner gets vault secrets (.env.vault.enc); this script does NOT. +# B. Procurement (*.md): approved/ → fired/ (writes RESOURCES.md entry) +# +# If item is in pending/, moves to approved/ first. +# If item is already in approved/, fires directly (crash recovery). 
+# +# Usage: bash vault-fire.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/vault-env.sh" + +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" +LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" +RESOURCES_FILE="${OPS_REPO_ROOT}/RESOURCES.md" + +log() { + printf '[%s] vault-fire: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +ACTION_ID="${1:?Usage: vault-fire.sh }" + +# ============================================================================= +# Detect pipeline: procurement (.md) or action gating (.json) +# ============================================================================= +IS_PROCUREMENT=false +ACTION_FILE="" + +if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" ]; then + IS_PROCUREMENT=true + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" +elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" ]; then + IS_PROCUREMENT=true + mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" + log "$ACTION_ID: pending → approved (procurement)" +elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" +elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then + mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" + TMP=$(mktemp) + jq '.status = "approved"' "$ACTION_FILE" > "$TMP" && mv "$TMP" "$ACTION_FILE" + log "$ACTION_ID: pending → approved" +else + log "ERROR: item $ACTION_ID not found in pending/ or approved/" + exit 1 +fi + +# Acquire lock +mkdir -p "$LOCKS_DIR" +LOCKFILE="${LOCKS_DIR}/${ACTION_ID}.lock" +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || true) + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then + log "$ACTION_ID: already being fired by PID 
$LOCK_PID" + exit 0 + fi +fi +echo $$ > "$LOCKFILE" +trap 'rm -f "$LOCKFILE"' EXIT + +# ============================================================================= +# Pipeline A: Procurement — extract RESOURCES.md entry and append +# ============================================================================= +if [ "$IS_PROCUREMENT" = true ]; then + log "$ACTION_ID: firing procurement request" + + # Extract the proposed RESOURCES.md entry from the markdown file. + # Everything after the "## Proposed RESOURCES.md Entry" heading to EOF. + # Uses awk because the entry itself contains ## headings (## ). + ENTRY="" + ENTRY=$(awk '/^## Proposed RESOURCES\.md Entry/{found=1; next} found{print}' "$ACTION_FILE" 2>/dev/null || true) + + # Strip leading/trailing blank lines and markdown code fences + ENTRY=$(echo "$ENTRY" | sed '/^```/d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba;}') + + if [ -z "$ENTRY" ]; then + log "ERROR: $ACTION_ID has no '## Proposed RESOURCES.md Entry' section" + exit 1 + fi + + # Append entry to RESOURCES.md + printf '\n%s\n' "$ENTRY" >> "$RESOURCES_FILE" + log "$ACTION_ID: wrote RESOURCES.md entry" + + # Move to fired/ + mv "$ACTION_FILE" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.md" + rm -f "${LOCKS_DIR}/${ACTION_ID}.notified" + log "$ACTION_ID: approved → fired (procurement)" + exit 0 +fi + +# ============================================================================= +# Pipeline B: Action gating — delegate to ephemeral vault-runner container +# ============================================================================= +ACTION_TYPE=$(jq -r '.type // ""' < "$ACTION_FILE") +ACTION_SOURCE=$(jq -r '.source // ""' < "$ACTION_FILE") + +if [ -z "$ACTION_TYPE" ]; then + log "ERROR: $ACTION_ID has no type field" + exit 1 +fi + +log "$ACTION_ID: firing type=$ACTION_TYPE source=$ACTION_SOURCE via vault-runner" + +FIRE_EXIT=0 + +# Delegate execution to the ephemeral vault-runner container. 
+# The vault-runner gets vault secrets (.env.vault.enc) injected at runtime; +# this host process never sees those secrets. +if [ -f "${FACTORY_ROOT}/.env.vault.enc" ] && [ -f "${FACTORY_ROOT}/docker-compose.yml" ]; then + bash "${FACTORY_ROOT}/bin/disinto" vault-run "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$? +else + # Fallback for bare-metal or pre-migration setups: run action handler directly + log "$ACTION_ID: no .env.vault.enc or docker-compose.yml — running action directly" + bash "${SCRIPT_DIR}/vault-run-action.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$? +fi + +# ============================================================================= +# Move to fired/ or leave in approved/ on failure +# ============================================================================= +if [ "$FIRE_EXIT" -eq 0 ]; then + # Update with fired timestamp and move to fired/ + TMP=$(mktemp) + jq --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" '.status = "fired" | .fired_at = $ts' "$ACTION_FILE" > "$TMP" \ + && mv "$TMP" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" + rm -f "$ACTION_FILE" + log "$ACTION_ID: approved → fired" +else + log "ERROR: $ACTION_ID fire failed (exit $FIRE_EXIT) — stays in approved/ for retry" + exit "$FIRE_EXIT" +fi diff --git a/vault/vault-poll.sh b/vault/vault-poll.sh new file mode 100755 index 0000000..a32b31f --- /dev/null +++ b/vault/vault-poll.sh @@ -0,0 +1,301 @@ +#!/usr/bin/env bash +# vault-poll.sh — Vault: process pending actions + procurement requests +# +# Runs every 30min via cron. Two pipelines: +# A. Action gating (*.json): auto-approve/escalate/reject via vault-agent.sh +# B. Procurement (*.md): notify human, fire approved requests via vault-fire.sh +# +# Phases: +# 1. Retry any approved/ items that weren't fired (crash recovery) +# 2. Auto-reject escalations with no reply for 48h +# 3. Invoke vault-agent.sh for new pending JSON actions +# 4. 
Notify human about new pending procurement requests (.md) +# +# Cron: */30 * * * * /path/to/disinto/vault/vault-poll.sh +# +# Peek: cat /tmp/vault-status +# Log: tail -f /path/to/disinto/vault/vault.log + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/../lib/env.sh" +# Use vault-bot's own Forgejo identity (#747) +FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" + +LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" +STATUSFILE="/tmp/vault-status" +LOCKFILE="/tmp/vault-poll.lock" +VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault" +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" + +TIMEOUT_HOURS=48 + +# Prevent overlapping runs +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) + if kill -0 "$LOCK_PID" 2>/dev/null; then + exit 0 + fi + rm -f "$LOCKFILE" +fi +echo $$ > "$LOCKFILE" +trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT + +log() { + printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +status() { + printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" + log "$*" +} + +# Acquire per-action lock (returns 0 if acquired, 1 if already locked) +lock_action() { + local action_id="$1" + local lockfile="${LOCKS_DIR}/${action_id}.lock" + mkdir -p "$LOCKS_DIR" + if [ -f "$lockfile" ]; then + local lock_pid + lock_pid=$(cat "$lockfile" 2>/dev/null || true) + if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then + return 1 + fi + rm -f "$lockfile" + fi + echo $$ > "$lockfile" + return 0 +} + +unlock_action() { + local action_id="$1" + rm -f "${LOCKS_DIR}/${action_id}.lock" +} + +# ============================================================================= +# PHASE 1: Retry approved items (crash recovery — JSON actions + MD procurement) +# ============================================================================= +status "phase 1: retrying approved items" + +for action_file in "${OPS_VAULT_DIR}/approved/"*.json; do 
+ [ -f "$action_file" ] || continue + ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) + [ -z "$ACTION_ID" ] && continue + + if ! lock_action "$ACTION_ID"; then + log "skip $ACTION_ID — locked by another process" + continue + fi + + log "retrying approved action: $ACTION_ID" + if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then + log "fired $ACTION_ID (retry)" + else + log "ERROR: fire failed for $ACTION_ID (retry)" + fi + + unlock_action "$ACTION_ID" +done + +# Retry approved procurement requests (.md) +for req_file in "${OPS_VAULT_DIR}/approved/"*.md; do + [ -f "$req_file" ] || continue + REQ_ID=$(basename "$req_file" .md) + + if ! lock_action "$REQ_ID"; then + log "skip procurement $REQ_ID — locked by another process" + continue + fi + + log "retrying approved procurement: $REQ_ID" + if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$REQ_ID" >> "$LOGFILE" 2>&1; then + log "fired procurement $REQ_ID (retry)" + else + log "ERROR: fire failed for procurement $REQ_ID (retry)" + fi + + unlock_action "$REQ_ID" +done + +# ============================================================================= +# PHASE 2: Timeout escalations (48h no reply → auto-reject) +# ============================================================================= +status "phase 2: checking escalation timeouts" + +NOW_EPOCH=$(date +%s) +TIMEOUT_SECS=$((TIMEOUT_HOURS * 3600)) + +for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do + [ -f "$action_file" ] || continue + + ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) + [ "$ACTION_STATUS" != "escalated" ] && continue + + ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) + ESCALATED_AT=$(jq -r '.escalated_at // ""' < "$action_file" 2>/dev/null) + [ -z "$ESCALATED_AT" ] && continue + + ESCALATED_EPOCH=$(date -d "$ESCALATED_AT" +%s 2>/dev/null || echo 0) + AGE_SECS=$((NOW_EPOCH - ESCALATED_EPOCH)) + + if [ "$AGE_SECS" -gt "$TIMEOUT_SECS" ]; then + AGE_HOURS=$((AGE_SECS / 3600)) + 
log "timeout: $ACTION_ID escalated ${AGE_HOURS}h ago with no reply — auto-rejecting" + bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "timeout (${AGE_HOURS}h, no human reply)" >> "$LOGFILE" 2>&1 || true + fi +done + +# ============================================================================= +# PHASE 3: Process new pending actions (JSON — action gating) +# ============================================================================= +status "phase 3: processing pending actions" + +PENDING_COUNT=0 +PENDING_SUMMARY="" + +for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do + [ -f "$action_file" ] || continue + + ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) + # Skip already-escalated actions (waiting for human reply) + [ "$ACTION_STATUS" = "escalated" ] && continue + + ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) + [ -z "$ACTION_ID" ] && continue + + if ! lock_action "$ACTION_ID"; then + log "skip $ACTION_ID — locked" + continue + fi + + PENDING_COUNT=$((PENDING_COUNT + 1)) + ACTION_TYPE=$(jq -r '.type // "unknown"' < "$action_file" 2>/dev/null) + ACTION_SOURCE=$(jq -r '.source // "unknown"' < "$action_file" 2>/dev/null) + PENDING_SUMMARY="${PENDING_SUMMARY} ${ACTION_ID} [${ACTION_TYPE}] from ${ACTION_SOURCE}\n" + + unlock_action "$ACTION_ID" +done + +if [ "$PENDING_COUNT" -gt 0 ]; then + log "found $PENDING_COUNT pending action(s), invoking vault-agent" + status "invoking vault-agent for $PENDING_COUNT action(s)" + + bash "${VAULT_SCRIPT_DIR}/vault-agent.sh" >> "$LOGFILE" 2>&1 || { + log "ERROR: vault-agent failed" + } +fi + +# ============================================================================= +# PHASE 4: Notify human about new pending procurement requests (.md) +# ============================================================================= +status "phase 4: processing pending procurement requests" + +PROCURE_COUNT=0 + +for req_file in "${OPS_VAULT_DIR}/pending/"*.md; do + [ -f "$req_file" ] || 
continue + REQ_ID=$(basename "$req_file" .md) + + # Check if already notified (marker file) + if [ -f "${LOCKS_DIR}/${REQ_ID}.notified" ]; then + continue + fi + + if ! lock_action "$REQ_ID"; then + log "skip procurement $REQ_ID — locked" + continue + fi + + PROCURE_COUNT=$((PROCURE_COUNT + 1)) + + # Extract title from first heading + REQ_TITLE=$(grep -m1 '^# ' "$req_file" | sed 's/^# //' || echo "$REQ_ID") + + log "new procurement request: $REQ_ID — $REQ_TITLE" + + # Mark as notified so we don't re-send + mkdir -p "${LOCKS_DIR}" + touch "${LOCKS_DIR}/${REQ_ID}.notified" + + unlock_action "$REQ_ID" +done + +# ============================================================================= +# PHASE 5: Detect vault-bot authorized comments on issues +# ============================================================================= +status "phase 5: scanning for vault-bot authorized comments" + +COMMENT_COUNT=0 + +if [ -n "${FORGE_REPO:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then + # Get open issues with action label + ACTION_ISSUES=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/issues?state=open&labels=action&limit=50" 2>/dev/null) || ACTION_ISSUES="[]" + + ISSUE_COUNT=$(printf '%s' "$ACTION_ISSUES" | jq 'length') + for idx in $(seq 0 $((ISSUE_COUNT - 1))); do + ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$idx].number") + + # Skip if already processed + if [ -f "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" ]; then + continue + fi + + # Get comments on this issue + COMMENTS=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/issues/${ISSUE_NUM}/comments?limit=50" 2>/dev/null) || continue + + # Look for vault-bot comments containing VAULT:APPROVED with a JSON action spec + APPROVED_BODY=$(printf '%s' "$COMMENTS" | jq -r ' + [.[] | select(.user.login == "vault-bot") | select(.body | test("VAULT:APPROVED"))] | last | .body // empty + ' 2>/dev/null) || continue + + [ -z 
"$APPROVED_BODY" ] && continue + + # Extract JSON action spec from fenced code block in the comment + ACTION_JSON=$(printf '%s' "$APPROVED_BODY" | sed -n '/^```json$/,/^```$/p' | sed '1d;$d') + [ -z "$ACTION_JSON" ] && continue + + # Validate JSON + if ! printf '%s' "$ACTION_JSON" | jq empty 2>/dev/null; then + log "malformed action JSON in vault-bot comment on issue #${ISSUE_NUM}" + continue + fi + + ACTION_ID=$(printf '%s' "$ACTION_JSON" | jq -r '.id // empty') + if [ -z "$ACTION_ID" ]; then + ACTION_ID="issue-${ISSUE_NUM}-$(date +%s)" + ACTION_JSON=$(printf '%s' "$ACTION_JSON" | jq --arg id "$ACTION_ID" '.id = $id') + fi + + # Skip if this action already exists in any stage + if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ] || \ + [ -f "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" ] || \ + [ -f "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" ]; then + continue + fi + + log "vault-bot authorized action on issue #${ISSUE_NUM}: ${ACTION_ID}" + printf '%s' "$ACTION_JSON" | jq '.status = "approved"' > "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" + COMMENT_COUNT=$((COMMENT_COUNT + 1)) + + # Fire the action + if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then + log "fired ${ACTION_ID} from issue #${ISSUE_NUM}" + # Mark issue as processed + touch "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" + else + log "ERROR: fire failed for ${ACTION_ID} from issue #${ISSUE_NUM}" + fi + done +fi + +if [ "$PENDING_COUNT" -eq 0 ] && [ "$PROCURE_COUNT" -eq 0 ] && [ "$COMMENT_COUNT" -eq 0 ]; then + status "all clear — no pending items" +else + status "poll complete — ${PENDING_COUNT} action(s), ${PROCURE_COUNT} procurement(s), ${COMMENT_COUNT} comment-authorized" +fi diff --git a/vault/vault-reject.sh b/vault/vault-reject.sh new file mode 100755 index 0000000..54fa127 --- /dev/null +++ b/vault/vault-reject.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# vault-reject.sh — Move a vault action to rejected/ with reason +# +# Usage: bash vault-reject.sh "" + +set 
-euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "${SCRIPT_DIR}/vault-env.sh" + +OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" +LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" +LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" + +log() { + printf '[%s] vault-reject: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +ACTION_ID="${1:?Usage: vault-reject.sh \"\"}" +REASON="${2:-unspecified}" + +# Find the action file +ACTION_FILE="" +if [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then + ACTION_FILE="${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" +elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then + ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" +else + log "ERROR: action $ACTION_ID not found in pending/ or approved/" + exit 1 +fi + +# Update with rejection metadata and move to rejected/ +TMP=$(mktemp) +jq --arg reason "$REASON" --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + '.status = "rejected" | .rejected_at = $ts | .reject_reason = $reason' \ + "$ACTION_FILE" > "$TMP" && mv "$TMP" "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" +rm -f "$ACTION_FILE" + +# Clean up lock if present +rm -f "${LOCKS_DIR}/${ACTION_ID}.lock" + +log "$ACTION_ID: rejected — $REASON" diff --git a/vault/vault-run-action.sh b/vault/vault-run-action.sh new file mode 100755 index 0000000..707f3db --- /dev/null +++ b/vault/vault-run-action.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# vault-run-action.sh — Execute an action inside the ephemeral vault-runner container +# +# This script is the entrypoint for the vault-runner container. It runs with +# vault secrets injected as environment variables (GITHUB_TOKEN, CLAWHUB_TOKEN, +# deploy keys, etc.) and dispatches to the appropriate action handler. +# +# The vault-runner container is ephemeral: it starts, runs the action, and is +# destroyed. Secrets exist only in container memory, never on disk. 
+# +# Usage: vault-run-action.sh + +set -euo pipefail + +VAULT_SCRIPT_DIR="${DISINTO_VAULT_DIR:-/home/agent/disinto/vault}" +OPS_VAULT_DIR="${DISINTO_OPS_VAULT_DIR:-${VAULT_SCRIPT_DIR}}" +LOGFILE="${VAULT_SCRIPT_DIR}/vault.log" +ACTION_ID="${1:?Usage: vault-run-action.sh }" + +log() { + printf '[%s] vault-runner: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" 2>/dev/null || \ + printf '[%s] vault-runner: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 +} + +# Find action file in approved/ +ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" +if [ ! -f "$ACTION_FILE" ]; then + log "ERROR: action file not found: ${ACTION_FILE}" + echo "ERROR: action file not found: ${ACTION_FILE}" >&2 + exit 1 +fi + +ACTION_TYPE=$(jq -r '.type // ""' < "$ACTION_FILE") +ACTION_SOURCE=$(jq -r '.source // ""' < "$ACTION_FILE") +PAYLOAD=$(jq -c '.payload // {}' < "$ACTION_FILE") + +if [ -z "$ACTION_TYPE" ]; then + log "ERROR: ${ACTION_ID} has no type field" + exit 1 +fi + +log "${ACTION_ID}: executing type=${ACTION_TYPE} source=${ACTION_SOURCE}" + +FIRE_EXIT=0 + +case "$ACTION_TYPE" in + webhook-call) + # HTTP call to endpoint with optional method/headers/body + ENDPOINT=$(echo "$PAYLOAD" | jq -r '.endpoint // ""') + METHOD=$(echo "$PAYLOAD" | jq -r '.method // "POST"') + REQ_BODY=$(echo "$PAYLOAD" | jq -r '.body // ""') + + if [ -z "$ENDPOINT" ]; then + log "ERROR: ${ACTION_ID} webhook-call missing endpoint" + exit 1 + fi + + CURL_ARGS=(-sf -X "$METHOD" -o /dev/null -w "%{http_code}") + while IFS= read -r header; do + [ -n "$header" ] && CURL_ARGS+=(-H "$header") + done < <(echo "$PAYLOAD" | jq -r '.headers // {} | to_entries[] | "\(.key): \(.value)"' 2>/dev/null || true) + if [ -n "$REQ_BODY" ] && [ "$REQ_BODY" != "null" ]; then + CURL_ARGS+=(-d "$REQ_BODY") + fi + + HTTP_CODE=$(curl "${CURL_ARGS[@]}" "$ENDPOINT" 2>/dev/null) || HTTP_CODE="000" + if [[ "$HTTP_CODE" =~ ^2 ]]; then + log "${ACTION_ID}: webhook-call -> HTTP ${HTTP_CODE} OK" + else + log "ERROR: 
${ACTION_ID} webhook-call -> HTTP ${HTTP_CODE}" + FIRE_EXIT=1 + fi + ;; + + promote) + # Promote a Woodpecker pipeline to a deployment environment (staging/production). + # Payload: {"repo_id": N, "pipeline": N, "environment": "staging"|"production"} + PROMOTE_REPO_ID=$(echo "$PAYLOAD" | jq -r '.repo_id // ""') + PROMOTE_PIPELINE=$(echo "$PAYLOAD" | jq -r '.pipeline // ""') + PROMOTE_ENV=$(echo "$PAYLOAD" | jq -r '.environment // ""') + + if [ -z "$PROMOTE_REPO_ID" ] || [ -z "$PROMOTE_PIPELINE" ] || [ -z "$PROMOTE_ENV" ]; then + log "ERROR: ${ACTION_ID} promote missing repo_id, pipeline, or environment" + FIRE_EXIT=1 + else + # Validate environment is staging or production + case "$PROMOTE_ENV" in + staging|production) ;; + *) + log "ERROR: ${ACTION_ID} promote invalid environment '${PROMOTE_ENV}' (must be staging or production)" + FIRE_EXIT=1 + ;; + esac + + if [ "$FIRE_EXIT" -eq 0 ]; then + WP_SERVER="${WOODPECKER_SERVER:-http://woodpecker:8000}" + WP_TOKEN="${WOODPECKER_TOKEN:-}" + + if [ -z "$WP_TOKEN" ]; then + log "ERROR: ${ACTION_ID} promote requires WOODPECKER_TOKEN" + FIRE_EXIT=1 + else + PROMOTE_RESP=$(curl -sf -X POST \ + -H "Authorization: Bearer ${WP_TOKEN}" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "event=deployment&deploy_to=${PROMOTE_ENV}" \ + "${WP_SERVER}/api/repos/${PROMOTE_REPO_ID}/pipelines/${PROMOTE_PIPELINE}" 2>/dev/null) || PROMOTE_RESP="" + + NEW_PIPELINE=$(printf '%s' "$PROMOTE_RESP" | jq -r '.number // empty' 2>/dev/null) + if [ -n "$NEW_PIPELINE" ]; then + log "${ACTION_ID}: promoted pipeline ${PROMOTE_PIPELINE} to ${PROMOTE_ENV} -> new pipeline #${NEW_PIPELINE}" + else + log "ERROR: ${ACTION_ID} promote API failed (repo_id=${PROMOTE_REPO_ID} pipeline=${PROMOTE_PIPELINE} env=${PROMOTE_ENV})" + FIRE_EXIT=1 + fi + fi + fi + fi + ;; + + blog-post|social-post|email-blast|pricing-change|dns-change|stripe-charge) + HANDLER="${VAULT_SCRIPT_DIR}/handlers/${ACTION_TYPE}.sh" + if [ -x "$HANDLER" ]; then + bash "$HANDLER" 
"$ACTION_ID" "$PAYLOAD" 2>&1 || FIRE_EXIT=$? + else + log "ERROR: ${ACTION_ID} no handler for type '${ACTION_TYPE}' (${HANDLER} not found)" + FIRE_EXIT=1 + fi + ;; + + *) + log "ERROR: ${ACTION_ID} unknown action type '${ACTION_TYPE}'" + FIRE_EXIT=1 + ;; +esac + +exit "$FIRE_EXIT"