diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d9781fe --- /dev/null +++ b/.dockerignore @@ -0,0 +1,20 @@ +# Secrets — prevent .env files from being baked into the image +.env +.env.enc +.env.vault +.env.vault.enc + +# Version control — .git is huge and not needed in image +.git + +# Archives — not needed at runtime +*.tar.gz + +# Prometheus data — large, ephemeral data +prometheus-data/ + +# Compose files — only needed at runtime via volume mount +docker-compose.yml + +# Project TOML files — gitignored anyway, won't be in build context +projects/*.toml diff --git a/.env.example b/.env.example index 762acd3..6124671 100644 --- a/.env.example +++ b/.env.example @@ -26,8 +26,8 @@ FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token FORGE_VAULT_TOKEN= # [SECRET] vault-bot API token FORGE_SUPERVISOR_TOKEN= # [SECRET] supervisor-bot API token FORGE_PREDICTOR_TOKEN= # [SECRET] predictor-bot API token -FORGE_ACTION_TOKEN= # [SECRET] action-bot API token -FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,action-bot +FORGE_ARCHITECT_TOKEN= # [SECRET] architect-bot API token +FORGE_BOT_USERNAMES=dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot # ── Backwards compatibility ─────────────────────────────────────────────── # If CODEBERG_TOKEN is set but FORGE_TOKEN is not, env.sh falls back to @@ -49,7 +49,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name # ── Vault-only secrets (DO NOT put these in .env) ──────────────────────── # These tokens grant access to external systems (GitHub, ClawHub, deploy targets). -# They live ONLY in .env.vault.enc and are injected into the ephemeral vault-runner +# They live ONLY in .env.vault.enc and are injected into the ephemeral runner # container at fire time (#745). lib/env.sh explicitly unsets them so agents # can never hold them directly — all external actions go through vault dispatch. 
# @@ -58,7 +58,7 @@ WOODPECKER_DB_NAME=woodpecker # [CONFIG] Postgres database name # (deploy keys) — SSH keys for deployment targets # # To manage vault secrets: disinto secrets edit-vault -# See also: vault/vault-run-action.sh, vault/vault-fire.sh +# (vault redesign in progress: PR-based approval, see #73-#77) # ── Project-specific secrets ────────────────────────────────────────────── # Store all project secrets here so formulas reference env vars, never hardcode. diff --git a/.gitignore b/.gitignore index dd9365d..fc2d715 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,9 @@ metrics/supervisor-metrics.jsonl .DS_Store dev/ci-fixes-*.json gardener/dust.jsonl + +# Individual encrypted secrets (managed by disinto secrets add) +secrets/ + +# Pre-built binaries for Docker builds (avoid network calls during build) +docker/agents/bin/ diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 9a37bf4..40fc580 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -6,8 +6,6 @@ # 2. Every custom function called by agent scripts is defined in lib/ or the script itself # # Fast (<10s): no network, no tmux, no Claude needed. -# Would have caught: kill_tmux_session (renamed), create_agent_session (missing), -# read_phase (missing from dev-agent.sh scope) set -euo pipefail @@ -21,12 +19,16 @@ FAILED=0 # Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296). get_fns() { local f="$1" - # BRE mode (no -E). Use [(][)] for literal parens — unambiguous across - # GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping - # even in BRE). BRE one-or-more via [X][X]* instead of +. - grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \ - | sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \ - | sort -u || true + # Pure-awk implementation: avoids grep/sed cross-platform differences + # (BusyBox grep BRE quirks, sed ; separator issues on Alpine). 
+ awk ' + /^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ { + line = $0 + gsub(/^[[:space:]]+/, "", line) + sub(/[[:space:]]*[(].*/, "", line) + print line + } + ' "$f" 2>/dev/null | sort -u || true } # Extract call-position identifiers that look like custom function calls: @@ -84,7 +86,7 @@ while IFS= read -r -d '' f; do printf 'FAIL [syntax] %s\n' "$f" FAILED=1 fi -done < <(find dev gardener review planner supervisor lib vault action -name "*.sh" -print0 2>/dev/null) +done < <(find dev gardener review planner supervisor architect lib vault -name "*.sh" -print0 2>/dev/null) echo "syntax check done" # ── 2. Function-resolution check ───────────────────────────────────────────── @@ -95,13 +97,12 @@ echo "=== 2/2 Function resolution ===" # # Included — these are inline-sourced by agent scripts: # lib/env.sh — sourced by every agent (log, forge_api, etc.) -# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.) # lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session) # lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.) # lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) -# lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets) -# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.) +# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets) +# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, check_memory, etc.) 
# lib/mirrors.sh — sourced by merge sites (mirror_push) # lib/guard.sh — sourced by all cron entry points (check_active) # lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps @@ -116,7 +117,7 @@ echo "=== 2/2 Function resolution ===" # If a new lib file is added and sourced by agents, add it to LIB_FUNS below # and add a check_script call for it in the lib files section further down. LIB_FUNS=$( - for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do + for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do if [ -f "$f" ]; then get_fns "$f"; fi done | sort -u ) @@ -180,13 +181,12 @@ check_script() { # These are already in LIB_FUNS (their definitions are available to agents), # but this verifies calls *within* each lib file are also resolvable. check_script lib/env.sh lib/mirrors.sh -check_script lib/agent-session.sh check_script lib/agent-sdk.sh check_script lib/ci-helpers.sh check_script lib/secret-scan.sh check_script lib/file-action-issue.sh lib/secret-scan.sh check_script lib/tea-helpers.sh lib/secret-scan.sh -check_script lib/formula-session.sh lib/agent-session.sh +check_script lib/formula-session.sh check_script lib/load-project.sh check_script lib/mirrors.sh lib/env.sh check_script lib/guard.sh @@ -199,26 +199,19 @@ check_script lib/ci-debug.sh check_script lib/parse-deps.sh # Agent scripts — list cross-sourced files where function scope flows across files. -# phase-handler.sh defines default callback stubs; sourcing agents may override. 
check_script dev/dev-agent.sh -check_script dev/phase-handler.sh lib/secret-scan.sh check_script dev/dev-poll.sh check_script dev/phase-test.sh check_script gardener/gardener-run.sh check_script review/review-pr.sh lib/agent-sdk.sh check_script review/review-poll.sh -check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh +check_script planner/planner-run.sh lib/formula-session.sh check_script supervisor/supervisor-poll.sh check_script supervisor/update-prompt.sh -check_script vault/vault-agent.sh -check_script vault/vault-fire.sh -check_script vault/vault-poll.sh -check_script vault/vault-reject.sh -check_script action/action-poll.sh -check_script action/action-agent.sh check_script supervisor/supervisor-run.sh check_script supervisor/preflight.sh check_script predictor/predictor-run.sh +check_script architect/architect-run.sh echo "function resolution check done" diff --git a/.woodpecker/ci.yml b/.woodpecker/ci.yml index 08ae24d..fc2f12a 100644 --- a/.woodpecker/ci.yml +++ b/.woodpecker/ci.yml @@ -8,6 +8,19 @@ when: event: [push, pull_request] +# Override default clone to authenticate against Forgejo using FORGE_TOKEN. +# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous +# git clones fail with exit code 128. FORGE_TOKEN is injected globally via +# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh). +clone: + git: + image: alpine/git + commands: + - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|") + - git clone --depth 1 "$AUTH_URL" . 
+ - git fetch --depth 1 origin "$CI_COMMIT_REF" + - git checkout FETCH_HEAD + steps: - name: shellcheck image: koalaman/shellcheck-alpine:stable diff --git a/.woodpecker/detect-duplicates.py b/.woodpecker/detect-duplicates.py index c43fd1f..33ec6ac 100644 --- a/.woodpecker/detect-duplicates.py +++ b/.woodpecker/detect-duplicates.py @@ -179,9 +179,16 @@ def collect_findings(root): Returns ``(ap_hits, dup_groups)`` with file paths relative to *root*. """ root = Path(root) - sh_files = sorted( - p for p in root.rglob("*.sh") if ".git" not in p.parts - ) + # Skip architect scripts for duplicate detection (stub formulas, see #99) + EXCLUDED_SUFFIXES = ("architect/architect-run.sh",) + + def is_excluded(p): + """True for paths to skip: .git internals or excluded suffixes.""" + return ".git" in p.parts or any( + str(p).endswith(suffix) for suffix in EXCLUDED_SUFFIXES + ) + + sh_files = sorted(p for p in root.rglob("*.sh") if not is_excluded(p)) ap_hits = check_anti_patterns(sh_files) dup_groups = check_duplicates(sh_files) @@ -238,9 +245,77 @@ def print_duplicates(groups, label=""): # --------------------------------------------------------------------------- def main() -> int: - sh_files = sorted( - p for p in Path(".").rglob("*.sh") if ".git" not in p.parts - ) + # Skip architect scripts for duplicate detection (stub formulas, see #99) + EXCLUDED_SUFFIXES = ("architect/architect-run.sh",) + + def is_excluded(p): + """True for paths to skip: .git internals or excluded suffixes.""" + return ".git" in p.parts or any( + str(p).endswith(suffix) for suffix in EXCLUDED_SUFFIXES + ) + + sh_files = sorted(p for p in Path(".").rglob("*.sh") if not is_excluded(p)) + + # Standard patterns that are intentionally repeated across formula-driven agents + # These are not copy-paste violations but the expected structure + ALLOWED_HASHES = { + # Standard agent header: shebang, set -euo pipefail, directory resolution + "c93baa0f19d6b9ba271428bf1cf20b45": 
"Standard agent header (set -euo pipefail, SCRIPT_DIR, FACTORY_ROOT)", + # formula_prepare_profile_context followed by scratch context reading + "eaa735b3598b7b73418845ab00d8aba5": "Standard .profile context setup (formula_prepare_profile_context + SCRATCH_CONTEXT)", + # Standard prompt template: GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION + "2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)", + "93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)", + "c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)", + # install_project_crons function in entrypoint.sh and entrypoint-llama.sh (intentional duplicate) + "007e1390498374c68ab5d66aa6d277b2": "install_project_crons function in entrypoints (window 007e1390)", + "04143957d4c63e8a16ac28bddaff589b": "install_project_crons function in entrypoints (window 04143957)", + "076a19221cde674b2fce20a17292fa78": "install_project_crons function in entrypoints (window 076a1922)", + "0d498287626e105f16b24948aed53584": "install_project_crons function in entrypoints (window 0d498287)", + "137b746928011acd758c7a9c690810b2": "install_project_crons function in entrypoints (window 137b7469)", + "287d33d98d21e3e07e0869e56ad94527": "install_project_crons function in entrypoints (window 287d33d9)", + "325a3d54a15e59d333ec2a20c062cc8c": "install_project_crons function in entrypoints (window 325a3d54)", + "34e1943d5738f540d67c5c6bd3e60b20": "install_project_crons function in entrypoints (window 34e1943d)", + "3dabd19698f9705b05376c38042ccce8": "install_project_crons function in entrypoints (window 3dabd196)", + "446b420f7f9821a2553bc4995d1fac25": "install_project_crons function in entrypoints (window 446b420f)", + "4826cf4896b792368c7b4d77573d0f8b": "install_project_crons function in entrypoints (window 4826cf48)", + 
"4e564d3bbda0ef33962af6042736dc1e": "install_project_crons function in entrypoints (window 4e564d3b)", + "5a3d92b22e5d5bca8cce17d581ac6803": "install_project_crons function in entrypoints (window 5a3d92b2)", + "63c20c5a31cf5e08f3a901ddf6db98af": "install_project_crons function in entrypoints (window 63c20c5a)", + "77547751325562fac397bbfd3a21c88e": "install_project_crons function in entrypoints (window 77547751)", + "80bdff63e54b4a260043d264b83d8eb0": "install_project_crons function in entrypoints (window 80bdff63)", + "84e55706393f731b293890dd6d830316": "install_project_crons function in entrypoints (window 84e55706)", + "85f8a9d029ee9efecca73fd30449ccf4": "install_project_crons function in entrypoints (window 85f8a9d0)", + "86e28dae676c905c5aa0035128e20e46": "install_project_crons function in entrypoints (window 86e28dae)", + "a222b73bcd6a57adb2315726e81ab6cf": "install_project_crons function in entrypoints (window a222b73b)", + "abd6c7efe66f533c48c883c2a6998886": "install_project_crons function in entrypoints (window abd6c7ef)", + "bcfeb67ce4939181330afea4949a95cf": "install_project_crons function in entrypoints (window bcfeb67c)", + "c1248c98f978c48e4a1e5009a1440917": "install_project_crons function in entrypoints (window c1248c98)", + "c40571185b3306345ecf9ac33ab352a6": "install_project_crons function in entrypoints (window c4057118)", + "c566639b237036a7a385982274d3d271": "install_project_crons function in entrypoints (window c566639b)", + "d9cd2f3d874c32366d577ea0d334cd1a": "install_project_crons function in entrypoints (window d9cd2f3d)", + "df4d3e905b12f2c68b206e45dddf9214": "install_project_crons function in entrypoints (window df4d3e90)", + "e8e65ccf867fc6cbe49695ecdce2518e": "install_project_crons function in entrypoints (window e8e65ccf)", + "eb8b298f06cda4359cc171206e0014bf": "install_project_crons function in entrypoints (window eb8b298f)", + "ecdf0daa2f2845359a6a4aa12d327246": "install_project_crons function in entrypoints (window ecdf0daa)", + 
"eeac93b2fba4de4589d36ca20845ec9f": "install_project_crons function in entrypoints (window eeac93b2)", + "f08a7139db9c96cd3526549c499c0332": "install_project_crons function in entrypoints (window f08a7139)", + "f0917809bdf28ff93fff0749e7e7fea0": "install_project_crons function in entrypoints (window f0917809)", + "f0e4101f9b90c2fa921e088057a96db7": "install_project_crons function in entrypoints (window f0e4101f)", + # Structural end-of-while-loop+case pattern: `return 1 ;; esac done }` + # Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh + "29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)", + # Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh + # Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh + "059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)", + # Docker compose environment block for agents service (generators.sh + hire-agent.sh) + # Intentional duplicate - both generate the same docker-compose.yml template + "8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh)", + "fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh)", + "e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh)", + # The hash shown in output is 161a80f7 - need to match exactly what the script finds + "161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh)", + } if not sh_files: print("No .sh files found.") @@ -276,8 +351,13 @@ def main() -> int: # Duplicate diff: key by content hash base_dup_hashes = {g[0] for g in base_dups} - new_dups = [g for g in cur_dups if g[0] not in base_dup_hashes] - pre_dups = [g for g in cur_dups if g[0] in base_dup_hashes] + # Filter 
out allowed standard patterns that are intentionally repeated + new_dups = [ + g for g in cur_dups + if g[0] not in base_dup_hashes and g[0] not in ALLOWED_HASHES + ] + # Also filter allowed hashes from pre_dups for reporting + pre_dups = [g for g in cur_dups if g[0] in base_dup_hashes and g[0] not in ALLOWED_HASHES] # Report pre-existing as info if pre_ap or pre_dups: diff --git a/.woodpecker/smoke-init.yml b/.woodpecker/smoke-init.yml index 69afddb..3953053 100644 --- a/.woodpecker/smoke-init.yml +++ b/.woodpecker/smoke-init.yml @@ -1,45 +1,19 @@ -# .woodpecker/smoke-init.yml — End-to-end smoke test for disinto init -# -# Uses the Forgejo image directly (not as a service) so we have CLI -# access to set up Forgejo and create the bootstrap admin user. -# Then runs disinto init --bare --yes against the local Forgejo instance. -# -# Forgejo refuses to run as root, so all forgejo commands use su-exec -# to run as the 'git' user (pre-created in the Forgejo Docker image). - when: - event: pull_request path: - "bin/disinto" - "lib/load-project.sh" - - "tests/smoke-init.sh" + - "lib/env.sh" + - "lib/generators.sh" + - "tests/**" - ".woodpecker/smoke-init.yml" - - "docker/**" - - event: push - branch: main - path: - - "bin/disinto" - - "lib/load-project.sh" - - "tests/smoke-init.sh" - - ".woodpecker/smoke-init.yml" - - "docker/**" steps: - name: smoke-init - image: codeberg.org/forgejo/forgejo:11.0 - environment: - SMOKE_FORGE_URL: http://localhost:3000 + image: python:3-alpine commands: - # Install test dependencies (Alpine-based image) - - apk add --no-cache bash curl jq python3 git >/dev/null 2>&1 - # Set up Forgejo data directories and config (owned by git user) - - mkdir -p /data/gitea/conf /data/gitea/repositories /data/gitea/lfs /data/gitea/log /data/git/.ssh /data/ssh - - printf '[database]\nDB_TYPE = sqlite3\nPATH = /data/gitea/forgejo.db\n\n[server]\nHTTP_PORT = 3000\nROOT_URL = http://localhost:3000/\nLFS_START_SERVER = false\n\n[security]\nINSTALL_LOCK = 
true\n\n[service]\nDISABLE_REGISTRATION = true\n' > /data/gitea/conf/app.ini - - chown -R git:git /data - # Start Forgejo as git user in background and wait for API - - su-exec git forgejo web --config /data/gitea/conf/app.ini & - - for i in $(seq 1 30); do curl -sf http://localhost:3000/api/v1/version >/dev/null 2>&1 && break; sleep 1; done - # Create bootstrap admin user via CLI - - su-exec git forgejo admin user create --admin --username setup-admin --password "SetupPass-789xyz" --email "setup-admin@smoke.test" --must-change-password=false --config /data/gitea/conf/app.ini - # Run the smoke test (as root is fine — only forgejo binary needs git user) + - apk add --no-cache bash curl jq git coreutils + - python3 tests/mock-forgejo.py & echo $! > /tmp/mock-forgejo.pid + - sleep 2 - bash tests/smoke-init.sh + - kill $(cat /tmp/mock-forgejo.pid) 2>/dev/null || true diff --git a/AGENTS.md b/AGENTS.md index ffc5561..78f1c29 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,12 +1,19 @@ - + # Disinto — Agent Instructions ## What this repo is -Disinto is an autonomous code factory. It manages eight agents (dev, review, -gardener, supervisor, planner, predictor, action, vault) that pick up issues from forge, -implement them, review PRs, plan from the vision, gate dangerous actions, and -keep the system healthy — all via cron and `claude -p`. +Disinto is an autonomous code factory. It manages seven agents (dev, review, +gardener, supervisor, planner, predictor, architect) that pick up issues from +forge, implement them, review PRs, plan from the vision, and keep the system +healthy — all via cron and `claude -p`. The dispatcher executes formula-based +operational tasks. + +Each agent has a `.profile` repository on Forgejo that stores lessons learned +from prior sessions, providing continuous improvement across runs. + +> **Note:** The vault is being redesigned as a PR-based approval workflow on the +> ops repo (see issues #73-#77). See [docs/VAULT.md](docs/VAULT.md) for details. 
Old vault scripts are being removed. See `README.md` for the full architecture and `disinto-factory/SKILL.md` for setup. @@ -14,17 +21,16 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set ``` disinto/ (code repo) -├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation +├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation ├── review/ review-poll.sh, review-pr.sh — PR review ├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula ├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula ├── planner/ planner-run.sh — direct cron executor for run-planner formula ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper) │ preflight.sh — pre-flight data collection for supervisor formula -│ supervisor-poll.sh — legacy bash orchestrator (superseded) -├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh — action gating + procurement -├── action/ action-poll.sh, action-agent.sh — operational task execution -├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, build-graph.py +├── architect/ architect-run.sh — strategic decomposition of vision into sprints +├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77) +├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) └── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md) @@ -35,9 +41,6 @@ disinto-ops/ (ops repo — 
{project}-ops) │ ├── approved/ approved vault items │ ├── fired/ executed vault items │ └── rejected/ rejected vault items -├── journal/ -│ ├── planner/ daily planning logs -│ └── supervisor/ operational health logs ├── knowledge/ shared agent knowledge + best practices ├── evidence/ engagement data, experiment results ├── portfolio.md addressables + observables @@ -45,10 +48,13 @@ disinto-ops/ (ops repo — {project}-ops) └── RESOURCES.md accounts, tokens (refs), infra inventory ``` -> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that -> orchestrate multi-step agent tasks (e.g., `run-gardener.toml`, `run-planner.toml`). This is -> distinct from "processes" described in `docs/EVIDENCE-ARCHITECTURE.md`, which are measurement -> and mutation pipelines that read external platforms and write structured evidence to git. +> **Note:** Journal directories (`journal/planner/` and `journal/supervisor/`) have been removed from the ops repo. Agent journals are now stored in each agent's `.profile` repo on Forgejo. + +## Agent .profile Model + +Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/profile.sh`. + +> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`. 
## Tech stack @@ -90,8 +96,10 @@ bash dev/phase-test.sh | Supervisor | `supervisor/` | Health monitoring | [supervisor/AGENTS.md](supervisor/AGENTS.md) | | Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) | | Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) | -| Action | `action/` | Operational task execution | [action/AGENTS.md](action/AGENTS.md) | -| Vault | `vault/` | Action gating + resource procurement | [vault/AGENTS.md](vault/AGENTS.md) | +| Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) | + +> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). +> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details. See [lib/AGENTS.md](lib/AGENTS.md) for the full shared helper reference. @@ -108,14 +116,16 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge → | `backlog` | Issue is queued for implementation. Dev-poll picks the first ready one. | Planner, gardener, humans | | `priority` | Queue tier above plain backlog. Issues with both `priority` and `backlog` are picked before plain `backlog` issues. FIFO within each tier. | Planner, humans | | `in-progress` | Dev-agent is actively working on this issue. Only one issue per project is in-progress at a time. | dev-agent.sh (claims issue) | -| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, action-agent.sh, dev-poll.sh (on failure) | +| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) | | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. 
| review-pr.sh (auto-created follow-ups) | | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) | +| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) | +| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. | reproduce-agent (when reproduction succeeds but cause unclear) | +| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans | | `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans | | `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh | | `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) | | `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) | -| `action` | Operational task for the action-agent to execute via formula. | Planner, humans | ### Dependency conventions @@ -160,12 +170,12 @@ Humans write these. Agents read and enforce them. | ID | Decision | Rationale | |---|---|---| -| AD-001 | Nervous system runs from cron, not action issues. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | +| AD-001 | Nervous system runs from cron, not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | | AD-002 | Single-threaded pipeline per project. | One dev issue at a time. No new work while a PR awaits CI or review. Prevents merge conflicts and keeps context clear. | | AD-003 | The runtime creates and destroys, the formula preserves. 
| Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. | | AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. | -| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Vault-runner gets only vault secrets; agents get only agent secrets. | -| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral vault-runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. | +| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. | +| AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) | **Who enforces what:** - **Gardener** checks open backlog issues against ADs during grooming; closes violations with a comment referencing the AD number. 
diff --git a/README.md b/README.md index 2d0a798..40c9889 100644 --- a/README.md +++ b/README.md @@ -37,9 +37,6 @@ cron (daily) ──→ gardener-poll.sh ← backlog grooming (duplicates, stale cron (weekly) ──→ planner-poll.sh ← gap-analyse VISION.md, create backlog issues └── claude -p: update AGENTS.md → create issues -cron (*/30) ──→ vault-poll.sh ← safety gate for dangerous/irreversible actions - └── claude -p: classify → auto-approve/reject or escalate - ``` ## Prerequisites @@ -96,7 +93,6 @@ crontab -e # 3,13,23,33,43,53 * * * * /path/to/disinto/review/review-poll.sh # 6,16,26,36,46,56 * * * * /path/to/disinto/dev/dev-poll.sh # 15 8 * * * /path/to/disinto/gardener/gardener-poll.sh -# 0,30 * * * * /path/to/disinto/vault/vault-poll.sh # 0 9 * * 1 /path/to/disinto/planner/planner-poll.sh # 4. Verify @@ -123,16 +119,13 @@ disinto/ │ └── best-practices.md # Gardener knowledge base ├── planner/ │ ├── planner-poll.sh # Cron entry: weekly vision gap analysis -│ └── (formula-driven) # run-planner.toml executed by action-agent +│ └── (formula-driven) # run-planner.toml executed by dispatcher ├── vault/ -│ ├── vault-poll.sh # Cron entry: process pending dangerous actions -│ ├── vault-agent.sh # Classifies and routes actions (claude -p) -│ ├── vault-fire.sh # Executes an approved action -│ ├── vault-reject.sh # Marks an action as rejected -│ └── PROMPT.md # System prompt for vault agent +│ └── vault-env.sh # Shared env setup (vault redesign in progress, see #73-#77) +├── docs/ +│ └── VAULT.md # Vault PR workflow and branch protection documentation └── supervisor/ ├── supervisor-poll.sh # Supervisor: health checks + claude -p - ├── PROMPT.md # Supervisor's system prompt ├── update-prompt.sh # Self-learning: append to best-practices └── best-practices/ # Progressive disclosure knowledge base ├── memory.md @@ -153,7 +146,9 @@ disinto/ | **Review** | Every 10 min | Finds PRs without review, runs Claude-powered code review, approves or requests changes. 
| | **Gardener** | Daily | Grooms the issue backlog: detects duplicates, promotes `tech-debt` to `backlog`, closes stale issues, escalates ambiguous items. | | **Planner** | Weekly | Updates AGENTS.md documentation to reflect recent code changes, then gap-analyses VISION.md vs current state and creates up to 5 backlog issues for the highest-leverage gaps. | -| **Vault** | Every 30 min | Safety gate for dangerous or irreversible actions. Classifies pending actions via Claude: auto-approve, auto-reject, or escalate to a human via vault/forge. | + +> **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). +> See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow and branch protection details. ## Design Principles diff --git a/action/AGENTS.md b/action/AGENTS.md deleted file mode 100644 index 55dadae..0000000 --- a/action/AGENTS.md +++ /dev/null @@ -1,34 +0,0 @@ - -# Action Agent - -**Role**: Execute operational tasks described by action formulas — run scripts, -call APIs, send messages, collect human approval. Shares the same phase handler -as the dev-agent: if an action produces code changes, the orchestrator creates a -PR and drives the CI/review loop; otherwise Claude closes the issue directly. - -**Trigger**: `action-poll.sh` runs every 10 min via cron. Sources `lib/guard.sh` -and calls `check_active action` first — skips if `$FACTORY_ROOT/state/.action-active` -is absent. Then scans for open issues labeled `action` that have no active tmux -session, and spawns `action-agent.sh `. 
- -**Key files**: -- `action/action-poll.sh` — Cron scheduler: finds open action issues with no active tmux session, spawns action-agent.sh -- `action/action-agent.sh` — Orchestrator: fetches issue body + prior comments, **checks all dependencies via `lib/parse-deps.sh` before spawning** (skips silently if any dep is still open), creates tmux session (`action-{project}-{issue_num}`) with interactive `claude`, injects formula prompt with phase protocol, enters `monitor_phase_loop` (shared via `dev/phase-handler.sh`) for CI/review lifecycle or direct completion - -**Session lifecycle**: -1. `action-poll.sh` finds open `action` issues with no active tmux session. -2. Spawns `action-agent.sh `. -3. Agent creates tmux session `action-{project}-{issue_num}`, injects prompt (formula + prior comments + phase protocol). -4. Agent enters `monitor_phase_loop` (shared with dev-agent via `dev/phase-handler.sh`). -5. **Path A (git output):** Claude pushes branch → `PHASE:awaiting_ci` → handler creates PR, polls CI → injects failures → Claude fixes → push → re-poll → CI passes → `PHASE:awaiting_review` → handler polls reviews → injects REQUEST_CHANGES → Claude fixes → approved → merge → cleanup. -6. **Path B (no git output):** Claude posts results as comment, closes issue → `PHASE:done` → handler cleans up (kill session, docker compose down, remove temp files). -7. For human input: Claude writes `PHASE:escalate`; human responds via vault/forge. - -**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. 
- -**Environment variables consumed**: -- `FORGE_TOKEN`, `FORGE_ACTION_TOKEN` (falls back to FORGE_TOKEN), `FORGE_REPO`, `FORGE_API`, `FORGE_URL`, `PROJECT_NAME`, `FORGE_WEB` -- `ACTION_IDLE_TIMEOUT` — Max seconds before killing idle session (default 14400 = 4h) -- `ACTION_MAX_LIFETIME` — Max total session wall-clock seconds (default 28800 = 8h); caps session independently of idle timeout - -**FORGE_REMOTE**: `action-agent.sh` auto-detects the git remote for `FORGE_URL` (same logic as dev-agent). Exported as `FORGE_REMOTE`, used for worktree creation and push instructions injected into the Claude prompt. diff --git a/action/action-agent.sh b/action/action-agent.sh deleted file mode 100755 index 38d7d39..0000000 --- a/action/action-agent.sh +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# action-agent.sh — Synchronous action agent: SDK + shared libraries -# -# Synchronous bash loop using claude -p (one-shot invocation). -# No tmux sessions, no phase files — the bash script IS the state machine. -# -# Usage: ./action-agent.sh [project.toml] -# -# Flow: -# 1. Preflight: issue_check_deps(), memory guard, concurrency lock -# 2. Parse model from YAML front matter in issue body (custom model selection) -# 3. Worktree: worktree_create() for action isolation -# 4. Load formula from issue body -# 5. Build prompt: formula + prior non-bot comments (resume context) -# 6. agent_run(worktree, prompt) → Claude executes action, may push -# 7. If pushed: pr_walk_to_merge() from lib/pr-lifecycle.sh -# 8. 
Cleanup: worktree_cleanup(), issue_close() -# -# Action-specific (stays in runner): -# - YAML front matter parsing (model selection) -# - Bot username filtering for prior comments -# - Lifetime watchdog (MAX_LIFETIME=8h wall-clock cap) -# - Child process cleanup (docker compose, background jobs) -# -# From shared libraries: -# - Issue lifecycle: lib/issue-lifecycle.sh -# - Worktree: lib/worktree.sh -# - PR lifecycle: lib/pr-lifecycle.sh -# - Agent SDK: lib/agent-sdk.sh -# -# Log: action/action-poll-{project}.log -# ============================================================================= -set -euo pipefail - -ISSUE="${1:?Usage: action-agent.sh [project.toml]}" -export PROJECT_TOML="${2:-${PROJECT_TOML:-}}" - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" - -# shellcheck source=../lib/env.sh -source "$FACTORY_ROOT/lib/env.sh" -# Use action-bot's own Forgejo identity (#747) -FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" -# shellcheck source=../lib/ci-helpers.sh -source "$FACTORY_ROOT/lib/ci-helpers.sh" -# shellcheck source=../lib/worktree.sh -source "$FACTORY_ROOT/lib/worktree.sh" -# shellcheck source=../lib/issue-lifecycle.sh -source "$FACTORY_ROOT/lib/issue-lifecycle.sh" -# shellcheck source=../lib/agent-sdk.sh -source "$FACTORY_ROOT/lib/agent-sdk.sh" -# shellcheck source=../lib/pr-lifecycle.sh -source "$FACTORY_ROOT/lib/pr-lifecycle.sh" - -BRANCH="action/issue-${ISSUE}" -WORKTREE="/tmp/action-${ISSUE}-$(date +%s)" -LOCKFILE="/tmp/action-agent-${ISSUE}.lock" -LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log" -# shellcheck disable=SC2034 # consumed by agent-sdk.sh -SID_FILE="/tmp/action-session-${PROJECT_NAME:-default}-${ISSUE}.sid" -MAX_LIFETIME="${ACTION_MAX_LIFETIME:-28800}" # 8h default wall-clock cap -SESSION_START_EPOCH=$(date +%s) - -log() { - printf '[%s] action#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$ISSUE" "$*" >> "$LOGFILE" -} - -# --- Concurrency lock (per issue) --- -if 
[ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") - if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then - log "SKIP: action-agent already running for #${ISSUE} (PID ${LOCK_PID})" - exit 0 - fi - rm -f "$LOCKFILE" -fi -echo $$ > "$LOCKFILE" - -cleanup() { - local exit_code=$? - # Kill lifetime watchdog if running - if [ -n "${LIFETIME_WATCHDOG_PID:-}" ] && kill -0 "$LIFETIME_WATCHDOG_PID" 2>/dev/null; then - kill "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true - wait "$LIFETIME_WATCHDOG_PID" 2>/dev/null || true - fi - rm -f "$LOCKFILE" - # Kill any remaining child processes spawned during the run - local children - children=$(jobs -p 2>/dev/null) || true - if [ -n "$children" ]; then - # shellcheck disable=SC2086 # intentional word splitting - kill $children 2>/dev/null || true - # shellcheck disable=SC2086 - wait $children 2>/dev/null || true - fi - # Best-effort docker cleanup for containers started during this action - (cd "${WORKTREE}" 2>/dev/null && docker compose down 2>/dev/null) || true - # Preserve worktree on crash for debugging; clean up on success - if [ "$exit_code" -ne 0 ]; then - worktree_preserve "$WORKTREE" "crashed (exit=$exit_code)" - else - worktree_cleanup "$WORKTREE" - fi - rm -f "$SID_FILE" -} -trap cleanup EXIT - -# --- Memory guard --- -memory_guard 2000 - -# --- Fetch issue --- -log "fetching issue #${ISSUE}" -ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${ISSUE}") || true - -if [ -z "$ISSUE_JSON" ] || ! 
printf '%s' "$ISSUE_JSON" | jq -e '.id' >/dev/null 2>&1; then - log "ERROR: failed to fetch issue #${ISSUE}" - exit 1 -fi - -ISSUE_TITLE=$(printf '%s' "$ISSUE_JSON" | jq -r '.title') -ISSUE_BODY=$(printf '%s' "$ISSUE_JSON" | jq -r '.body // ""') -ISSUE_STATE=$(printf '%s' "$ISSUE_JSON" | jq -r '.state') - -if [ "$ISSUE_STATE" != "open" ]; then - log "SKIP: issue #${ISSUE} is ${ISSUE_STATE}" - exit 0 -fi - -log "Issue: ${ISSUE_TITLE}" - -# --- Dependency check (shared library) --- -if ! issue_check_deps "$ISSUE"; then - log "SKIP: issue #${ISSUE} blocked by: ${_ISSUE_BLOCKED_BY[*]}" - exit 0 -fi - -# --- Extract model from YAML front matter (if present) --- -YAML_MODEL=$(printf '%s' "$ISSUE_BODY" | \ - sed -n '/^---$/,/^---$/p' | grep '^model:' | awk '{print $2}' | tr -d '"' || true) -if [ -n "$YAML_MODEL" ]; then - export CLAUDE_MODEL="$YAML_MODEL" - log "model from front matter: ${YAML_MODEL}" -fi - -# --- Resolve bot username(s) for comment filtering --- -_bot_login=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API%%/repos*}/user" | jq -r '.login // empty' 2>/dev/null || true) - -# Build list: token owner + any extra names from FORGE_BOT_USERNAMES (comma-separated) -_bot_logins="${_bot_login}" -if [ -n "${FORGE_BOT_USERNAMES:-}" ]; then - _bot_logins="${_bot_logins:+${_bot_logins},}${FORGE_BOT_USERNAMES}" -fi - -# --- Fetch existing comments (resume context, excluding bot comments) --- -COMMENTS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${ISSUE}/comments?limit=50") || true - -PRIOR_COMMENTS="" -if [ -n "$COMMENTS_JSON" ] && [ "$COMMENTS_JSON" != "null" ] && [ "$COMMENTS_JSON" != "[]" ]; then - PRIOR_COMMENTS=$(printf '%s' "$COMMENTS_JSON" | \ - jq -r --arg bots "$_bot_logins" \ - '($bots | split(",") | map(select(. 
!= ""))) as $bl | - .[] | select(.user.login as $u | $bl | index($u) | not) | - "[\(.user.login) at \(.created_at[:19])]\n\(.body)\n---"' 2>/dev/null || true) -fi - -# --- Determine git remote --- -cd "${PROJECT_REPO_ROOT}" -_forge_host=$(echo "$FORGE_URL" | sed 's|https\?://||; s|/.*||') -FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') -FORGE_REMOTE="${FORGE_REMOTE:-origin}" -export FORGE_REMOTE - -# --- Create isolated worktree --- -log "creating worktree: ${WORKTREE}" -git fetch "${FORGE_REMOTE}" "${PRIMARY_BRANCH}" 2>/dev/null || true -if ! worktree_create "$WORKTREE" "$BRANCH"; then - log "ERROR: worktree creation failed" - exit 1 -fi -log "worktree ready: ${WORKTREE}" - -# --- Build prompt --- -PRIOR_SECTION="" -if [ -n "$PRIOR_COMMENTS" ]; then - PRIOR_SECTION="## Prior comments (resume context) - -${PRIOR_COMMENTS} - -" -fi - -GIT_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") - -PROMPT="You are an action agent. Your job is to execute the action formula -in the issue below. - -## Issue #${ISSUE}: ${ISSUE_TITLE} - -${ISSUE_BODY} - -${PRIOR_SECTION}## Instructions - -1. Read the action formula steps in the issue body carefully. - -2. Execute each step in order using your Bash tool and any other tools available. - -3. Post progress as comments on issue #${ISSUE} after significant steps: - curl -sf -X POST \\ - -H \"Authorization: token \${FORGE_TOKEN}\" \\ - -H 'Content-Type: application/json' \\ - \"${FORGE_API}/issues/${ISSUE}/comments\" \\ - -d \"{\\\"body\\\": \\\"your comment here\\\"}\" - -4. If a step requires human input or approval, post a comment explaining what - is needed and stop — the orchestrator will block the issue. - -### Path A: If this action produces code changes (e.g. 
config updates, baselines): - - You are already in an isolated worktree at: ${WORKTREE} - - You are on branch: ${BRANCH} - - Make your changes, commit, and push: git push ${FORGE_REMOTE} ${BRANCH} - - **IMPORTANT:** The worktree is destroyed after completion. Push all - results before finishing — unpushed work will be lost. - -### Path B: If this action produces no code changes (investigation, report): - - Post results as a comment on issue #${ISSUE}. - - **IMPORTANT:** The worktree is destroyed after completion. Copy any - files you need to persistent paths before finishing. - -5. Environment variables available in your bash sessions: - FORGE_TOKEN, FORGE_API, FORGE_REPO, FORGE_WEB, PROJECT_NAME - (all sourced from ${FACTORY_ROOT}/.env) - -### CRITICAL: Never embed secrets in issue bodies, comments, or PR descriptions - - NEVER put API keys, tokens, passwords, or private keys in issue text or comments. - - Always reference secrets via env var names (e.g. \\\$BASE_RPC_URL, \\\${FORGE_TOKEN}). - - If a formula step needs a secret, read it from .env or the environment at runtime. - - Before posting any comment, verify it contains no credentials, hex keys > 32 chars, - or URLs with embedded API keys. - -If the prior comments above show work already completed, resume from where it -left off. - -${GIT_INSTRUCTIONS}" - -# --- Wall-clock lifetime watchdog (background) --- -# Caps total run time independently of claude -p timeout. When the cap is -# hit the watchdog kills the main process, which triggers cleanup via trap. -_lifetime_watchdog() { - local remaining=$(( MAX_LIFETIME - ($(date +%s) - SESSION_START_EPOCH) )) - [ "$remaining" -le 0 ] && remaining=1 - sleep "$remaining" - local hours=$(( MAX_LIFETIME / 3600 )) - log "MAX_LIFETIME (${hours}h) reached — killing agent" - # Post summary comment on issue - local body="Action agent killed: wall-clock lifetime cap (${hours}h) reached." 
- curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${FORGE_API}/issues/${ISSUE}/comments" \ - -d "{\"body\": \"${body}\"}" >/dev/null 2>&1 || true - kill $$ 2>/dev/null || true -} -_lifetime_watchdog & -LIFETIME_WATCHDOG_PID=$! - -# --- Run agent --- -log "running agent (worktree: ${WORKTREE})" -agent_run --worktree "$WORKTREE" "$PROMPT" -log "agent_run complete" - -# --- Detect if branch was pushed (Path A vs Path B) --- -PUSHED=false -# Check if remote branch exists -git fetch "${FORGE_REMOTE}" "$BRANCH" 2>/dev/null || true -if git rev-parse --verify "${FORGE_REMOTE}/${BRANCH}" >/dev/null 2>&1; then - PUSHED=true -fi -# Fallback: check local commits ahead of base -if [ "$PUSHED" = false ]; then - if git -C "$WORKTREE" log "${FORGE_REMOTE}/${PRIMARY_BRANCH}..${BRANCH}" --oneline 2>/dev/null | grep -q .; then - PUSHED=true - fi -fi - -if [ "$PUSHED" = true ]; then - # --- Path A: code changes pushed — create PR and walk to merge --- - log "branch pushed — creating PR" - PR_NUMBER="" - PR_NUMBER=$(pr_create "$BRANCH" "action: ${ISSUE_TITLE}" \ - "Closes #${ISSUE} - -Automated action execution by action-agent.") || true - - if [ -n "$PR_NUMBER" ]; then - log "walking PR #${PR_NUMBER} to merge" - pr_walk_to_merge "$PR_NUMBER" "$_AGENT_SESSION_ID" "$WORKTREE" || true - - case "${_PR_WALK_EXIT_REASON:-}" in - merged) - log "PR #${PR_NUMBER} merged — closing issue" - issue_close "$ISSUE" - ;; - *) - log "PR #${PR_NUMBER} not merged (reason: ${_PR_WALK_EXIT_REASON:-unknown})" - issue_block "$ISSUE" "pr_not_merged: ${_PR_WALK_EXIT_REASON:-unknown}" - ;; - esac - else - log "ERROR: failed to create PR" - issue_block "$ISSUE" "pr_creation_failed" - fi -else - # --- Path B: no code changes — close issue directly --- - log "no branch pushed — closing issue (Path B)" - issue_close "$ISSUE" -fi - -log "action-agent finished for issue #${ISSUE}" diff --git a/action/action-poll.sh b/action/action-poll.sh deleted file mode 
100755 index 8d67c47..0000000 --- a/action/action-poll.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash -# action-poll.sh — Cron scheduler: find open 'action' issues, spawn action-agent -# -# An issue is ready for action if: -# - It is open and labeled 'action' -# - No tmux session named action-{project}-{issue_num} is already active -# -# Usage: -# cron every 10min -# action-poll.sh [projects/foo.toml] # optional project config - -set -euo pipefail - -export PROJECT_TOML="${1:-}" -source "$(dirname "$0")/../lib/env.sh" -# Use action-bot's own Forgejo identity (#747) -FORGE_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" -# shellcheck source=../lib/guard.sh -source "$(dirname "$0")/../lib/guard.sh" -check_active action - -LOGFILE="${DISINTO_LOG_DIR}/action/action-poll-${PROJECT_NAME:-default}.log" -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -log() { - printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -# --- Memory guard --- -memory_guard 2000 - -# --- Find open 'action' issues --- -log "scanning for open action issues" -ACTION_ISSUES=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues?state=open&labels=action&limit=50&type=issues") || true - -if [ -z "$ACTION_ISSUES" ] || [ "$ACTION_ISSUES" = "null" ]; then - log "no action issues found" - exit 0 -fi - -COUNT=$(printf '%s' "$ACTION_ISSUES" | jq 'length') -if [ "$COUNT" -eq 0 ]; then - log "no action issues found" - exit 0 -fi - -log "found ${COUNT} open action issue(s)" - -# Spawn action-agent for each issue that has no active tmux session. -# Only one agent is spawned per poll to avoid memory pressure; the next -# poll picks up remaining issues. 
-for i in $(seq 0 $((COUNT - 1))); do - ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$i].number") - SESSION="action-${PROJECT_NAME}-${ISSUE_NUM}" - - if tmux has-session -t "$SESSION" 2>/dev/null; then - log "issue #${ISSUE_NUM}: session ${SESSION} already active, skipping" - continue - fi - - LOCKFILE="/tmp/action-agent-${ISSUE_NUM}.lock" - if [ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") - if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then - log "issue #${ISSUE_NUM}: agent starting (PID ${LOCK_PID}), skipping" - continue - fi - fi - - log "spawning action-agent for issue #${ISSUE_NUM}" - nohup "${SCRIPT_DIR}/action-agent.sh" "$ISSUE_NUM" "$PROJECT_TOML" >> "$LOGFILE" 2>&1 & - log "started action-agent PID $! for issue #${ISSUE_NUM}" - break -done diff --git a/architect/AGENTS.md b/architect/AGENTS.md new file mode 100644 index 0000000..64b325e --- /dev/null +++ b/architect/AGENTS.md @@ -0,0 +1,65 @@ + +# Architect — Agent Instructions + +## What this agent is + +The architect is a strategic decomposition agent that breaks down vision issues +into development sprints. It proposes sprints via PRs on the ops repo and +converses with humans through PR comments. + +## Role + +- **Input**: Vision issues from VISION.md, prerequisite tree from ops repo +- **Output**: Sprint proposals as PRs on the ops repo, sub-issue files +- **Mechanism**: Formula-driven execution via `formulas/run-architect.toml` +- **Identity**: `architect-bot` on Forgejo + +## Responsibilities + +1. **Strategic decomposition**: Break down large vision items into coherent + sprints that can be executed by the dev agent +2. **Design fork identification**: When multiple implementation approaches exist, + identify the forks and file sub-issues for each path +3. **Sprint PR creation**: Propose sprints as PRs on the ops repo with clear + acceptance criteria and dependencies +4. 
**Human conversation**: Respond to PR comments, refine sprint proposals based + on human feedback +5. **Sub-issue filing**: After design forks are resolved, file concrete sub-issues + for implementation + +## Formula + +The architect is driven by `formulas/run-architect.toml`. This formula defines +the steps for: +- Research: analyzing vision items and prerequisite tree +- Design: identifying implementation approaches and forks +- Sprint proposal: creating structured sprint PRs +- Sub-issue filing: creating concrete implementation issues + +## Execution + +Run via `architect/architect-run.sh`, which: +- Acquires a cron lock and checks available memory +- Sources shared libraries (env.sh, formula-session.sh) +- Uses FORGE_ARCHITECT_TOKEN for authentication +- Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo +- Executes the formula via `agent_run` + +## Cron + +Suggested cron entry (every 6 hours): +```cron +0 */6 * * * cd /path/to/disinto && bash architect/architect-run.sh +``` + +## State + +Architect state is tracked in `state/.architect-active` (disabled by default — +empty file not created, just document it). + +## Related issues + +- #96: Architect agent parent issue +- #100: Architect formula — research + design fork identification +- #101: Architect formula — sprint PR creation with questions +- #102: Architect formula — answer parsing + sub-issue filing diff --git a/architect/architect-run.sh b/architect/architect-run.sh new file mode 100755 index 0000000..0edeb70 --- /dev/null +++ b/architect/architect-run.sh @@ -0,0 +1,133 @@ +#!/usr/bin/env bash +# ============================================================================= +# architect-run.sh — Cron wrapper: architect execution via SDK + formula +# +# Synchronous bash loop using claude -p (one-shot invocation). +# No tmux sessions, no phase files — the bash script IS the state machine. +# +# Flow: +# 1. Guards: cron lock, memory check +# 2. 
Load formula (formulas/run-architect.toml) +# 3. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph +# 4. agent_run(worktree, prompt) → Claude decomposes vision into sprints +# +# Usage: +# architect-run.sh [projects/disinto.toml] # project config (default: disinto) +# +# Cron: 0 */6 * * * # every 6 hours +# ============================================================================= +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +FACTORY_ROOT="$(dirname "$SCRIPT_DIR")" + +# Accept project config from argument; default to disinto +export PROJECT_TOML="${1:-$FACTORY_ROOT/projects/disinto.toml}" +# shellcheck source=../lib/env.sh +source "$FACTORY_ROOT/lib/env.sh" +# Override FORGE_TOKEN with architect-bot's token (#747) +FORGE_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" +# shellcheck source=../lib/formula-session.sh +source "$FACTORY_ROOT/lib/formula-session.sh" +# shellcheck source=../lib/worktree.sh +source "$FACTORY_ROOT/lib/worktree.sh" +# shellcheck source=../lib/guard.sh +source "$FACTORY_ROOT/lib/guard.sh" +# shellcheck source=../lib/agent-sdk.sh +source "$FACTORY_ROOT/lib/agent-sdk.sh" + +LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log" +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +LOGFILE="$LOG_FILE" +# shellcheck disable=SC2034 # consumed by agent-sdk.sh +SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid" +SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md" +WORKTREE="/tmp/${PROJECT_NAME}-architect-run" + +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="architect" + +# Override log() to append to architect-specific log file +# shellcheck disable=SC2034 +log() { + local agent="${LOG_AGENT:-architect}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" +} + +# ── Guards ──────────────────────────────────────────────────────────────── +check_active architect 
+acquire_cron_lock "/tmp/architect-run.lock" +memory_guard 2000 + +log "--- Architect run start ---" + +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + +# ── Resolve agent identity for .profile repo ──────────────────────────── +if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then + AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +fi + +# ── Load formula + context ─────────────────────────────────────────────── +load_formula_or_profile "architect" "$FACTORY_ROOT/formulas/run-architect.toml" || exit 1 +build_context_block VISION.md AGENTS.md ops:prerequisites.md + +# ── Prepare .profile context (lessons injection) ───────────────────────── +formula_prepare_profile_context + +# ── Build structural analysis graph ────────────────────────────────────── +build_graph_section + +# ── Read scratch file (compaction survival) ─────────────────────────────── +SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") +SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") + +# ── Build prompt ───────────────────────────────────────────────────────── +build_sdk_prompt_footer + +# Architect prompt: strategic decomposition of vision into sprints +# See: architect/AGENTS.md for full role description +# Pattern: heredoc function to avoid inline prompt construction +# Note: Uses CONTEXT_BLOCK, GRAPH_SECTION, SCRATCH_CONTEXT from formula-session.sh +# Architecture Decision: AD-003 — The runtime creates and destroys, the formula preserves. +build_architect_prompt() { + cat <<_PROMPT_EOF_ +You are the architect agent for ${FORGE_REPO}. Work through the formula below. + +Your role: strategic decomposition of vision issues into development sprints. 
+Propose sprints via PRs on the ops repo, converse with humans through PR comments, +and file sub-issues after design forks are resolved. + +## Project context +${CONTEXT_BLOCK} +${GRAPH_SECTION} +${SCRATCH_CONTEXT} +$(formula_lessons_block) +## Formula +${FORMULA_CONTENT} + +${SCRATCH_INSTRUCTION} +${PROMPT_FOOTER} +_PROMPT_EOF_ +} + +PROMPT=$(build_architect_prompt) + +# ── Create worktree ────────────────────────────────────────────────────── +formula_worktree_setup "$WORKTREE" + +# ── Run agent ───────────────────────────────────────────────────────────── +export CLAUDE_MODEL="sonnet" + +agent_run --worktree "$WORKTREE" "$PROMPT" +log "agent_run complete" + +rm -f "$SCRATCH_FILE" + +# Write journal entry post-session +profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true + +log "--- Architect run done ---" diff --git a/bin/disinto b/bin/disinto index 7a0714e..7d507a7 100755 --- a/bin/disinto +++ b/bin/disinto @@ -10,7 +10,8 @@ # disinto shell Shell into the agent container # disinto status Show factory status # disinto secrets Manage encrypted secrets -# disinto vault-run Run action in ephemeral vault container +# disinto run Run action in ephemeral runner container +# disinto ci-logs [--step ] Read CI logs from Woodpecker SQLite # # Usage: # disinto init https://github.com/user/repo @@ -24,6 +25,13 @@ set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" source "${FACTORY_ROOT}/lib/env.sh" +source "${FACTORY_ROOT}/lib/ops-setup.sh" +source "${FACTORY_ROOT}/lib/hire-agent.sh" +source "${FACTORY_ROOT}/lib/forge-setup.sh" +source "${FACTORY_ROOT}/lib/generators.sh" +source "${FACTORY_ROOT}/lib/forge-push.sh" +source "${FACTORY_ROOT}/lib/ci-setup.sh" +source "${FACTORY_ROOT}/lib/release.sh" # ── Helpers ────────────────────────────────────────────────────────────────── @@ -39,7 +47,12 @@ Usage: disinto shell Shell into the agent container disinto status Show factory status disinto secrets Manage encrypted secrets - disinto vault-run Run action in ephemeral vault container + disinto run Run action in ephemeral runner container + disinto ci-logs [--step ] + Read CI logs from Woodpecker SQLite + disinto release Create vault PR for release (e.g., v1.2.0) + disinto hire-an-agent [--formula ] + Hire a new agent (create user + .profile repo) Init options: --branch Primary branch (default: auto-detect) @@ -48,6 +61,12 @@ Init options: --forge-url Forge base URL (default: http://localhost:3000) --bare Skip compose generation (bare-metal setup) --yes Skip confirmation prompts + +Hire an agent options: + --formula Path to role formula TOML (default: formulas/.toml) + +CI logs options: + --step Filter logs to a specific step (e.g., smoke-init) EOF exit 1 } @@ -148,381 +167,38 @@ write_secrets_encrypted() { return 0 } -FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" +export FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" # Generate docker-compose.yml in the factory root. +# (Implementation in lib/generators.sh) generate_compose() { - local forge_port="${1:-3000}" - local compose_file="${FACTORY_ROOT}/docker-compose.yml" - - cat > "$compose_file" <<'COMPOSEEOF' -# docker-compose.yml — generated by disinto init -# Brings up Forgejo, Woodpecker, and the agent runtime. 
- -services: - forgejo: - image: codeberg.org/forgejo/forgejo:11.0 - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - forgejo-data:/data - environment: - FORGEJO__database__DB_TYPE: sqlite3 - FORGEJO__server__ROOT_URL: http://forgejo:3000/ - FORGEJO__server__HTTP_PORT: "3000" - FORGEJO__security__INSTALL_LOCK: "true" - FORGEJO__service__DISABLE_REGISTRATION: "true" - FORGEJO__webhook__ALLOWED_HOST_LIST: "private" - networks: - - disinto-net - - woodpecker: - image: woodpeckerci/woodpecker-server:v3 - restart: unless-stopped - security_opt: - - apparmor=unconfined - ports: - - "8000:8000" - - "9000:9000" - volumes: - - woodpecker-data:/var/lib/woodpecker - environment: - WOODPECKER_FORGEJO: "true" - WOODPECKER_FORGEJO_URL: http://forgejo:3000 - WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} - WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} - WOODPECKER_HOST: http://woodpecker:8000 - WOODPECKER_OPEN: "true" - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_DATABASE_DRIVER: sqlite3 - WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite - depends_on: - - forgejo - networks: - - disinto-net - - woodpecker-agent: - image: woodpeckerci/woodpecker-agent:v3 - restart: unless-stopped - network_mode: host - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock - environment: - WOODPECKER_SERVER: localhost:9000 - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_GRPC_SECURE: "false" - WOODPECKER_HEALTHCHECK_ADDR: ":3333" - WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net - WOODPECKER_MAX_WORKFLOWS: 1 - depends_on: - - woodpecker - - agents: - build: ./docker/agents - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - - project-repos:/home/agent/repos - - ./:/home/agent/disinto:ro - - ${HOME}/.claude:/home/agent/.claude - - ${HOME}/.claude.json:/home/agent/.claude.json:ro - - 
CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro - - \${HOME}/.ssh:/home/agent/.ssh:ro - environment: - FORGE_URL: http://forgejo:3000 - WOODPECKER_SERVER: http://woodpecker:8000 - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/\${PROJECT_NAME:-project} - env_file: - - .env - # IMPORTANT: agents get .env only (forge tokens, CI tokens, config). - # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in - # .env.vault.enc and are NEVER injected here — only the vault-runner - # container receives them at fire time (AD-006, #745). - depends_on: - - forgejo - - woodpecker - networks: - - disinto-net - - vault-runner: - build: ./docker/agents - profiles: ["vault"] - security_opt: - - apparmor=unconfined - volumes: - - ./vault:/home/agent/disinto/vault - - ./lib:/home/agent/disinto/lib:ro - - ./formulas:/home/agent/disinto/formulas:ro - environment: - FORGE_URL: http://forgejo:3000 - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/\${PROJECT_NAME:-project} - # env_file set at runtime by: disinto vault-run --env-file - entrypoint: ["bash", "/home/agent/disinto/vault/vault-run-action.sh"] - networks: - - disinto-net - - # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging - # Serves on ports 80/443, routes based on path - edge: - image: caddy:alpine - ports: - - "80:80" - - "443:443" - volumes: - - ./docker/Caddyfile:/etc/caddy/Caddyfile - - caddy_data:/data - depends_on: - - forgejo - - woodpecker - - staging - networks: - - disinto-net - - # Staging container — static file server for staging artifacts - # Edge proxy routes to this container for default requests - staging: - image: caddy:alpine - command: ["caddy", "file-server", "--root", "/srv/site"] - volumes: - - ./docker:/srv/site:ro - networks: - - disinto-net - - # Staging deployment slot — activated by Woodpecker staging pipeline (#755). - # Profile-gated: only starts when explicitly targeted by deploy commands. 
- # Customize image/ports/volumes for your project after init. - staging-deploy: - image: alpine:3 - profiles: ["staging"] - security_opt: - - apparmor=unconfined - environment: - DEPLOY_ENV: staging - networks: - - disinto-net - command: ["echo", "staging slot — replace with project image"] - -volumes: - forgejo-data: - woodpecker-data: - agent-data: - project-repos: - caddy_data: - -networks: - disinto-net: - driver: bridge -COMPOSEEOF - - # Patch the Claude CLI binary path — resolve from host PATH at init time. - local claude_bin - claude_bin="$(command -v claude 2>/dev/null || true)" - if [ -n "$claude_bin" ]; then - # Resolve symlinks to get the real binary path - claude_bin="$(readlink -f "$claude_bin")" - sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" - else - echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 - sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" - fi - - # Patch the forgejo port mapping into the file if non-default - if [ "$forge_port" != "3000" ]; then - # Add port mapping to forgejo service so it's reachable from host during init - sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" - else - sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"3000:3000\"" "$compose_file" - fi - - echo "Created: ${compose_file}" + _generate_compose_impl "$@" } # Generate docker/agents/ files if they don't already exist. +# (Implementation in lib/generators.sh) generate_agent_docker() { - local docker_dir="${FACTORY_ROOT}/docker/agents" - mkdir -p "$docker_dir" - - if [ ! -f "${docker_dir}/Dockerfile" ]; then - echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 - fi - if [ ! 
-f "${docker_dir}/entrypoint.sh" ]; then - echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 - fi + _generate_agent_docker_impl "$@" } # Generate docker/Caddyfile template for edge proxy. +# (Implementation in lib/generators.sh) generate_caddyfile() { - local docker_dir="${FACTORY_ROOT}/docker" - local caddyfile="${docker_dir}/Caddyfile" - - if [ -f "$caddyfile" ]; then - echo "Caddyfile: ${caddyfile} (already exists, skipping)" - return - fi - - cat > "$caddyfile" <<'CADDYFILEEOF' -# Caddyfile — edge proxy configuration -# IP-only binding at bootstrap; domain + TLS added later via vault resource request - -:80 { - # Reverse proxy to Forgejo - handle /forgejo/* { - reverse_proxy forgejo:3000 - } - - # Reverse proxy to Woodpecker CI - handle /ci/* { - reverse_proxy woodpecker:8000 - } - - # Default: proxy to staging container - handle { - reverse_proxy staging:80 - } -} -CADDYFILEEOF - - echo "Created: ${caddyfile}" + _generate_caddyfile_impl "$@" } # Generate docker/index.html default page. +# (Implementation in lib/generators.sh) generate_staging_index() { - local docker_dir="${FACTORY_ROOT}/docker" - local index_file="${docker_dir}/index.html" - - if [ -f "$index_file" ]; then - echo "Staging: ${index_file} (already exists, skipping)" - return - fi - - cat > "$index_file" <<'INDEXEOF' - - - - - - Nothing shipped yet - - - -
-

Nothing shipped yet

-

CI pipelines will update this page with your staging artifacts.

-
- - -INDEXEOF - - echo "Created: ${index_file}" + _generate_staging_index_impl "$@" } # Generate template .woodpecker/ deployment pipeline configs in a project repo. # Creates staging.yml and production.yml alongside the project's existing CI config. # These pipelines trigger on Woodpecker's deployment event with environment filters. +# (Implementation in lib/generators.sh) generate_deploy_pipelines() { - local repo_root="$1" project_name="$2" - local wp_dir="${repo_root}/.woodpecker" - - mkdir -p "$wp_dir" - - # Skip if deploy pipelines already exist - if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then - echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" - return - fi - - if [ ! -f "${wp_dir}/staging.yml" ]; then - cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' -# .woodpecker/staging.yml — Staging deployment pipeline -# Triggered by vault-runner via Woodpecker promote API. -# Human approves promotion in vault → vault-runner calls promote → this runs. - -when: - event: deployment - environment: staging - -steps: - - name: deploy-staging - image: docker:27 - commands: - - echo "Deploying to staging environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" - # Pull the image built by CI and deploy to staging - # Customize these commands for your project: - # - docker compose -f docker-compose.yml --profile staging up -d - - echo "Staging deployment complete" - - - name: verify-staging - image: alpine:3 - commands: - - echo "Verifying staging deployment..." - # Add health checks, smoke tests, or integration tests here: - # - curl -sf http://staging:8080/health || exit 1 - - echo "Staging verification complete" -STAGINGEOF - echo "Created: ${wp_dir}/staging.yml" - fi - - if [ ! -f "${wp_dir}/production.yml" ]; then - cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' -# .woodpecker/production.yml — Production deployment pipeline -# Triggered by vault-runner via Woodpecker promote API. 
-# Human approves promotion in vault → vault-runner calls promote → this runs. - -when: - event: deployment - environment: production - -steps: - - name: deploy-production - image: docker:27 - commands: - - echo "Deploying to production environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" - # Pull the verified image and deploy to production - # Customize these commands for your project: - # - docker compose -f docker-compose.yml up -d - - echo "Production deployment complete" - - - name: verify-production - image: alpine:3 - commands: - - echo "Verifying production deployment..." - # Add production health checks here: - # - curl -sf http://production:8080/health || exit 1 - - echo "Production verification complete" -PRODUCTIONEOF - echo "Created: ${wp_dir}/production.yml" - fi + _generate_deploy_pipelines_impl "$@" } # Check whether compose mode is active (docker-compose.yml exists). @@ -530,497 +206,11 @@ is_compose_mode() { [ -f "${FACTORY_ROOT}/docker-compose.yml" ] } -# Provision or connect to a local Forgejo instance. -# Creates admin + bot users, generates API tokens, stores in .env. -# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. -setup_forge() { - local forge_url="$1" - local repo_slug="$2" - local use_bare="${DISINTO_BARE:-false}" - - echo "" - echo "── Forge setup ────────────────────────────────────────" - - # Helper: run a command inside the Forgejo container - _forgejo_exec() { - if [ "$use_bare" = true ]; then - docker exec -u git disinto-forgejo "$@" - else - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" - fi - } - - # Check if Forgejo is already running - if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then - echo "Forgejo: ${forge_url} (already running)" - else - echo "Forgejo not reachable at ${forge_url}" - echo "Starting Forgejo via Docker..." - - if ! 
command -v docker &>/dev/null; then - echo "Error: docker not found — needed to provision Forgejo" >&2 - echo " Install Docker or start Forgejo manually at ${forge_url}" >&2 - exit 1 - fi - - # Extract port from forge_url - local forge_port - forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|') - forge_port="${forge_port:-3000}" - - if [ "$use_bare" = true ]; then - # Bare-metal mode: standalone docker run - mkdir -p "${FORGEJO_DATA_DIR}" - - if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then - docker start disinto-forgejo >/dev/null 2>&1 || true - else - docker run -d \ - --name disinto-forgejo \ - --restart unless-stopped \ - -p "${forge_port}:3000" \ - -p 2222:22 \ - -v "${FORGEJO_DATA_DIR}:/data" \ - -e "FORGEJO__database__DB_TYPE=sqlite3" \ - -e "FORGEJO__server__ROOT_URL=${forge_url}/" \ - -e "FORGEJO__server__HTTP_PORT=3000" \ - -e "FORGEJO__service__DISABLE_REGISTRATION=true" \ - codeberg.org/forgejo/forgejo:11.0 - fi - else - # Compose mode: start Forgejo via docker compose - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo - fi - - # Wait for Forgejo to become healthy - echo -n "Waiting for Forgejo to start" - local retries=0 - while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 60 ]; then - echo "" - echo "Error: Forgejo did not become ready within 60s" >&2 - exit 1 - fi - echo -n "." - sleep 1 - done - echo " ready" - fi - - # Wait for Forgejo database to accept writes (API may be ready before DB is) - echo -n "Waiting for Forgejo database" - local db_ready=false - for _i in $(seq 1 30); do - if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then - db_ready=true - break - fi - echo -n "." 
- sleep 1 - done - echo "" - if [ "$db_ready" != true ]; then - echo "Error: Forgejo database not ready after 30s" >&2 - exit 1 - fi - - # Create admin user if it doesn't exist - local admin_user="disinto-admin" - local admin_pass - admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Creating admin user: ${admin_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --email "admin@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create admin user '${admin_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - - # Verify admin user was actually created - if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Error: admin user '${admin_user}' not found after creation" >&2 - exit 1 - fi - # Preserve password for Woodpecker OAuth2 token generation (#779) - _FORGE_ADMIN_PASS="$admin_pass" - fi - - # Get or create admin token - local admin_token - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - - if [ -z "$admin_token" ]; then - # Token might already exist — try listing - admin_token=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || admin_token="" - fi - - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - exit 1 - fi - - # Create bot users and tokens - # Each agent gets its own Forgejo account for identity and audit trail (#747). - # Map: bot-username -> env-var-name for the token - local -A bot_token_vars=( - [dev-bot]="FORGE_TOKEN" - [review-bot]="FORGE_REVIEW_TOKEN" - [planner-bot]="FORGE_PLANNER_TOKEN" - [gardener-bot]="FORGE_GARDENER_TOKEN" - [vault-bot]="FORGE_VAULT_TOKEN" - [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" - [predictor-bot]="FORGE_PREDICTOR_TOKEN" - [action-bot]="FORGE_ACTION_TOKEN" - ) - - local env_file="${FACTORY_ROOT}/.env" - local bot_user bot_pass token token_var - - for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do - bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - token_var="${bot_token_vars[$bot_user]}" - - if ! 
curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - echo "Creating bot user: ${bot_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --email "${bot_user}@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create bot user '${bot_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --must-change-password=false - - # Verify bot user was actually created - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - echo "Error: bot user '${bot_user}' not found after creation" >&2 - exit 1 - fi - fi - - # Generate token via API (basic auth as the bot user — Forgejo requires - # basic auth on POST /users/{username}/tokens, token auth is rejected) - token=$(curl -sf -X POST \ - -u "${bot_user}:${bot_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" \ - -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || token="" - - if [ -z "$token" ]; then - # Token name collision — create with timestamp suffix - token=$(curl -sf -X POST \ - -u "${bot_user}:${bot_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" \ - -d "{\"name\":\"disinto-${bot_user}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || token="" - fi - - if [ -z "$token" ]; then - echo "Error: failed to create API token for '${bot_user}'" >&2 - exit 1 - fi - - # Store token in .env under the 
per-agent variable name - if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then - sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" - else - printf '%s=%s\n' "$token_var" "$token" >> "$env_file" - fi - export "${token_var}=${token}" - echo " ${bot_user} token saved (${token_var})" - - # Backwards-compat aliases for dev-bot and review-bot - if [ "$bot_user" = "dev-bot" ]; then - export CODEBERG_TOKEN="$token" - elif [ "$bot_user" = "review-bot" ]; then - export REVIEW_BOT_TOKEN="$token" - fi - done - - # Store FORGE_URL in .env if not already present - if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then - printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" - fi - - # Create the repo on Forgejo if it doesn't exist - local org_name="${repo_slug%%/*}" - local repo_name="${repo_slug##*/}" - - # Check if repo already exists - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then - - # Try creating org first (ignore if exists) - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs" \ - -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true - - # Create repo under org - if ! 
curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - # Fallback: create under the dev-bot user - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1 || true - fi - - # Add all bot users as collaborators - for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ - -d '{"permission":"write"}' >/dev/null 2>&1 || true - done - - echo "Repo: ${repo_slug} created on Forgejo" - else - echo "Repo: ${repo_slug} (already exists on Forgejo)" - fi - - echo "Forge: ${forge_url} (ready)" -} - # Create and seed the {project}-ops repo on Forgejo with initial directory structure. # The ops repo holds operational data: vault items, journals, evidence, prerequisites. -setup_ops_repo() { - local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" - local org_name="${ops_slug%%/*}" - local ops_name="${ops_slug##*/}" +# ops repo setup is now in lib/ops-setup.sh - echo "" - echo "── Ops repo setup ─────────────────────────────────────" - - # Check if ops repo already exists on Forgejo - if curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${ops_slug}" >/dev/null 2>&1; then - echo "Ops repo: ${ops_slug} (already exists on Forgejo)" - else - # Create ops repo under org - if ! 
curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then - # Fallback: create under the user - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data\"}" >/dev/null 2>&1 || true - fi - - # Add all bot users as collaborators - local bot_user - for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot action-bot; do - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${ops_slug}/collaborators/${bot_user}" \ - -d '{"permission":"write"}' >/dev/null 2>&1 || true - done - - echo "Ops repo: ${ops_slug} created on Forgejo" - fi - - # Clone ops repo locally if not present - if [ ! 
-d "${ops_root}/.git" ]; then - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") - local clone_url="${auth_url}/${ops_slug}.git" - echo "Cloning: ops repo -> ${ops_root}" - git clone --quiet "$clone_url" "$ops_root" 2>/dev/null || { - echo "Initializing: ops repo at ${ops_root}" - mkdir -p "$ops_root" - git -C "$ops_root" init --initial-branch="${primary_branch}" -q - } - else - echo "Ops repo: ${ops_root} (already exists locally)" - fi - - # Seed directory structure - local seeded=false - mkdir -p "${ops_root}/vault/pending" - mkdir -p "${ops_root}/vault/approved" - mkdir -p "${ops_root}/vault/fired" - mkdir -p "${ops_root}/vault/rejected" - mkdir -p "${ops_root}/journal/planner" - mkdir -p "${ops_root}/journal/supervisor" - mkdir -p "${ops_root}/knowledge" - mkdir -p "${ops_root}/evidence/engagement" - - if [ ! -f "${ops_root}/README.md" ]; then - cat > "${ops_root}/README.md" < "${ops_root}/portfolio.md"; seeded=true; } - [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } - [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } - - # Commit and push seed content - if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then - # Auto-configure repo-local git identity if missing (#778) - if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then - git -C "$ops_root" config user.name "disinto-admin" - fi - if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then - git -C "$ops_root" config user.email "disinto-admin@localhost" - fi - - git -C "$ops_root" add -A - if ! 
git -C "$ops_root" diff --cached --quiet 2>/dev/null; then - git -C "$ops_root" commit -m "chore: seed ops repo structure" -q - # Push if remote exists - if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then - git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null || true - fi - fi - echo "Seeded: ops repo with initial structure" - fi -} - -# Push local clone to the Forgejo remote. -push_to_forge() { - local repo_root="$1" forge_url="$2" repo_slug="$3" - - # Build authenticated remote URL: http://dev-bot:@host:port/org/repo.git - if [ -z "${FORGE_TOKEN:-}" ]; then - echo "Error: FORGE_TOKEN not set — cannot push to Forgejo" >&2 - return 1 - fi - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") - local remote_url="${auth_url}/${repo_slug}.git" - # Display URL without token - local display_url="${forge_url}/${repo_slug}.git" - - # Always set the remote URL to ensure credentials are current - if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then - git -C "$repo_root" remote set-url forgejo "$remote_url" - else - git -C "$repo_root" remote add forgejo "$remote_url" - fi - echo "Remote: forgejo -> ${display_url}" - - # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) - if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then - echo "Push: skipped (local repo has no commits)" - return 0 - fi - - # Push all branches and tags - echo "Pushing: branches to forgejo" - if ! git -C "$repo_root" push forgejo --all 2>&1; then - echo "Error: failed to push branches to Forgejo" >&2 - return 1 - fi - echo "Pushing: tags to forgejo" - if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then - echo "Error: failed to push tags to Forgejo" >&2 - return 1 - fi - - # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) - local is_empty="true" - local verify_attempt - for verify_attempt in $(seq 1 5); do - local repo_info - repo_info=$(curl -sf --max-time 10 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" - if [ -z "$repo_info" ]; then - is_empty="skipped" - break # API unreachable, skip verification - fi - is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') - if [ "$is_empty" != "true" ]; then - echo "Verify: repo is not empty (push confirmed)" - break - fi - if [ "$verify_attempt" -lt 5 ]; then - sleep 2 - fi - done - if [ "$is_empty" = "true" ]; then - echo "Warning: Forgejo repo still reports empty after push" >&2 - return 1 - fi -} +# push_to_forge() is sourced from lib/forge-push.sh # Preflight check — verify all factory requirements before proceeding. preflight_check() { @@ -1189,6 +379,15 @@ create_labels() { ["underspecified"]="#fbca04" ["vision"]="#0e8a16" ["action"]="#1d76db" + ["prediction/unreviewed"]="#a2eeef" + ["prediction/dismissed"]="#d73a4a" + ["prediction/actioned"]="#28a745" + ["bug-report"]="#e11d48" + ["needs-triage"]="#f9d0c4" + ["reproduced"]="#0e8a16" + ["cannot-reproduce"]="#cccccc" + ["in-triage"]="#1d76db" + ["rejected"]="#cccccc" ) echo "Creating labels on ${repo}..." 
@@ -1201,9 +400,11 @@ create_labels() { | grep -o '"name":"[^"]*"' | cut -d'"' -f4) || existing="" local name color - for name in backlog in-progress blocked tech-debt underspecified vision action; do + local created=0 skipped=0 failed=0 + for name in backlog in-progress blocked tech-debt underspecified vision action bug-report prediction/unreviewed prediction/dismissed prediction/actioned needs-triage reproduced cannot-reproduce in-triage rejected; do if echo "$existing" | grep -qx "$name"; then echo " . ${name} (already exists)" + skipped=$((skipped + 1)) continue fi color="${labels[$name]}" @@ -1212,11 +413,15 @@ create_labels() { -H "Content-Type: application/json" \ "${api}/labels" \ -d "{\"name\":\"${name}\",\"color\":\"${color}\"}" >/dev/null 2>&1; then - echo " + ${name}" + echo " + ${name} (created)" + created=$((created + 1)) else echo " ! ${name} (failed to create)" + failed=$((failed + 1)) fi done + + echo "Labels: ${created} created, ${skipped} skipped, ${failed} failed" } # Generate a minimal VISION.md template in the target project. @@ -1256,402 +461,57 @@ EOF echo " Commit this to your repo when ready" } -# Generate and optionally install cron entries for the project agents. +# Copy issue templates from templates/ to target project repo. +copy_issue_templates() { + local repo_root="$1" + local template_dir="${FACTORY_ROOT}/templates" + local target_dir="${repo_root}/.forgejo/ISSUE_TEMPLATE" + + # Skip if templates directory doesn't exist + if [ ! -d "$template_dir" ]; then + return + fi + + # Create target directory + mkdir -p "$target_dir" + + # Copy each template file if it doesn't already exist + for template in "$template_dir"/issue/*; do + [ -f "$template" ] || continue + local filename + filename=$(basename "$template") + local target_path="${target_dir}/${filename}" + if [ ! 
-f "$target_path" ]; then + cp "$template" "$target_path" + echo "Copied: ${target_path}" + else + echo "Skipped: ${target_path} (already exists)" + fi + done +} + +# Install cron entries for project agents (implementation in lib/ci-setup.sh) install_cron() { - local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - - # In compose mode, skip host cron — the agents container runs cron internally - if [ "$bare" = false ]; then - echo "" - echo "Cron: skipped (agents container handles scheduling in compose mode)" - return - fi - - # Bare mode: crontab is required on the host - if ! command -v crontab &>/dev/null; then - echo "Error: crontab not found (required for bare-metal mode)" >&2 - echo " Install: apt install cron / brew install cron" >&2 - exit 1 - fi - - # Use absolute path for the TOML in cron entries - local abs_toml - abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" - - local cron_block - cron_block="# disinto: ${name} -2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 -4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 -0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" - - echo "" - echo "Cron entries to install:" - echo "$cron_block" - echo "" - - if [ "$auto_yes" = false ] && [ -t 0 ]; then - read -rp "Install these cron entries? [y/N] " confirm - if [[ ! "$confirm" =~ ^[Yy] ]]; then - echo "Skipped cron install. Add manually with: crontab -e" - return - fi - fi - - # Append to existing crontab - { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab - - echo "Cron entries installed" + _load_ci_context + _install_cron_impl "$@" } -# Set up Woodpecker CI to use Forgejo as its forge backend. -# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
+# Create Woodpecker OAuth2 app on Forgejo (implementation in lib/ci-setup.sh) create_woodpecker_oauth() { - local forge_url="$1" repo_slug="$2" - - echo "" - echo "── Woodpecker OAuth2 setup ────────────────────────────" - - # Create OAuth2 application on Forgejo for Woodpecker - local oauth2_name="woodpecker-ci" - local redirect_uri="http://localhost:8000/authorize" - local existing_app client_id client_secret - - # Check if OAuth2 app already exists - existing_app=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ - | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true - - if [ -n "$existing_app" ]; then - echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" - client_id="$existing_app" - else - local oauth2_resp - oauth2_resp=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/applications/oauth2" \ - -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ - 2>/dev/null) || oauth2_resp="" - - if [ -z "$oauth2_resp" ]; then - echo "Warning: failed to create OAuth2 app on Forgejo" >&2 - return - fi - - client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') - client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') - - if [ -z "$client_id" ]; then - echo "Warning: OAuth2 app creation returned no client_id" >&2 - return - fi - - echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" - fi - - # Store Woodpecker forge config in .env - # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references - local env_file="${FACTORY_ROOT}/.env" - local wp_vars=( - "WOODPECKER_FORGEJO=true" - "WOODPECKER_FORGEJO_URL=${forge_url}" - ) - if [ -n "${client_id:-}" ]; then - wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") - fi - if [ -n "${client_secret:-}" 
]; then - wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") - fi - - for var_line in "${wp_vars[@]}"; do - local var_name="${var_line%%=*}" - if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then - sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" - else - printf '%s\n' "$var_line" >> "$env_file" - fi - done - echo "Config: Woodpecker forge vars written to .env" + _load_ci_context + _create_woodpecker_oauth_impl "$@" } -# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. -# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). -# Called after compose stack is up, before activate_woodpecker_repo. +# Generate WOODPECKER_TOKEN via Forgejo OAuth2 flow (implementation in lib/ci-setup.sh) generate_woodpecker_token() { - local forge_url="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - local env_file="${FACTORY_ROOT}/.env" - local admin_user="disinto-admin" - local admin_pass="${_FORGE_ADMIN_PASS:-}" - - # Skip if already set - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - echo "Config: WOODPECKER_TOKEN already set in .env" - return 0 - fi - - echo "" - echo "── Woodpecker token generation ────────────────────────" - - if [ -z "$admin_pass" ]; then - echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 - echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 - return 1 - fi - - # Wait for Woodpecker to become ready - echo -n "Waiting for Woodpecker" - local retries=0 - while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 30 ]; then - echo "" - echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 - return 1 - fi - echo -n "." 
- sleep 2 - done - echo " ready" - - # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token - local cookie_jar auth_body_file - cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) - auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) - - # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) - local csrf - csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ - | grep -o 'name="_csrf"[^>]*' | head -1 \ - | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || csrf="" - - if [ -z "$csrf" ]; then - echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ - -o /dev/null \ - "${forge_url}/user/login" \ - --data-urlencode "_csrf=${csrf}" \ - --data-urlencode "user_name=${admin_user}" \ - --data-urlencode "password=${admin_pass}" \ - 2>/dev/null || true - - # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) - local wp_redir - wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ - "${wp_server}/authorize" 2>/dev/null) || wp_redir="" - - if [ -z "$wp_redir" ]; then - echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - # Rewrite internal Docker network URLs to host-accessible URLs. - # Handle both plain and URL-encoded forms of the internal hostnames. 
- local forge_url_enc wp_server_enc - forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') - wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') - wp_redir=$(printf '%s' "$wp_redir" \ - | sed "s|http://forgejo:3000|${forge_url}|g" \ - | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ - | sed "s|http://woodpecker:8000|${wp_server}|g" \ - | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") - - # Step 3: Hit Forgejo OAuth authorize endpoint with session - # First time: shows consent page. Already approved: redirects with code. - local auth_headers redirect_loc auth_code - auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o "$auth_body_file" \ - "$wp_redir" 2>/dev/null) || auth_headers="" - - redirect_loc=$(printf '%s' "$auth_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - # Auto-approved: extract code from redirect - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - else - # Consent page: extract CSRF and all form fields, POST grant approval - local consent_csrf form_client_id form_state form_redirect_uri - consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ - | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || consent_csrf="" - form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" - form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" - form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" - - if [ -n "$consent_csrf" ]; then - local grant_headers - grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o /dev/null -X POST \ - "${forge_url}/login/oauth/grant" \ 
- --data-urlencode "_csrf=${consent_csrf}" \ - --data-urlencode "client_id=${form_client_id}" \ - --data-urlencode "state=${form_state}" \ - --data-urlencode "scope=" \ - --data-urlencode "nonce=" \ - --data-urlencode "redirect_uri=${form_redirect_uri}" \ - --data-urlencode "granted=true" \ - 2>/dev/null) || grant_headers="" - - redirect_loc=$(printf '%s' "$grant_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - fi - fi - fi - - rm -f "$auth_body_file" - - if [ -z "${auth_code:-}" ]; then - echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 - rm -f "$cookie_jar" - return 1 - fi - - # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) - local state - state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') - - local wp_headers wp_token - wp_headers=$(curl -sf -c "$cookie_jar" \ - -D - -o /dev/null \ - "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ - 2>/dev/null) || wp_headers="" - - # Extract token from redirect URL (Woodpecker returns ?access_token=...) - redirect_loc=$(printf '%s' "$wp_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - wp_token="" - if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then - wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') - fi - - # Fallback: check for user_sess cookie - if [ -z "$wp_token" ]; then - wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" - fi - - rm -f "$cookie_jar" - - if [ -z "$wp_token" ]; then - echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 - return 1 - fi - - # Step 5: Create persistent personal access token via Woodpecker API - # WP v3 requires CSRF header for POST operations with session tokens. 
- local wp_csrf - wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ - "${wp_server}/web-config.js" 2>/dev/null \ - | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" - - local pat_resp final_token - pat_resp=$(curl -sf -X POST \ - -b "user_sess=${wp_token}" \ - ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ - "${wp_server}/api/user/token" \ - 2>/dev/null) || pat_resp="" - - final_token="" - if [ -n "$pat_resp" ]; then - final_token=$(printf '%s' "$pat_resp" \ - | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ - 2>/dev/null) || final_token="" - fi - - # Use persistent token if available, otherwise use session token - final_token="${final_token:-$wp_token}" - - # Save to .env - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" - else - printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" - fi - export WOODPECKER_TOKEN="$final_token" - echo "Config: WOODPECKER_TOKEN generated and saved to .env" + _load_ci_context + _generate_woodpecker_token_impl "$@" } +# Activate repo in Woodpecker CI (implementation in lib/ci-setup.sh) activate_woodpecker_repo() { - local forge_repo="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - - # Wait for Woodpecker to become ready after stack start - local retries=0 - while [ $retries -lt 10 ]; do - if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then - break - fi - retries=$((retries + 1)) - sleep 2 - done - - if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then - echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 - return - fi - - echo "" - echo "── Woodpecker repo activation ─────────────────────────" - - local wp_token="${WOODPECKER_TOKEN:-}" - if [ -z "$wp_token" ]; then - echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - return - fi - - local wp_repo_id - wp_repo_id=$(curl -sf \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" - else - # Get Forgejo repo numeric ID for WP activation - local forge_repo_id - forge_repo_id=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" - - local activate_resp - activate_resp=$(curl -sf -X POST \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ - 2>/dev/null) || activate_resp="" - - wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" - - # Set pipeline timeout to 5 minutes (default is 60) - curl -sf -X PATCH -H "Authorization: Bearer ${wp_token}" -H "Content-Type: application/json" "${wp_server}/api/repos/${wp_repo_id}" -d '{"timeout": 5}' >/dev/null 2>&1 && echo "Config: pipeline timeout set to 5 minutes" || true - else - echo "Warning: could not activate repo in Woodpecker" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - fi - fi - - # Store repo ID 
for later TOML generation - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - _WP_REPO_ID="$wp_repo_id" - fi + _load_ci_context + _activate_woodpecker_repo_impl "$@" } # ── init command ───────────────────────────────────────────────────────────── @@ -1784,8 +644,10 @@ p.write_text(text) echo "Repo: ${repo_root} (existing clone)" fi - # Push to local Forgejo - push_to_forge "$repo_root" "$forge_url" "$forge_repo" + # Push to local Forgejo (skip if SKIP_PUSH is set) + if [ "${SKIP_PUSH:-false}" = "false" ]; then + push_to_forge "$repo_root" "$forge_url" "$forge_repo" + fi # Detect primary branch if [ -z "$branch" ]; then @@ -1794,10 +656,26 @@ p.write_text(text) echo "Branch: ${branch}" # Set up {project}-ops repo (#757) - local ops_slug="${forge_repo}-ops" + # Always use disinto-admin as the ops repo owner — forge_repo owner may be + # the calling user (e.g. johba) but the ops repo belongs to disinto-admin. + local ops_slug="disinto-admin/${project_name}-ops" local ops_root="/home/${USER}/${project_name}-ops" setup_ops_repo "$forge_url" "$ops_slug" "$ops_root" "$branch" + # Set up vault branch protection on ops repo (#77) + # This ensures admin-only merge to main, blocking bots from merging vault PRs + # Use HUMAN_TOKEN (disinto-admin) or FORGE_TOKEN (dev-bot) for admin operations + export FORGE_OPS_REPO="$ops_slug" + # Source env.sh to ensure FORGE_TOKEN is available + source "${FACTORY_ROOT}/lib/env.sh" + source "${FACTORY_ROOT}/lib/branch-protection.sh" + if setup_vault_branch_protection "$branch"; then + echo "Branch protection: vault protection configured on ${ops_slug}" + else + echo "Warning: failed to set up vault branch protection" >&2 + fi + unset FORGE_OPS_REPO + # Generate project TOML (skip if already exists) if [ "$toml_exists" = false ]; then # Prompt for CI ID if interactive and not already set via flag @@ -1810,6 +688,24 @@ p.write_text(text) echo "Created: ${toml_path}" fi + # Update ops_repo in TOML with the resolved actual ops slug. 
+ # Uses in-place substitution to prevent duplicate keys on repeated init runs. + # If the key is missing (manually created TOML), it is inserted after the repo line. + if [ -n "${_ACTUAL_OPS_SLUG:-}" ] && [ -f "$toml_path" ]; then + python3 -c " +import sys, re, pathlib +p = pathlib.Path(sys.argv[1]) +text = p.read_text() +new_val = 'ops_repo = \"' + sys.argv[2] + '\"' +if re.search(r'^ops_repo\s*=', text, re.MULTILINE): + text = re.sub(r'^ops_repo\s*=\s*.*\$', new_val, text, flags=re.MULTILINE) +else: + text = re.sub(r'^(repo\s*=\s*\"[^\"]*\")', r'\1\n' + new_val, text, flags=re.MULTILINE) +p.write_text(text) +" "$toml_path" "${_ACTUAL_OPS_SLUG}" + echo "Updated: ops_repo in ${toml_path}" + fi + # Create OAuth2 app on Forgejo for Woodpecker (before compose up) _WP_REPO_ID="" create_woodpecker_oauth "$forge_url" "$forge_repo" @@ -1832,12 +728,23 @@ p.write_text(text) # Create labels on remote create_labels "$forge_repo" "$forge_url" + # Set up branch protection on project repo (#10) + # This enforces PR flow: no direct pushes, 1 approval required, dev-bot can merge after CI + if setup_project_branch_protection "$forge_repo" "$branch"; then + echo "Branch protection: project protection configured on ${forge_repo}" + else + echo "Warning: failed to set up project branch protection" >&2 + fi + # Generate VISION.md template generate_vision "$repo_root" "$project_name" # Generate template deployment pipeline configs in project repo generate_deploy_pipelines "$repo_root" "$project_name" + # Copy issue templates to target project + copy_issue_templates "$repo_root" + # Install cron jobs install_cron "$project_name" "$toml_path" "$auto_yes" "$bare" @@ -1846,17 +753,36 @@ p.write_text(text) if [ -n "${MIRROR_NAMES:-}" ]; then echo "Mirrors: setting up remotes" local mname murl + local mirrors_ok=true for mname in $MIRROR_NAMES; do murl=$(eval "echo \"\$MIRROR_$(echo "$mname" | tr '[:lower:]' '[:upper:]')\"") || true [ -z "$murl" ] && continue - git -C "$repo_root" remote 
add "$mname" "$murl" 2>/dev/null \ - || git -C "$repo_root" remote set-url "$mname" "$murl" 2>/dev/null || true - echo " + ${mname} -> ${murl}" + if git -C "$repo_root" remote get-url "$mname" >/dev/null 2>&1; then + if git -C "$repo_root" remote set-url "$mname" "$murl"; then + echo " + ${mname} -> ${murl} (updated)" + else + echo " ! ${mname} -> ${murl} (failed to update URL)" + mirrors_ok=false + fi + else + if git -C "$repo_root" remote add "$mname" "$murl"; then + echo " + ${mname} -> ${murl} (added)" + else + echo " ! ${mname} -> ${murl} (failed to add remote)" + mirrors_ok=false + fi + fi done # Initial sync: push current primary branch to mirrors - source "${FACTORY_ROOT}/lib/mirrors.sh" - export PROJECT_REPO_ROOT="$repo_root" - mirror_push + if [ "$mirrors_ok" = true ]; then + source "${FACTORY_ROOT}/lib/mirrors.sh" + export PROJECT_REPO_ROOT="$repo_root" + if mirror_push; then + echo "Mirrors: initial sync complete" + else + echo "Warning: mirror push failed" >&2 + fi + fi fi # Encrypt secrets if SOPS + age are available @@ -1895,9 +821,16 @@ p.write_text(text) # Activate default agents (zero-cost when idle — they only invoke Claude # when there is actual work, so an empty project burns no LLM tokens) mkdir -p "${FACTORY_ROOT}/state" - touch "${FACTORY_ROOT}/state/.dev-active" - touch "${FACTORY_ROOT}/state/.reviewer-active" - touch "${FACTORY_ROOT}/state/.gardener-active" + + # State files are idempotent — create if missing, skip if present + for state_file in ".dev-active" ".reviewer-active" ".gardener-active"; do + if [ -f "${FACTORY_ROOT}/state/${state_file}" ]; then + echo "State: ${state_file} (already active)" + else + touch "${FACTORY_ROOT}/state/${state_file}" + echo "State: ${state_file} (created)" + fi + done echo "" echo "Done. Project ${project_name} is ready." 
@@ -2022,7 +955,88 @@ disinto_secrets() { fi } + local secrets_dir="${FACTORY_ROOT}/secrets" + local age_key_file="${HOME}/.config/sops/age/keys.txt" + + # Shared helper: ensure age key exists and export AGE_PUBLIC_KEY + _secrets_ensure_age_key() { + if ! command -v age &>/dev/null; then + echo "Error: age is required." >&2 + echo " Install age: apt install age / brew install age" >&2 + exit 1 + fi + if [ ! -f "$age_key_file" ]; then + echo "Error: age key not found at ${age_key_file}" >&2 + echo " Run 'disinto init' to generate one, or create manually with:" >&2 + echo " mkdir -p ~/.config/sops/age && age-keygen -o ${age_key_file}" >&2 + exit 1 + fi + AGE_PUBLIC_KEY="$(age-keygen -y "$age_key_file" 2>/dev/null)" + if [ -z "$AGE_PUBLIC_KEY" ]; then + echo "Error: failed to read public key from ${age_key_file}" >&2 + exit 1 + fi + export AGE_PUBLIC_KEY + } + case "$subcmd" in + add) + local name="${2:-}" + if [ -z "$name" ]; then + echo "Usage: disinto secrets add " >&2 + exit 1 + fi + _secrets_ensure_age_key + mkdir -p "$secrets_dir" + + printf 'Enter value for %s: ' "$name" >&2 + local value + IFS= read -rs value + echo >&2 + if [ -z "$value" ]; then + echo "Error: empty value" >&2 + exit 1 + fi + + local enc_path="${secrets_dir}/${name}.enc" + if [ -f "$enc_path" ]; then + printf 'Secret %s already exists. Overwrite? [y/N] ' "$name" >&2 + local confirm + read -r confirm + if [ "$confirm" != "y" ] && [ "$confirm" != "Y" ]; then + echo "Aborted." >&2 + exit 1 + fi + fi + if ! printf '%s' "$value" | age -r "$AGE_PUBLIC_KEY" -o "$enc_path"; then + echo "Error: encryption failed" >&2 + exit 1 + fi + echo "Stored: ${enc_path}" + ;; + show) + local name="${2:-}" + if [ -n "$name" ]; then + # Show individual secret: disinto secrets show + local enc_path="${secrets_dir}/${name}.enc" + if [ ! -f "$enc_path" ]; then + echo "Error: ${enc_path} not found" >&2 + exit 1 + fi + if [ ! 
-f "$age_key_file" ]; then + echo "Error: age key not found at ${age_key_file}" >&2 + exit 1 + fi + age -d -i "$age_key_file" "$enc_path" + else + # Show all agent secrets: disinto secrets show + if [ ! -f "$enc_file" ]; then + echo "Error: ${enc_file} not found." >&2 + exit 1 + fi + sops -d "$enc_file" + fi + ;; edit) if [ ! -f "$enc_file" ]; then echo "Error: ${enc_file} not found. Run 'disinto secrets migrate' first." >&2 @@ -2030,13 +1044,6 @@ disinto_secrets() { fi sops "$enc_file" ;; - show) - if [ ! -f "$enc_file" ]; then - echo "Error: ${enc_file} not found." >&2 - exit 1 - fi - sops -d "$enc_file" - ;; migrate) if [ ! -f "$env_file" ]; then echo "Error: ${env_file} not found — nothing to migrate." >&2 @@ -2044,6 +1051,12 @@ disinto_secrets() { fi _secrets_ensure_sops encrypt_env_file "$env_file" "$enc_file" + # Verify decryption works + if ! sops -d "$enc_file" >/dev/null 2>&1; then + echo "Error: failed to verify .env.enc decryption" >&2 + rm -f "$enc_file" + exit 1 + fi rm -f "$env_file" echo "Migrated: .env -> .env.enc (plaintext removed)" ;; @@ -2069,6 +1082,12 @@ disinto_secrets() { fi _secrets_ensure_sops encrypt_env_file "$vault_env_file" "$vault_enc_file" + # Verify decryption works before removing plaintext + if ! sops -d "$vault_enc_file" >/dev/null 2>&1; then + echo "Error: failed to verify .env.vault.enc decryption" >&2 + rm -f "$vault_enc_file" + exit 1 + fi rm -f "$vault_env_file" echo "Migrated: .env.vault -> .env.vault.enc (plaintext removed)" ;; @@ -2076,9 +1095,13 @@ disinto_secrets() { cat <&2 Usage: disinto secrets +Individual secrets (secrets/.enc): + add Prompt for value, encrypt, store in secrets/.enc + show Decrypt and print an individual secret + Agent secrets (.env.enc): edit Edit agent secrets (FORGE_TOKEN, CLAUDE_API_KEY, etc.) 
- show Show decrypted agent secrets + show Show decrypted agent secrets (no argument) migrate Encrypt .env -> .env.enc Vault secrets (.env.vault.enc): @@ -2091,10 +1114,10 @@ EOF esac } -# ── vault-run command ───────────────────────────────────────────────────────── +# ── run command ─────────────────────────────────────────────────────────────── -disinto_vault_run() { - local action_id="${1:?Usage: disinto vault-run }" +disinto_run() { + local action_id="${1:?Usage: disinto run }" local compose_file="${FACTORY_ROOT}/docker-compose.yml" local vault_enc="${FACTORY_ROOT}/.env.vault.enc" @@ -2128,24 +1151,73 @@ disinto_vault_run() { echo "Vault secrets decrypted to tmpfile" - # Run action in ephemeral vault-runner container + # Run action in ephemeral runner container local rc=0 docker compose -f "$compose_file" \ run --rm --env-file "$tmp_env" \ - vault-runner "$action_id" || rc=$? + runner "$action_id" || rc=$? # Clean up — secrets gone rm -f "$tmp_env" - echo "Vault tmpfile removed" + echo "Run tmpfile removed" if [ "$rc" -eq 0 ]; then - echo "Vault action ${action_id} completed successfully" + echo "Run action ${action_id} completed successfully" else - echo "Vault action ${action_id} failed (exit ${rc})" >&2 + echo "Run action ${action_id} failed (exit ${rc})" >&2 fi return "$rc" } +# ── Pre-build: download binaries to docker/agents/bin/ ──────────────────────── +# This avoids network calls during docker build (needed for Docker-in-LXD builds) +# Returns 0 on success, 1 on failure +download_agent_binaries() { + local bin_dir="${FACTORY_ROOT}/docker/agents/bin" + mkdir -p "$bin_dir" + + echo "Downloading agent binaries to ${bin_dir}..." + + # Download SOPS + local sops_file="${bin_dir}/sops" + if [ ! -f "$sops_file" ]; then + echo " Downloading SOPS v3.9.4..." + curl -sL https://github.com/getsops/sops/releases/download/v3.9.4/sops-v3.9.4.linux.amd64 -o "$sops_file" + if [ ! 
-f "$sops_file" ]; then + echo "Error: failed to download SOPS" >&2 + return 1 + fi + fi + # Verify checksum + echo " Verifying SOPS checksum..." + if ! echo "5488e32bc471de7982ad895dd054bbab3ab91c417a118426134551e9626e4e85 ${sops_file}" | sha256sum -c - >/dev/null 2>&1; then + echo "Error: SOPS checksum verification failed" >&2 + return 1 + fi + chmod +x "$sops_file" + + # Download tea CLI + local tea_file="${bin_dir}/tea" + if [ ! -f "$tea_file" ]; then + echo " Downloading tea CLI v0.9.2..." + curl -sL https://dl.gitea.com/tea/0.9.2/tea-0.9.2-linux-amd64 -o "$tea_file" + if [ ! -f "$tea_file" ]; then + echo "Error: failed to download tea CLI" >&2 + return 1 + fi + fi + # Verify checksum + echo " Verifying tea CLI checksum..." + if ! echo "be10cdf9a619e3c0f121df874960ed19b53e62d1c7036cf60313a28b5227d54d ${tea_file}" | sha256sum -c - >/dev/null 2>&1; then + echo "Error: tea CLI checksum verification failed" >&2 + return 1 + fi + chmod +x "$tea_file" + + echo "Binaries downloaded and verified successfully" + return 0 +} + # ── up command ──────────────────────────────────────────────────────────────── disinto_up() { @@ -2156,6 +1228,14 @@ disinto_up() { exit 1 fi + # Pre-build: download binaries to docker/agents/bin/ to avoid network calls during docker build + echo "── Pre-build: downloading agent binaries ────────────────────────" + if ! download_agent_binaries; then + echo "Error: failed to download agent binaries" >&2 + exit 1 + fi + echo "" + # Decrypt secrets to temp .env if SOPS available and .env.enc exists local tmp_env="" local enc_file="${FACTORY_ROOT}/.env.enc" @@ -2211,17 +1291,82 @@ disinto_shell() { docker compose -f "$compose_file" exec agents bash } +# ── hire-an-agent command ───────────────────────────────────────────────────── + +# Creates a Forgejo user and .profile repo for an agent. 
+# Usage: disinto hire-an-agent [--formula ] +# disinto_hire_an_agent() is sourced from lib/hire-agent.sh + +# ── release command ─────────────────────────────────────────────────────────── +# disinto_release() is sourced from lib/release.sh + +# ── ci-logs command ────────────────────────────────────────────────────────── +# Reads CI logs from the Woodpecker SQLite database. +# Usage: disinto ci-logs [--step ] +disinto_ci_logs() { + local pipeline_number="" step_name="" + + if [ $# -lt 1 ]; then + echo "Error: pipeline number required" >&2 + echo "Usage: disinto ci-logs [--step ]" >&2 + exit 1 + fi + + # Parse arguments + while [ $# -gt 0 ]; do + case "$1" in + --step|-s) + step_name="$2" + shift 2 + ;; + -*) + echo "Unknown option: $1" >&2 + exit 1 + ;; + *) + if [ -z "$pipeline_number" ]; then + pipeline_number="$1" + else + echo "Unexpected argument: $1" >&2 + exit 1 + fi + shift + ;; + esac + done + + if [ -z "$pipeline_number" ] || ! [[ "$pipeline_number" =~ ^[0-9]+$ ]]; then + echo "Error: pipeline number must be a positive integer" >&2 + exit 1 + fi + + local log_reader="${FACTORY_ROOT}/lib/ci-log-reader.py" + if [ ! 
-f "$log_reader" ]; then + echo "Error: ci-log-reader.py not found at $log_reader" >&2 + exit 1 + fi + + if [ -n "$step_name" ]; then + python3 "$log_reader" "$pipeline_number" --step "$step_name" + else + python3 "$log_reader" "$pipeline_number" + fi +} + # ── Main dispatch ──────────────────────────────────────────────────────────── case "${1:-}" in - init) shift; disinto_init "$@" ;; - up) shift; disinto_up "$@" ;; - down) shift; disinto_down "$@" ;; - logs) shift; disinto_logs "$@" ;; - shell) shift; disinto_shell ;; - status) shift; disinto_status "$@" ;; - secrets) shift; disinto_secrets "$@" ;; - vault-run) shift; disinto_vault_run "$@" ;; - -h|--help) usage ;; - *) usage ;; + init) shift; disinto_init "$@" ;; + up) shift; disinto_up "$@" ;; + down) shift; disinto_down "$@" ;; + logs) shift; disinto_logs "$@" ;; + shell) shift; disinto_shell ;; + status) shift; disinto_status "$@" ;; + secrets) shift; disinto_secrets "$@" ;; + run) shift; disinto_run "$@" ;; + ci-logs) shift; disinto_ci_logs "$@" ;; + release) shift; disinto_release "$@" ;; + hire-an-agent) shift; disinto_hire_an_agent "$@" ;; + -h|--help) usage ;; + *) usage ;; esac diff --git a/dev/AGENTS.md b/dev/AGENTS.md index ccfe0c7..e8a0ead 100644 --- a/dev/AGENTS.md +++ b/dev/AGENTS.md @@ -1,4 +1,4 @@ - + # Dev Agent **Role**: Implement issues autonomously — write code, push branches, address @@ -14,9 +14,8 @@ in-progress issues are also picked up. The direct-merge scan runs before the loc check so approved PRs get merged even while a dev-agent session is active. **Key files**: -- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `action`, `prediction/dismissed`, or `prediction/unreviewed` (replaced `prediction/backlog` — that label no longer exists) +- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. 
Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming — skips if assigned to a different bot user. **Stale branch abandonment**: closes PRs and deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start). **Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`. If the issue is assigned to `$BOT_USER` (this agent), sets `BLOCKED_BY_INPROGRESS=true` — my thread is busy. If assigned to another agent, logs and falls through (does not block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds `blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work, filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents' PRs do not block this agent's pipeline (#358, #369). - `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval -- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge. - `dev/phase-test.sh` — Integration test for the phase protocol **Environment variables consumed** (via `lib/env.sh` + project TOML): @@ -33,7 +32,7 @@ check so approved PRs get merged even while a dev-agent session is active. **Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. 
-**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file +**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux session → phase file drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session starts clean; the reinject prompt tells Claude not to re-escalate for the same reason. diff --git a/dev/dev-agent.sh b/dev/dev-agent.sh index 3a78f53..c534dbd 100755 --- a/dev/dev-agent.sh +++ b/dev/dev-agent.sh @@ -30,6 +30,7 @@ source "$(dirname "$0")/../lib/worktree.sh" source "$(dirname "$0")/../lib/pr-lifecycle.sh" source "$(dirname "$0")/../lib/mirrors.sh" source "$(dirname "$0")/../lib/agent-sdk.sh" +source "$(dirname "$0")/../lib/formula-session.sh" # Auto-pull factory code to pick up merged fixes before any logic runs git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true @@ -40,7 +41,7 @@ REPO_ROOT="${PROJECT_REPO_ROOT}" LOCKFILE="/tmp/dev-agent-${PROJECT_NAME:-default}.lock" STATUSFILE="/tmp/dev-agent-status-${PROJECT_NAME:-default}" -BRANCH="fix/issue-${ISSUE}" +BRANCH="fix/issue-${ISSUE}" # Default; will be updated after FORGE_REMOTE is known WORKTREE="/tmp/${PROJECT_NAME}-worktree-${ISSUE}" SID_FILE="/tmp/dev-session-${PROJECT_NAME}-${ISSUE}.sid" PREFLIGHT_RESULT="/tmp/dev-agent-preflight.json" @@ -185,7 +186,11 @@ log "preflight passed" # ============================================================================= # CLAIM ISSUE # ============================================================================= -issue_claim "$ISSUE" +if ! 
issue_claim "$ISSUE"; then + log "SKIP: failed to claim issue #${ISSUE} (already assigned to another agent)" + echo '{"status":"already_done","reason":"issue was claimed by another agent"}' > "$PREFLIGHT_RESULT" + exit 0 +fi CLAIMED=true # ============================================================================= @@ -258,6 +263,19 @@ FORGE_REMOTE="${FORGE_REMOTE:-origin}" export FORGE_REMOTE log "forge remote: ${FORGE_REMOTE}" +# Generate unique branch name per attempt to avoid collision with failed attempts +# Only apply when not in recovery mode (RECOVERY_MODE branch is already set from existing PR) +# First attempt: fix/issue-N, subsequent: fix/issue-N-1, fix/issue-N-2, etc. +if [ "$RECOVERY_MODE" = false ]; then + # Count only branches matching fix/issue-N, fix/issue-N-1, fix/issue-N-2, etc. (exact prefix match) + ATTEMPT=$(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}" 2>/dev/null | grep -c "refs/heads/fix/issue-${ISSUE}$" || echo 0) + ATTEMPT=$((ATTEMPT + $(git ls-remote --heads "$FORGE_REMOTE" "refs/heads/fix/issue-${ISSUE}-*" 2>/dev/null | wc -l))) + if [ "$ATTEMPT" -gt 0 ]; then + BRANCH="fix/issue-${ISSUE}-${ATTEMPT}" + fi +fi +log "using branch: ${BRANCH}" + if [ "$RECOVERY_MODE" = true ]; then if ! worktree_recover "$WORKTREE" "$BRANCH" "$FORGE_REMOTE"; then log "ERROR: worktree recovery failed" @@ -302,6 +320,10 @@ OPEN_ISSUES_SUMMARY=$(forge_api GET "/issues?state=open&labels=backlog&limit=20& PUSH_INSTRUCTIONS=$(build_phase_protocol_prompt "$BRANCH" "$FORGE_REMOTE") +# Load lessons from .profile repo if available (pre-session) +profile_load_lessons || true +LESSONS_INJECTION="${LESSONS_CONTEXT:-}" + if [ "$RECOVERY_MODE" = true ]; then GIT_DIFF_STAT=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --stat 2>/dev/null \ | head -20 || echo "(no diff)") @@ -332,6 +354,10 @@ ${GIT_DIFF_STAT} 3. Address any pending review comments or CI failures. 4. Commit and push to \`${BRANCH}\`. 
+${LESSONS_INJECTION:+## Lessons learned +${LESSONS_INJECTION} + +} ${PUSH_INSTRUCTIONS}" else INITIAL_PROMPT="You are working in a git worktree at ${WORKTREE} on branch ${BRANCH}. @@ -347,6 +373,10 @@ ${OPEN_ISSUES_SUMMARY} $(if [ -n "$PRIOR_ART_DIFF" ]; then printf '## Prior Art (closed PR — DO NOT start from scratch)\n\nA previous PR attempted this issue but was closed without merging. Reuse as much as possible.\n\n```diff\n%s\n```\n' "$PRIOR_ART_DIFF" fi) +${LESSONS_INJECTION:+## Lessons learned +${LESSONS_INJECTION} + +} ## Instructions 1. Read AGENTS.md in this repo for project context and coding conventions. @@ -450,6 +480,40 @@ Closing as already implemented." fi log "ERROR: no branch pushed after agent_run" + # Dump diagnostics + diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json" + if [ -f "$diag_file" ]; then + result_text=""; cost_usd=""; num_turns="" + result_text=$(jq -r '.result // "no result field"' "$diag_file" 2>/dev/null | head -50) || result_text="(parse error)" + cost_usd=$(jq -r '.cost_usd // "?"' "$diag_file" 2>/dev/null) || cost_usd="?" + num_turns=$(jq -r '.num_turns // "?"' "$diag_file" 2>/dev/null) || num_turns="?" 
+ log "no_push diagnostics: turns=${num_turns} cost=${cost_usd}" + log "no_push result: ${result_text}" + # Save full output for later analysis + cp "$diag_file" "${DISINTO_LOG_DIR:-/tmp}/dev/no-push-${ISSUE}-$(date +%s).json" 2>/dev/null || true + fi + + # Save full session log for debugging + # Session logs are stored in CLAUDE_CONFIG_DIR/projects/{worktree-hash}/{session-id}.jsonl + _wt_hash=$(printf '%s' "$WORKTREE" | md5sum | cut -c1-12) + _cl_config="${CLAUDE_CONFIG_DIR:-$HOME/.claude}" + _session_log="${_cl_config}/projects/${_wt_hash}/${_AGENT_SESSION_ID}.jsonl" + if [ -f "$_session_log" ]; then + cp "$_session_log" "${DISINTO_LOG_DIR}/dev/no-push-session-${ISSUE}-$(date +%s).jsonl" 2>/dev/null || true + log "no_push session log saved to ${DISINTO_LOG_DIR}/dev/no-push-session-${ISSUE}-*.jsonl" + fi + + # Log session summary for debugging + if [ -f "$_session_log" ]; then + _read_calls=$(grep -c '"type":"read"' "$_session_log" 2>/dev/null || echo "0") + _edit_calls=$(grep -c '"type":"edit"' "$_session_log" 2>/dev/null || echo "0") + _bash_calls=$(grep -c '"type":"bash"' "$_session_log" 2>/dev/null || echo "0") + _text_calls=$(grep -c '"type":"text"' "$_session_log" 2>/dev/null || echo "0") + _failed_calls=$(grep -c '"exit_code":null' "$_session_log" 2>/dev/null || echo "0") + _total_turns=$(grep -c '"type":"turn"' "$_session_log" 2>/dev/null || echo "0") + log "no_push session summary: turns=${_total_turns} reads=${_read_calls} edits=${_edit_calls} bash=${_bash_calls} text=${_text_calls} failed=${_failed_calls}" + fi + issue_block "$ISSUE" "no_push" "Claude did not push branch ${BRANCH}" CLAIMED=false worktree_cleanup "$WORKTREE" @@ -497,6 +561,12 @@ if [ "$rc" -eq 0 ]; then log "PR #${PR_NUMBER} merged" issue_close "$ISSUE" + # Capture files changed for journal entry (after agent work) + FILES_CHANGED=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --name-only 2>/dev/null | tr '\n' ',' | sed 's/,$//') || FILES_CHANGED="" + + # Write 
journal entry post-session (before cleanup) + profile_write_journal "$ISSUE" "$ISSUE_TITLE" "merged" "$FILES_CHANGED" || true + # Pull primary branch and push to mirrors git -C "$REPO_ROOT" fetch "$FORGE_REMOTE" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true @@ -510,6 +580,18 @@ else # Exhausted or unrecoverable failure log "PR walk failed: ${_PR_WALK_EXIT_REASON:-unknown}" issue_block "$ISSUE" "${_PR_WALK_EXIT_REASON:-agent_failed}" + + # Capture files changed for journal entry (after agent work) + FILES_CHANGED=$(git -C "$WORKTREE" diff "${FORGE_REMOTE}/${PRIMARY_BRANCH}..HEAD" --name-only 2>/dev/null | tr '\n' ',' | sed 's/,$//') || FILES_CHANGED="" + + # Write journal entry post-session (before cleanup) + outcome="blocked_${_PR_WALK_EXIT_REASON:-agent_failed}" + profile_write_journal "$ISSUE" "$ISSUE_TITLE" "$outcome" "$FILES_CHANGED" || true + + # Cleanup on failure: preserve remote branch and PR for debugging, clean up local worktree + # Remote state (PR and branch) stays open for inspection of CI logs and review comments + worktree_cleanup "$WORKTREE" + rm -f "$SID_FILE" "$IMPL_SUMMARY_FILE" CLAIMED=false fi diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index 98b8b7d..f0980d6 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -42,6 +42,11 @@ log() { printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" } +# Resolve current agent identity once at startup — cache for all assignee checks +BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API%%/repos*}/user" | jq -r '.login') || BOT_USER="" +log "running as agent: ${BOT_USER}" + # ============================================================================= # CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3) # ============================================================================= @@ -94,6 +99,68 @@ is_blocked() { | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1 } +# 
============================================================================= +# STALENESS DETECTION FOR IN-PROGRESS ISSUES +# ============================================================================= + +# Check if there's an open PR for a specific issue +# Args: issue_number +# Returns: 0 if open PR exists, 1 if not +open_pr_exists() { + local issue="$1" + local branch="fix/issue-${issue}" + local pr_num + + pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$branch" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + + [ -n "$pr_num" ] +} + +# Relabel a stale in-progress issue to blocked with diagnostic comment +# Args: issue_number reason +# Uses shared helpers from lib/issue-lifecycle.sh +relabel_stale_issue() { + local issue="$1" reason="$2" + + log "relabeling stale in-progress issue #${issue} to blocked: ${reason}" + + # Remove in-progress label + local ip_id + ip_id=$(_ilc_in_progress_id) + if [ -n "$ip_id" ]; then + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${issue}/labels/${ip_id}" >/dev/null 2>&1 || true + fi + + # Add blocked label + local bk_id + bk_id=$(_ilc_blocked_id) + if [ -n "$bk_id" ]; then + curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${issue}/labels" \ + -d "{\"labels\":[${bk_id}]}" >/dev/null 2>&1 || true + fi + + # Post diagnostic comment using shared helper + local comment_body + comment_body=$( + printf '%s\n\n' '### Stale in-progress issue detected' + printf '%s\n' '| Field | Value |' + printf '%s\n' '|---|---|' + printf '| Detection reason | `%s` |\n' "$reason" + printf '| Timestamp | `%s` |\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" + printf '%s\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.' + printf '%s\n' '**Action required:** A maintainer should triage this issue.' 
+ ) + _ilc_post_comment "$issue" "$comment_body" + + _ilc_log "stale issue #${issue} relabeled to blocked: ${reason}" +} + # ============================================================================= # HELPER: handle CI-exhaustion check/block (DRY for 3 call sites) # Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not. @@ -155,9 +222,10 @@ try_direct_merge() { if [ "$issue_num" -gt 0 ]; then issue_close "$issue_num" # Remove in-progress label (don't re-add backlog — issue is closed) + IP_ID=$(_ilc_in_progress_id) curl -sf -X DELETE \ -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue_num}/labels/in-progress" >/dev/null 2>&1 || true + "${API}/issues/${issue_num}/labels/${IP_ID}" >/dev/null 2>&1 || true rm -f "/tmp/dev-session-${PROJECT_NAME}-${issue_num}.sid" \ "/tmp/dev-impl-summary-${PROJECT_NAME}-${issue_num}.txt" fi @@ -277,6 +345,16 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then + # Check if issue is assigned to this agent — only merge own PRs + if [ "$PL_ISSUE" -gt 0 ]; then + PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${PL_ISSUE}") || true + PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true + if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then + log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)" + continue + fi + fi if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then PL_MERGED_ANY=true fi @@ -300,6 +378,9 @@ if [ -f "$LOCKFILE" ]; then rm -f "$LOCKFILE" fi +# --- Fetch origin refs before any stale branch checks --- +git fetch origin --prune 2>/dev/null || true + # --- Memory guard --- memory_guard 2000 @@ -307,89 +388,177 @@ memory_guard 2000 # PRIORITY 1: orphaned in-progress issues # 
============================================================================= log "checking for in-progress issues" + ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/issues?state=open&labels=in-progress&limit=10&type=issues") ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length') +BLOCKED_BY_INPROGRESS=false if [ "$ORPHAN_COUNT" -gt 0 ]; then ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number') - # Formula guard: formula-labeled issues should not be worked on by dev-agent. - # Remove in-progress label and skip to prevent infinite respawn cycle (#115). - ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true - SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|action|prediction/dismissed|prediction/unreviewed)$' | head -1) || true - if [ -n "$SKIP_LABEL" ]; then - log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/in-progress" >/dev/null 2>&1 || true - exit 0 + # Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale + OPEN_PR=false + if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -e --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch)' >/dev/null 2>&1; then + OPEN_PR=true fi - # Check if there's already an open PR for this issue - HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - - if [ -n "$HAS_PR" ]; then - PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true - CI_STATE=$(ci_commit_status "$PR_SHA") || true - - # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed - if ! ci_passed "$CI_STATE" && ! 
ci_required_for_pr "$HAS_PR"; then - CI_STATE="success" - log "PR #${HAS_PR} has no code files — treating CI as passed" - fi - - # Check formal reviews (single fetch to avoid race window) - REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}/reviews") || true - HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true - HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true - - if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then - if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then - exit 0 - fi - # Direct merge failed (conflicts?) — fall back to dev-agent - log "falling back to dev-agent for PR #${HAS_PR} merge" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" - exit 0 - - # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is - # pending/unknown. Definitive CI failure is handled by the elif below. - elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (review fix)" - exit 0 - - elif ci_failed "$CI_STATE"; then - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then - # Fall through to backlog scan instead of exit - : - else - # Increment at actual launch time (not on guard-hit paths) - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then - exit 0 # exhausted between check and launch - fi - log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" - exit 0 - fi - + # Check if issue has an assignee — only block on issues assigned to this agent + assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""') + if [ -n "$assignee" ]; then + if [ "$assignee" = "$BOT_USER" ]; then + log "issue #${ISSUE_NUM} assigned to me — my thread is busy" + BLOCKED_BY_INPROGRESS=true else - log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" + log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking" + # Issue assigned to another agent — don't block, fall through to backlog fi - else - log "recovering orphaned issue #${ISSUE_NUM} (no PR found)" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (recovery)" + fi + + # Only proceed with in-progress checks if not blocked by another agent + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + # Check for dev-agent lock file (agent may be running in another container) + LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt" + if [ -f "$LOCK_FILE" ]; then + log "issue #${ISSUE_NUM} has agent lock file — trusting active work" + BLOCKED_BY_INPROGRESS=true + fi + + if [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then + log "issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked" + relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock" + BLOCKED_BY_INPROGRESS=true + fi + + # Formula guard: formula-labeled issues should not be worked on by dev-agent. + # Remove in-progress label and skip to prevent infinite respawn cycle (#115). + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true + SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true + if [ -n "$SKIP_LABEL" ]; then + log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" + IP_ID=$(_ilc_in_progress_id) + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true + BLOCKED_BY_INPROGRESS=true + fi + fi + + # Check if there's already an open PR for this issue + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + + if [ -n "$HAS_PR" ]; then + # Check if branch is stale (behind primary branch) + BRANCH="fix/issue-${ISSUE_NUM}" + AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 
2>/dev/null || echo "0") + if [ "$AHEAD" -gt 0 ]; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" + # Close the PR via API + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${HAS_PR}" \ + -d '{"state":"closed"}' >/dev/null 2>&1 || true + # Delete the branch via git push + git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true + # Reset to fresh start on primary branch + git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true + git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true + BLOCKED_BY_INPROGRESS=true + fi + + # Only process PR if not abandoned (stale branch check above) + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true + CI_STATE=$(ci_commit_status "$PR_SHA") || true + + # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed + if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then + CI_STATE="success" + log "PR #${HAS_PR} has no code files — treating CI as passed" + fi + + # Check formal reviews (single fetch to avoid race window) + REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews") || true + HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true + HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + + if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then + if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then + BLOCKED_BY_INPROGRESS=true + else + # Direct merge failed (conflicts?) 
— fall back to dev-agent + log "falling back to dev-agent for PR #${HAS_PR} merge" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" + BLOCKED_BY_INPROGRESS=true + fi + + # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is + # pending/unknown. Definitive CI failure is handled by the elif below. + elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" + BLOCKED_BY_INPROGRESS=true + + elif ci_failed "$CI_STATE"; then + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then + # Fall through to backlog scan instead of exit + : + else + # Increment at actual launch time (not on guard-hit paths) + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then + BLOCKED_BY_INPROGRESS=true # exhausted between check and launch + else + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (CI fix)" + BLOCKED_BY_INPROGRESS=true + fi + fi + + else + log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" + BLOCKED_BY_INPROGRESS=true + fi + fi + else + # Check assignee before adopting orphaned issue + ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}") || true + ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true + + if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then + log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)" + # Remove in-progress label since this agent isn't working on it + IP_ID=$(_ilc_in_progress_id) + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true + # Don't block — fall through to backlog + else + log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)" + BLOCKED_BY_INPROGRESS=true + fi + fi + fi + fi + + # If blocked by in-progress work, exit now + if [ "$BLOCKED_BY_INPROGRESS" = true ]; then exit 0 fi fi @@ -521,9 +690,18 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do ISSUE_NUM=$(echo "$BACKLOG_JSON" | jq -r ".[$i].number") ISSUE_BODY=$(echo "$BACKLOG_JSON" | jq -r ".[$i].body // \"\"") + # Check assignee before claiming — skip if assigned to another bot + ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}") || true + ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true + if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then + log " #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping" + continue + fi + # Formula guard: formula-labeled issues must not be picked up by dev-agent. 
ISSUE_LABELS=$(echo "$BACKLOG_JSON" | jq -r ".[$i].labels[].name" 2>/dev/null) || true - SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|action|prediction/dismissed|prediction/unreviewed)$' | head -1) || true + SKIP_LABEL=$(echo "$ISSUE_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true if [ -n "$SKIP_LABEL" ]; then log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — skipping in backlog scan" continue @@ -540,6 +718,26 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do '.[] | select((.head.ref == $branch) or (.title | contains($num))) | .number' | head -1) || true if [ -n "$EXISTING_PR" ]; then + # Check if branch is stale (behind primary branch) + BRANCH="fix/issue-${ISSUE_NUM}" + AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") + if [ "$AHEAD" -gt 0 ]; then + log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" + # Close the PR via API + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${EXISTING_PR}" \ + -d '{"state":"closed"}' >/dev/null 2>&1 || true + # Delete the branch via git push + git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true + # Reset to fresh start on primary branch + git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true + git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true + # Continue to find another ready issue + continue + fi + PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls/${EXISTING_PR}" | jq -r '.head.sha') || true CI_STATE=$(ci_commit_status "$PR_SHA") || true @@ -597,9 +795,32 @@ done # Single-threaded per project: if any issue has an open PR waiting for review/CI, # don't start new work — let the pipeline drain first +# But only block on PRs assigned to this agent (per-agent logic from 
#358) if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then - log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}" - exit 0 + # Filter to only this agent's waiting PRs + MY_WAITING_PRS="" + for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do + pr_num="${pr_num#\#}" # Remove leading # + # Check if this PR's issue is assigned to this agent + pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${pr_num}" 2>/dev/null) || true + pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true + issue_num=$(echo "$pr_branch" | grep -oP '(?<=fix/issue-)\d+' || true) + if [ -z "$issue_num" ]; then + continue + fi + issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true + if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then + MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}" + fi + done + + if [ -n "$MY_WAITING_PRS" ]; then + log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}" + exit 0 + fi + log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with #${READY_ISSUE}" fi if [ -z "$READY_ISSUE" ]; then diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh deleted file mode 100644 index 8f3b3b4..0000000 --- a/dev/phase-handler.sh +++ /dev/null @@ -1,820 +0,0 @@ -#!/usr/bin/env bash -# dev/phase-handler.sh — Phase callback functions for dev-agent.sh -# -# Source this file from agent orchestrators after lib/agent-session.sh is loaded. 
-# Defines: post_refusal_comment(), _on_phase_change(), build_phase_protocol_prompt() -# -# Required globals (set by calling agent before or after sourcing): -# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT -# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE -# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE -# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER -# -# Globals with defaults (agents can override after sourcing): -# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS, -# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND, -# CLAIMED, PHASE_POLL_INTERVAL -# -# Calls back to agent-defined helpers: -# cleanup_worktree(), cleanup_labels(), status(), log() -# -# shellcheck shell=bash -# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling -# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh - -# Load secret scanner for redacting tmux output before posting to issues -# shellcheck source=../lib/secret-scan.sh -source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh" - -# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.) -# shellcheck source=../lib/ci-helpers.sh -source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh" - -# Load mirror push helper -# shellcheck source=../lib/mirrors.sh -source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh" - -# --- Default callback stubs (agents can override after sourcing) --- -# cleanup_worktree and cleanup_labels are called during phase transitions. -# Provide no-op defaults so phase-handler.sh is self-contained; sourcing -# agents override these with real implementations. -if ! declare -f cleanup_worktree >/dev/null 2>&1; then - cleanup_worktree() { :; } -fi -if ! 
declare -f cleanup_labels >/dev/null 2>&1; then - cleanup_labels() { :; } -fi - -# --- Default globals (agents can override after sourcing) --- -: "${CI_POLL_TIMEOUT:=1800}" -: "${REVIEW_POLL_TIMEOUT:=10800}" -: "${MAX_CI_FIXES:=3}" -: "${MAX_REVIEW_ROUNDS:=5}" -: "${CI_RETRY_COUNT:=0}" -: "${CI_FIX_COUNT:=0}" -: "${REVIEW_ROUND:=0}" -: "${PR_NUMBER:=}" -: "${CLAIMED:=false}" -: "${PHASE_POLL_INTERVAL:=30}" - -# --- Post diagnostic comment + label issue as blocked --- -# Captures tmux pane output, posts a structured comment on the issue, removes -# in-progress label, and adds the "blocked" label. -# -# Args: reason [session_name] -# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API -post_blocked_diagnostic() { - local reason="$1" - local session="${2:-${SESSION_NAME:-}}" - - # Capture last 50 lines from tmux pane (before kill) - local tmux_output="" - if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then - tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true) - fi - - # Redact any secrets from tmux output before posting to issue - if [ -n "$tmux_output" ]; then - tmux_output=$(redact_secrets "$tmux_output") - fi - - # Build diagnostic comment body - local comment - comment="### Session failure diagnostic - -| Field | Value | -|---|---| -| Exit reason | \`${reason}\` | -| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |" - [ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ] && \ - comment="${comment} -| PR | #${PR_NUMBER} |" - - if [ -n "$tmux_output" ]; then - comment="${comment} - -
Last 50 lines from tmux pane - -\`\`\` -${tmux_output} -\`\`\` -
" - fi - - # Post comment to issue - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/comments" \ - -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true - - # Remove in-progress, add blocked - cleanup_labels - local blocked_id - blocked_id=$(ensure_blocked_label_id) - if [ -n "$blocked_id" ]; then - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/labels" \ - -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true - fi - CLAIMED=false - _BLOCKED_POSTED=true -} - -# --- Build phase protocol prompt (shared across agents) --- -# Generates the phase-signaling instructions for Claude prompts. -# Args: phase_file summary_file branch [remote] -# Output: The protocol text (stdout) -build_phase_protocol_prompt() { - local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}" - cat <<_PHASE_PROTOCOL_EOF_ -## Phase-Signaling Protocol (REQUIRED) - -You are running in a persistent tmux session managed by an orchestrator. -Communicate progress by writing to the phase file. The orchestrator watches -this file and injects events (CI results, review feedback) back into this session. - -### Key files -\`\`\` -PHASE_FILE="${_pf}" -SUMMARY_FILE="${_sf}" -\`\`\` - -### Phase transitions — write these exactly: - -**After committing and pushing your branch:** -\`\`\`bash -# Rebase on target branch before push to avoid merge conflicts -git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} -git push ${_remote} ${_br} -# Write a short summary of what you implemented: -printf '%s' "" > "\${SUMMARY_FILE}" -# Signal the orchestrator to create the PR and watch for CI: -echo "PHASE:awaiting_ci" > "${_pf}" -\`\`\` -Then STOP and wait. The orchestrator will inject CI results. 
- -**When you receive a "CI passed" injection:** -\`\`\`bash -echo "PHASE:awaiting_review" > "${_pf}" -\`\`\` -Then STOP and wait. The orchestrator will inject review feedback. - -**When you receive a "CI failed:" injection:** -Fix the CI issue, then rebase on target branch and push: -\`\`\`bash -git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} -git push --force-with-lease ${_remote} ${_br} -echo "PHASE:awaiting_ci" > "${_pf}" -\`\`\` -Then STOP and wait. - -**When you receive a "Review: REQUEST_CHANGES" injection:** -Address ALL review feedback, then rebase on target branch and push: -\`\`\`bash -git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} -git push --force-with-lease ${_remote} ${_br} -echo "PHASE:awaiting_ci" > "${_pf}" -\`\`\` -(CI runs again after each push — always write awaiting_ci, not awaiting_review) - -**When you need human help (CI exhausted, merge blocked, stuck on a decision):** -\`\`\`bash -printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}" -\`\`\` -Then STOP and wait. A human will review and respond via the forge. - -**On unrecoverable failure:** -\`\`\`bash -printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}" -\`\`\` -_PHASE_PROTOCOL_EOF_ -} - -# --- Merge helper --- -# do_merge — attempt to merge PR via forge API. -# Args: pr_num -# Returns: -# 0 = merged successfully -# 1 = other failure (conflict, network error, etc.) 
-# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written -do_merge() { - local pr_num="$1" - local merge_response merge_http_code merge_body - merge_response=$(curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${API}/pulls/${pr_num}/merge" \ - -d '{"Do":"merge","delete_branch_after_merge":true}') || true - merge_http_code=$(echo "$merge_response" | tail -1) - merge_body=$(echo "$merge_response" | sed '$d') - - if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then - log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})" - return 0 - fi - - # HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll). - # Before escalating, check whether the PR was already merged by another agent. - if [ "$merge_http_code" = "405" ]; then - local pr_state - pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false" - if [ "$pr_state" = "true" ]; then - log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success" - return 0 - fi - log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" - printf 'PHASE:escalate\nReason: %s\n' \ - "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \ - > "$PHASE_FILE" - return 2 - fi - - log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}" - return 1 -} - -# --- Refusal comment helper --- -post_refusal_comment() { - local emoji="$1" title="$2" body="$3" - local last_has_title - last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE}/comments?limit=5" | \ - jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true - if [ "$last_has_title" = "true" ]; then - log "skipping duplicate refusal comment: 
${title}" - return 0 - fi - local comment - comment="${emoji} **Dev-agent: ${title}** - -${body} - ---- -*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" - printf '%s' "$comment" > "/tmp/refusal-comment.txt" - jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json" - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/comments" \ - --data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \ - log "WARNING: failed to post refusal comment" - rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json" -} - -# ============================================================================= -# PHASE DISPATCH CALLBACK -# ============================================================================= - -# _on_phase_change — Phase dispatch callback for monitor_phase_loop -# Receives the current phase as $1. -# Returns 0 to continue the loop, 1 to break (terminal phase reached). -_on_phase_change() { - local phase="$1" - - # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── - if [ "$phase" = "PHASE:awaiting_ci" ]; then - # Release session lock — Claude is idle during CI polling (#724) - session_lock_release - - # Create PR if not yet created - if [ -z "${PR_NUMBER:-}" ]; then - status "creating PR for issue #${ISSUE}" - IMPL_SUMMARY="" - if [ -f "$IMPL_SUMMARY_FILE" ]; then - # Don't treat refusal JSON as a PR summary - if ! 
jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then - IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") - fi - fi - - printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" - jq -n \ - --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ - --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ - --arg head "$BRANCH" \ - --arg base "${PRIMARY_BRANCH}" \ - '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" - - PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls" \ - --data-binary @"/tmp/pr-request-${ISSUE}.json") - - PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) - PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') - rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" - - if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then - PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') - log "created PR #${PR_NUMBER}" - elif [ "$PR_HTTP_CODE" = "409" ]; then - # PR already exists (race condition) — find it - FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "$BRANCH" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - if [ -n "$FOUND_PR" ]; then - PR_NUMBER="$FOUND_PR" - log "PR already exists: #${PR_NUMBER}" - else - log "ERROR: PR creation got 409 but no existing PR found" - agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." - return 0 - fi - else - log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" - agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again." 
- return 0 - fi - fi - - # No CI configured? Treat as success immediately - if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then - log "no CI configured — treating as passed" - agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project). -Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." - return 0 - fi - - # Poll CI until done or timeout - status "waiting for CI on PR #${PR_NUMBER}" - CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ - curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') - - CI_DONE=false - CI_STATE="unknown" - CI_POLL_ELAPSED=0 - while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do - sleep 30 - CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) - - # Check session still alive during CI wait (exit_marker + tmux fallback) - if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then - log "session died during CI wait" - break - fi - - # Re-fetch HEAD — Claude may have pushed new commits since loop started - CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA") - - CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA") - if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then - CI_DONE=true - [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 - break - fi - done - - if ! $CI_DONE; then - log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" - agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed." - return 0 - fi - - log "CI: ${CI_STATE}" - - if [ "$CI_STATE" = "success" ]; then - agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}. 
-Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: - echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" - else - # Fetch CI error details - PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA") - - FAILED_STEP="" - FAILED_EXIT="" - IS_INFRA=false - if [ -n "$PIPELINE_NUM" ]; then - FAILED_INFO=$(curl -sf \ - -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ - "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ - jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) - FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) - FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) - fi - - log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" - - if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then - IS_INFRA=true - fi - - if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then - CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) - log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" - (cd "$WORKTREE" && git commit --allow-empty \ - -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) - # Rebase on target branch before push to avoid merge conflicts - if ! (cd "$WORKTREE" && \ - git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \ - git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then - log "rebase conflict detected — aborting, agent must resolve" - (cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true - agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically. - -Please resolve merge conflicts manually: -1. Check conflict status: git status -2. Resolve conflicts in the conflicted files -3. Stage resolved files: git add -4. 
Continue rebase: git rebase --continue - -If you cannot resolve conflicts, abort: git rebase --abort -Then write PHASE:escalate with a reason." - return 0 - fi - # Rebase succeeded — push the result - (cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3) - # Touch phase file so we recheck CI on the new SHA - # Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime - touch "$PHASE_FILE" - CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) - return 0 - fi - - CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) - _ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" - if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then - log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" - printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE" - # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate - return 0 - fi - - CI_ERROR_LOG="" - if [ -n "$PIPELINE_NUM" ]; then - CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") - fi - - # Save CI result for crash recovery - printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ - "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ - > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true - - agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). - -Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) - -CI debug tool: - bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} - bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} - -Error snippet: -${CI_ERROR_LOG:-No logs available. 
Use ci-debug.sh to query the pipeline.} - -Instructions: -1. Run ci-debug.sh failures to get the full error output. -2. Read the failing test file(s) — understand what the tests EXPECT. -3. Fix the root cause — do NOT weaken tests. -4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH} - git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH} -5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" -6. Stop and wait." - fi - - # ── PHASE: awaiting_review ────────────────────────────────────────────────── - elif [ "$phase" = "PHASE:awaiting_review" ]; then - # Release session lock — Claude is idle during review wait (#724) - session_lock_release - status "waiting for review on PR #${PR_NUMBER:-?}" - CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle - - if [ -z "${PR_NUMBER:-}" ]; then - log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" - FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "$BRANCH" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - if [ -n "$FOUND_PR" ]; then - PR_NUMBER="$FOUND_PR" - log "found PR #${PR_NUMBER}" - else - agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci." - return 0 - fi - fi - - REVIEW_POLL_ELAPSED=0 - REVIEW_FOUND=false - while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do - sleep 300 # 5 min between review checks - REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) - - # Check session still alive (exit_marker + tmux fallback) - if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then - log "session died during review wait" - REVIEW_FOUND=false - break - fi - - # Check if phase was updated while we wait (e.g., Claude reacted to something) - NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) - if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then - log "phase file updated during review wait — re-entering main loop" - # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer - # loop detects the change on its next tick and dispatches the new phase. - REVIEW_FOUND=true # Prevent timeout injection - # Clean up review-poll sentinel if it exists (session already advanced) - rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" - break - fi - - REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true - REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \ - jq -r --arg sha "$REVIEW_SHA" \ - '[.[] | select(.body | contains("" in planner-memory.md. If (count - N) >= 5 or planner-memory.md missing, write to: @@ -268,15 +256,19 @@ If (count - N) >= 5 or planner-memory.md missing, write to: Include: run counter marker, date, constraint focus, patterns, direction. Keep under 100 lines. Replace entire file. -### 4. Commit ops repo changes -Commit the ops repo changes (prerequisites, journal, memory, vault items): +### 3. Commit ops repo changes +Commit the ops repo changes (prerequisites, memory, vault items): cd "$OPS_REPO_ROOT" - git add prerequisites.md journal/planner/ knowledge/planner-memory.md vault/pending/ + git add prerequisites.md knowledge/planner-memory.md vault/pending/ git add -u if ! git diff --cached --quiet; then git commit -m "chore: planner run $(date -u +%Y-%m-%d)" git push origin "$PRIMARY_BRANCH" fi cd "$PROJECT_REPO_ROOT" + +### 4. Write journal entry (generic) +The planner-run.sh wrapper will handle journal writing via profile_write_journal() +after the formula completes. 
This step is informational only. """ needs = ["triage-and-plan"] diff --git a/formulas/run-publish-site.toml b/formulas/run-publish-site.toml index 2de4455..9a7c1e7 100644 --- a/formulas/run-publish-site.toml +++ b/formulas/run-publish-site.toml @@ -3,7 +3,7 @@ # Trigger: action issue created by planner (gap analysis), dev-poll (post-merge # hook detecting site/ changes), or gardener (periodic SHA drift check). # -# The action-agent picks up the issue, executes these steps, posts results +# The dispatcher picks up the issue, executes these steps, posts results # as a comment, and closes the issue. name = "run-publish-site" diff --git a/formulas/run-rent-a-human.toml b/formulas/run-rent-a-human.toml index 9009418..41b8f1f 100644 --- a/formulas/run-rent-a-human.toml +++ b/formulas/run-rent-a-human.toml @@ -5,7 +5,7 @@ # the action and notifies the human for one-click copy-paste execution. # # Trigger: action issue created by planner or any formula. -# The action-agent picks up the issue, executes these steps, writes a draft +# The dispatcher picks up the issue, executes these steps, writes a draft # to vault/outreach/{platform}/drafts/, notifies the human via the forge, # and closes the issue. # diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index 6f60905..ceaf340 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -1,7 +1,7 @@ # formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation) # # Executed by supervisor/supervisor-run.sh via cron (every 20 minutes). -# supervisor-run.sh creates a tmux session with Claude (sonnet) and injects +# supervisor-run.sh runs claude -p via agent-sdk.sh and injects # this formula with pre-collected metrics as context. # # Steps: preflight → health-assessment → decide-actions → report → journal @@ -137,14 +137,15 @@ For each finding from the health assessment, decide and execute an action. 
**P3 Stale PRs (CI done >20min, no push since):** Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code. - Instead, nudge the dev-agent via tmux injection if a session is alive: - # Find the dev session for this issue - SESSION=$(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "dev-.*-${ISSUE_NUM}" | head -1) - if [ -n "$SESSION" ]; then - # Inject a nudge into the dev-agent session - tmux send-keys -t "$SESSION" "# [supervisor] PR stale >20min — CI finished, please push or update" Enter - fi - If no active tmux session exists, note it in the journal for the next dev-poll cycle. + Instead, file a vault item for the dev-agent to pick up: + Write $OPS_REPO_ROOT/vault/pending/stale-pr-${ISSUE_NUM}.md: + # Stale PR: ${PR_TITLE} + ## What + CI finished >20min ago but no git push has been made to the PR branch. + ## Why + P3 — Factory degraded: PRs should be pushed within 20min of CI completion. + ## Unblocks + - Factory health: dev-agent will push the branch and continue the workflow Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs. ### Cannot auto-fix → file vault item @@ -159,7 +160,7 @@ human judgment, file a vault procurement item: ## Unblocks - Factory health: - The vault-poll will notify the human and track the request. + Vault PR filed on ops repo — human approves via PR review. Read the relevant best-practices file before taking action: cat "$OPS_REPO_ROOT/knowledge/memory.md" # P0 @@ -241,7 +242,16 @@ run-to-run context so future supervisor runs can detect trends IMPORTANT: Do NOT commit or push the journal — it is a local working file. The journal directory is committed to git periodically by other agents. -After writing the journal, write the phase signal: - echo 'PHASE:done' > "$PHASE_FILE" +## Learning + +If you discover something new during this run, append it to the relevant +knowledge file in the ops repo: + echo "### Lesson title + Description of what you learned." 
>> "${OPS_REPO_ROOT}/knowledge/.md" + +Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md, +review-agent.md, git.md. + +After writing the journal, the agent session completes automatically. """ needs = ["report"] diff --git a/formulas/triage.toml b/formulas/triage.toml new file mode 100644 index 0000000..a2ec909 --- /dev/null +++ b/formulas/triage.toml @@ -0,0 +1,267 @@ +# formulas/triage.toml — Triage-agent formula (generic template) +# +# This is the base template for triage investigations. +# Project-specific formulas (e.g. formulas/triage-harb.toml) extend this by +# overriding the fields in the [project] section and providing stack-specific +# step descriptions. +# +# Triggered by: bug-report + in-triage label combination. +# Set by the reproduce-agent when: +# - Bug was confirmed (reproduced) +# - Quick log analysis did not reveal an obvious root cause +# - Reproduce-agent documented all steps taken and logs examined +# +# Steps: +# 1. read-findings — parse issue comments for prior reproduce-agent evidence +# 2. trace-data-flow — follow symptom through UI → API → backend → data store +# 3. instrumentation — throwaway branch, add logging, restart, observe +# 4. decompose — file backlog issues for each root cause +# 5. link-back — update original issue, swap in-triage → in-progress +# 6. cleanup — delete throwaway debug branch +# +# Best practices: +# - Start from reproduce-agent findings; do not repeat their work +# - Budget: 70% tracing data flow, 30% instrumented re-runs +# - Multiple causes: check if layered (Depends-on) or independent (Related) +# - Always delete the throwaway debug branch before finishing +# - If inconclusive after full turn budget: leave in-triage, post what was +# tried, do NOT relabel — supervisor handles stale triage sessions +# +# Project-specific formulas extend this template by defining: +# - stack_script: how to start/stop the project stack +# - [project].data_flow: layer names (e.g. 
"chain → indexer → GraphQL → UI") +# - [project].api_endpoints: which APIs/services to inspect +# - [project].stack_lock: stack lock configuration +# - Per-step description overrides with project-specific commands +# +# No hard timeout — runs until Claude hits its turn limit. +# Stack lock held for full run (triage is rare; blocking CI is acceptable). + +name = "triage" +description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues." +version = 2 + +# Set stack_script to the restart command for local stacks. +# Leave empty ("") to connect to an existing staging environment. +stack_script = "" + +tools = ["playwright"] + +# --------------------------------------------------------------------------- +# Project-specific extension fields. +# Override these in formulas/triage-.toml. +# --------------------------------------------------------------------------- +[project] +# Human-readable layer names for the data-flow trace (generic default). +# Example project override: "chain → indexer → GraphQL → UI" +data_flow = "UI → API → backend → data store" + +# Comma-separated list of API endpoints or services to inspect. +# Example: "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545" +api_endpoints = "" + +# Stack lock configuration (leave empty for default behavior). +# Example: "full" to hold a full stack lock during triage. +stack_lock = "" + +# --------------------------------------------------------------------------- +# Steps +# --------------------------------------------------------------------------- + +[[steps]] +id = "read-findings" +title = "Read reproduce-agent findings" +description = """ +Before doing anything else, parse all prior evidence from the issue comments. + +1. 
Fetch the issue body and all comments: + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body' + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body' + +2. Identify the reproduce-agent comment (look for sections like + "Reproduction steps", "Logs examined", "What was tried"). + +3. Extract and note: + - The exact symptom (error message, unexpected value, visual regression) + - Steps that reliably trigger the bug + - Log lines or API responses already captured + - Any hypotheses the reproduce-agent already ruled out + +Do NOT repeat work the reproduce-agent already did. Your job starts where +theirs ended. If no reproduce-agent comment is found, note it and proceed +with fresh investigation using the issue body only. +""" + +[[steps]] +id = "trace-data-flow" +title = "Trace data flow from symptom to source" +description = """ +Systematically follow the symptom backwards through each layer of the stack. +Spend ~70% of your total turn budget here before moving to instrumentation. + +Generic layer traversal (adapt to the project's actual stack): + UI → API → backend → data store + +For each layer boundary: + 1. What does the upstream layer send? + 2. What does the downstream layer expect? + 3. Is there a mismatch? If yes — is this the root cause or a symptom? + +Tracing checklist: + a. Start at the layer closest to the visible symptom. + b. Read the relevant source files — do not guess data shapes. + c. Cross-reference API contracts: compare what the code sends vs what it + should send according to schemas, type definitions, or documentation. + d. Check recent git history on suspicious files: + git log --oneline -20 -- + e. 
Search for related issues or TODOs in the code: + grep -r "TODO\|FIXME\|HACK" -- + +Capture for each layer: + - The data shape flowing in and out (field names, types, nullability) + - Whether the layer's behavior matches its documented contract + - Any discrepancy found + +If a clear root cause becomes obvious during tracing, note it and continue +checking whether additional causes exist downstream. +""" +needs = ["read-findings"] + +[[steps]] +id = "instrumentation" +title = "Add debug instrumentation on a throwaway branch" +description = """ +Use ~30% of your total turn budget here. Only instrument after tracing has +identified the most likely failure points — do not instrument blindly. + +1. Create a throwaway debug branch (NEVER commit this to main): + cd "$PROJECT_REPO_ROOT" + git checkout -b debug/triage-${ISSUE_NUMBER} + +2. Add targeted logging at the layer boundaries identified during tracing: + - Console.log / structured log statements around the suspicious code path + - Log the actual values flowing through: inputs, outputs, intermediate state + - Add verbose mode flags if the stack supports them + - Keep instrumentation minimal — only what confirms or refutes the hypothesis + +3. Restart the stack using the configured script (if set): + ${stack_script:-"# No stack_script configured — restart manually or connect to staging"} + +4. Re-run the reproduction steps from the reproduce-agent findings. + +5. Observe and capture new output: + - Paste relevant log lines into your working notes + - Note whether the observed values match or contradict the hypothesis + +6. If the first instrumentation pass is inconclusive, iterate: + - Narrow the scope to the next most suspicious boundary + - Re-instrument, restart, re-run + - Maximum 2-3 instrumentation rounds before declaring inconclusive + +Do NOT push the debug branch. It will be deleted in the cleanup step. 
+""" +needs = ["trace-data-flow"] + +[[steps]] +id = "decompose" +title = "Decompose root causes into backlog issues" +description = """ +After tracing and instrumentation, articulate each distinct root cause. + +For each root cause found: + +1. Determine the relationship to other causes: + - Layered (one causes another) → use Depends-on in the issue body + - Independent (separate code paths fail independently) → use Related + +2. Create a backlog issue for each root cause: + curl -sf -X POST "${FORGE_API}/issues" \\ + -H "Authorization: token ${FORGE_TOKEN}" \\ + -H "Content-Type: application/json" \\ + -d '{ + "title": "fix: ", + "body": "## Root cause\\n\\n\\n## Fix suggestion\\n\\n\\n## Context\\nDecomposed from #${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>", + "labels": [{"name": "backlog"}] + }' + +3. Note the newly created issue numbers. + +If only one root cause is found, still create a single backlog issue with +the specific code location and fix suggestion. + +If the investigation is inconclusive (no clear root cause found), skip this +step and proceed directly to link-back with the inconclusive outcome. +""" +needs = ["instrumentation"] + +[[steps]] +id = "link-back" +title = "Update original issue and relabel" +description = """ +Post a summary comment on the original issue and update its labels. + +### If root causes were found (conclusive): + +Post a comment: + "## Triage findings + + Found N root cause(s): + - #X — (cause 1 of N) + - #Y — (cause 2 of N, depends on #X) + + Data flow traced: + Instrumentation: + + Next step: backlog issues above will be implemented in dependency order." + +Then swap labels: + - Remove: in-triage + - Add: in-progress + +### If investigation was inconclusive (turn budget exhausted): + +Post a comment: + "## Triage — inconclusive + + Traced: + Tried: + Hypothesis: + + No definitive root cause identified. 
Leaving in-triage for supervisor + to handle as a stale triage session." + +Do NOT relabel. Leave in-triage. The supervisor monitors stale triage +sessions and will escalate or reassign. + +**CRITICAL: Write outcome file** — Always write the outcome to the outcome file: + - If root causes found (conclusive): echo "reproduced" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt + - If inconclusive: echo "needs-triage" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt +""" +needs = ["decompose"] + +[[steps]] +id = "cleanup" +title = "Delete throwaway debug branch" +description = """ +Always delete the debug branch, even if the investigation was inconclusive. + +1. Switch back to the main branch: + cd "$PROJECT_REPO_ROOT" + git checkout "$PRIMARY_BRANCH" + +2. Delete the local debug branch: + git branch -D debug/triage-${ISSUE_NUMBER} + +3. Confirm no remote was pushed (if accidentally pushed, delete it too): + git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true + +4. Verify the worktree is clean: + git status + git worktree list + +A clean repo is a prerequisite for the next dev-agent run. Never leave +debug branches behind — they accumulate and pollute the branch list. +""" +needs = ["link-back"] diff --git a/gardener/AGENTS.md b/gardener/AGENTS.md index c9ba3b1..2a5dcb3 100644 --- a/gardener/AGENTS.md +++ b/gardener/AGENTS.md @@ -1,4 +1,4 @@ - + # Gardener Agent **Role**: Backlog grooming — detect duplicate issues, missing acceptance @@ -22,7 +22,8 @@ directly from cron like the planner, predictor, and supervisor. `PHASE:awaiting_ci` — injects CI results and review feedback, re-signals `PHASE:awaiting_ci` after fixes, signals `PHASE:awaiting_review` on CI pass. Executes pending-actions manifest after PR merge. 
-- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, blocked-review, agents-update, commit-and-pr +- `formulas/run-gardener.toml` — Execution spec: preflight, grooming, dust-bundling, + agents-update, commit-and-pr - `gardener/pending-actions.json` — Manifest of deferred repo actions (label changes, closures, comments, issue creation). Written during grooming steps, committed to the PR, reviewed alongside AGENTS.md changes, executed by gardener-run.sh after merge. @@ -34,7 +35,7 @@ directly from cron like the planner, predictor, and supervisor. **Lifecycle**: gardener-run.sh (cron 0,6,12,18) → `check_active gardener` → lock + memory guard → load formula + context → create tmux session → Claude grooms backlog (writes proposed actions to manifest), bundles dust, -reviews blocked issues, updates AGENTS.md, commits manifest + docs to PR → +updates AGENTS.md, commits manifest + docs to PR → `PHASE:awaiting_ci` (stays alive) → CI pass → `PHASE:awaiting_review` → review feedback → address + re-signal → merge → gardener-run.sh executes manifest actions via API → `PHASE:done`. When blocked on external resources diff --git a/gardener/PROMPT.md b/gardener/PROMPT.md deleted file mode 100644 index 90cfe5e..0000000 --- a/gardener/PROMPT.md +++ /dev/null @@ -1,50 +0,0 @@ -# Gardener Prompt — Dust vs Ore - -> **Note:** This is human documentation. The actual LLM prompt is built -> inline in `gardener-poll.sh` (with dynamic context injection). This file -> documents the design rationale for reference. - -## Rule - -Don't promote trivial tech-debt individually. Each promotion costs a full -factory cycle: CI + dev-agent + review + merge. Don't fill minecarts with -dust — put ore inside. - -## What is dust? - -- Comment fix -- Variable rename -- Style-only change (whitespace, formatting) -- Single-line edit -- Trivial cleanup with no behavioral impact - -## What is ore? 
- -- Multi-file changes -- Behavioral fixes -- Architectural improvements -- Security or correctness issues -- Anything requiring design thought - -## LLM output format - -When a tech-debt issue is dust, the LLM outputs: - -``` -DUST: {"issue": NNN, "group": "", "title": "...", "reason": "..."} -``` - -The `group` field clusters related dust by file or subsystem (e.g. -`"gardener"`, `"lib/env.sh"`, `"dev-poll"`). - -## Bundling - -The script collects dust items into `gardener/dust.jsonl`. When a group -accumulates 3+ items, the script automatically: - -1. Creates one bundled backlog issue referencing all source issues -2. Closes the individual source issues with a cross-reference comment -3. Removes bundled items from the staging file - -This converts N trivial issues into 1 actionable issue, saving N-1 factory -cycles. diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index 31aa8c0..b524b62 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -45,7 +45,7 @@ source "$FACTORY_ROOT/lib/agent-sdk.sh" # shellcheck source=../lib/pr-lifecycle.sh source "$FACTORY_ROOT/lib/pr-lifecycle.sh" -LOG_FILE="$SCRIPT_DIR/gardener.log" +LOG_FILE="${DISINTO_LOG_DIR}/gardener/gardener.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -55,19 +55,30 @@ RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" WORKTREE="/tmp/${PROJECT_NAME}-gardener-run" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="gardener" # ── Guards ──────────────────────────────────────────────────────────────── check_active gardener acquire_cron_lock "/tmp/gardener-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Gardener run start ---" +# ── Resolve forge remote for git 
operations ───────────────────────────── +resolve_forge_remote + +# ── Resolve agent identity for .profile repo ──────────────────────────── +resolve_agent_identity || true + # ── Load formula + context ─────────────────────────────────────────────── -load_formula "$FACTORY_ROOT/formulas/run-gardener.toml" +load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1 build_context_block AGENTS.md +# ── Prepare .profile context (lessons injection) ───────────────────────── +formula_prepare_profile_context + # ── Read scratch file (compaction survival) ─────────────────────────────── SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") @@ -105,7 +116,7 @@ You have full shell access and --dangerously-skip-permissions. Fix what you can. File vault items for what you cannot. Do NOT ask permission — act first, report after. ## Project context -${CONTEXT_BLOCK} +${CONTEXT_BLOCK}$(formula_lessons_block) ${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} } ## Result file @@ -118,16 +129,7 @@ ${SCRATCH_INSTRUCTION} ${PROMPT_FOOTER}" # ── Create worktree ────────────────────────────────────────────────────── -cd "$PROJECT_REPO_ROOT" -git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true -worktree_cleanup "$WORKTREE" -git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null - -cleanup() { - worktree_cleanup "$WORKTREE" - rm -f "$GARDENER_PR_FILE" -} -trap cleanup EXIT +formula_worktree_setup "$WORKTREE" # ── Post-merge manifest execution ──────────────────────────────────────── # Reads gardener/pending-actions.json and executes each action via API. 
@@ -156,19 +158,21 @@ _gardener_execute_manifest() { case "$action" in add_label) - local label label_id + local label label_id http_code resp label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/labels" \ - -d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then + -d "{\"labels\":[${label_id}]}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then log "manifest: add_label '${label}' to #${issue}" else - log "manifest: FAILED add_label '${label}' to #${issue}" + log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}" fi else log "manifest: FAILED add_label — label '${label}' not found" @@ -176,17 +180,19 @@ _gardener_execute_manifest() { ;; remove_label) - local label label_id + local label label_id http_code resp label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then + resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: remove_label '${label}' from #${issue}" else - log "manifest: FAILED remove_label '${label}' from #${issue}" + log "manifest: FAILED 
remove_label '${label}' from #${issue}: HTTP ${http_code}" fi else log "manifest: FAILED remove_label — label '${label}' not found" @@ -194,34 +200,38 @@ _gardener_execute_manifest() { ;; close) - local reason + local reason http_code resp reason=$(jq -r ".[$i].reason // empty" "$manifest_file") - if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d '{"state":"closed"}' >/dev/null 2>&1; then + -d '{"state":"closed"}' 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: closed #${issue} (${reason})" else - log "manifest: FAILED close #${issue}" + log "manifest: FAILED close #${issue}: HTTP ${http_code}" fi ;; comment) - local body escaped_body + local body escaped_body http_code resp body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/comments" \ - -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then + -d "{\"body\":${escaped_body}}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then log "manifest: commented on #${issue}" else - log "manifest: FAILED comment on #${issue}" + log "manifest: FAILED comment on #${issue}: HTTP ${http_code}" fi ;; create_issue) - local title body labels escaped_title escaped_body label_ids + local title body labels escaped_title escaped_body label_ids http_code resp title=$(jq -r ".[$i].title" "$manifest_file") body=$(jq -r ".[$i].body" "$manifest_file") labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file") @@ -241,40 +251,46 @@ 
_gardener_execute_manifest() { done <<< "$labels" [ -n "$ids_json" ] && label_ids="[${ids_json}]" fi - if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues" \ - -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then + -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then log "manifest: created issue '${title}'" else - log "manifest: FAILED create_issue '${title}'" + log "manifest: FAILED create_issue '${title}': HTTP ${http_code}" fi ;; edit_body) - local body escaped_body + local body escaped_body http_code resp body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then + -d "{\"body\":${escaped_body}}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: edited body of #${issue}" else - log "manifest: FAILED edit_body #${issue}" + log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}" fi ;; close_pr) - local pr + local pr http_code resp pr=$(jq -r ".[$i].pr" "$manifest_file") - if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/pulls/${pr}" \ - -d '{"state":"closed"}' >/dev/null 2>&1; then + -d '{"state":"closed"}' 2>/dev/null) || true + http_code=$(echo 
"$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: closed PR #${pr}" else - log "manifest: FAILED close_pr #${pr}" + log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}" fi ;; @@ -319,9 +335,9 @@ if [ -n "$PR_NUMBER" ]; then if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then # Post-merge: pull primary, mirror push, execute manifest - git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true - git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true mirror_push _gardener_execute_manifest rm -f "$SCRATCH_FILE" @@ -334,5 +350,8 @@ else rm -f "$SCRATCH_FILE" fi +# Write journal entry post-session +profile_write_journal "gardener-run" "Gardener run $(date -u +%Y-%m-%d)" "complete" "" || true + rm -f "$GARDENER_PR_FILE" log "--- Gardener run done ---" diff --git a/gardener/pending-actions.json b/gardener/pending-actions.json index 747973c..a148369 100644 --- a/gardener/pending-actions.json +++ b/gardener/pending-actions.json @@ -1,32 +1,7 @@ [ { "action": "edit_body", - "issue": 765, - "body": "Depends on: none\n\n## Goal\n\nThe disinto website becomes a versioned artifact: built by CI, published to Codeberg's generic package registry, deployed to staging automatically. 
Version visible in footer.\n\n## Files to add/change\n\n### `site/VERSION`\n```\n0.1.0\n```\n\n### `site/build.sh`\n```bash\n#!/bin/bash\nVERSION=$(cat VERSION)\nmkdir -p dist\ncp *.html *.jpg *.webp *.png *.ico *.xml robots.txt dist/\nsed -i \"s|Built from scrap, powered by a single battery.|v${VERSION} · Built from scrap, powered by a single battery.|\" dist/index.html\necho \"$VERSION\" > dist/VERSION\n```\n\n### `site/index.html`\nNo template placeholder needed — `build.sh` does the sed replacement on the existing footer text.\n\n### `.woodpecker/site.yml`\n```yaml\nwhen:\n path: \"site/**\"\n event: push\n branch: main\n\nsteps:\n - name: build\n image: alpine\n commands:\n - cd site && sh build.sh\n - VERSION=$(cat site/VERSION)\n - tar czf site-${VERSION}.tar.gz -C site/dist .\n\n - name: publish\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n --upload-file site-${VERSION}.tar.gz\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n\n - name: deploy-staging\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n -o site.tar.gz\n - rm -rf /srv/staging/*\n - tar xzf site.tar.gz -C /srv/staging/\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n volumes:\n - /home/debian/staging-site:/srv/staging\n```\n\n## Infra setup (manual, before first run)\n- `mkdir -p /home/debian/staging-site`\n- Add to Caddyfile: `staging.disinto.ai { root * /home/debian/staging-site; file_server }`\n- DNS: `staging.disinto.ai` A record → same IP as `disinto.ai`\n- Reload Caddy: `sudo systemctl reload caddy`\n- Add `forge_token` as Woodpecker repo secret for johba/disinto (if not already set)\n- Add 
`/home/debian/staging-site` to `WOODPECKER_BACKEND_DOCKER_VOLUMES`\n\n## Verification\n- [ ] Merge PR that touches `site/` → CI runs site pipeline\n- [ ] Package appears at `codeberg.org/johba/-/packages/generic/disinto-site/0.1.0`\n- [ ] `staging.disinto.ai` serves the site with `v0.1.0` in footer\n- [ ] `disinto.ai` (production) unchanged\n\n## Related\n- #764 — docker stack edge proxy + staging (future: this moves inside the stack)\n- #755 — vault-gated production promotion (production deploy comes later)\n\n## Affected files\n- `site/VERSION` — new, holds current version string\n- `site/build.sh` — new, builds dist/ with version injected into footer\n- `.woodpecker/site.yml` — new, CI pipeline for build/publish/deploy-staging" - }, - { - "action": "edit_body", - "issue": 764, - "body": "Depends on: none (builds on existing docker-compose generation in `bin/disinto`)\n\n## Design\n\n`disinto init` + `disinto up` starts two additional containers as base factory infrastructure:\n\n### Edge proxy (Caddy)\n- Reverse proxies to Forgejo and Woodpecker\n- Serves staging site\n- Runs on ports 80/443\n- At bootstrap: IP-only, self-signed TLS or HTTP\n- Domain + Let's Encrypt added later via vault resource request\n\n### Staging container (Caddy)\n- Static file server for the project's staging artifacts\n- Starts with a default \"Nothing shipped yet\" page\n- CI pipelines write to a shared volume to update staging content\n- No vault approval needed — staging is the factory's sandbox\n\n### docker-compose addition\n```yaml\nservices:\n edge:\n image: caddy:alpine\n ports:\n - \"80:80\"\n - \"443:443\"\n volumes:\n - ./Caddyfile:/etc/caddy/Caddyfile\n - caddy_data:/data\n depends_on:\n - forgejo\n - woodpecker-server\n - staging\n\n staging:\n image: caddy:alpine\n volumes:\n - staging-site:/srv/site\n # Not exposed directly — edge proxies to it\n\nvolumes:\n caddy_data:\n staging-site:\n```\n\n### Caddyfile (generated by `disinto init`)\n```\n# IP-only at bootstrap, 
domain added later\n:80 {\n handle /forgejo/* {\n reverse_proxy forgejo:3000\n }\n handle /ci/* {\n reverse_proxy woodpecker-server:8000\n }\n handle {\n reverse_proxy staging:80\n }\n}\n```\n\n### Staging update flow\n1. CI builds artifact (site tarball, etc.)\n2. CI step writes to `staging-site` volume\n3. Staging container serves updated content immediately\n4. No restart needed — Caddy serves files directly\n\n### Domain lifecycle\n- Bootstrap: no domain, edge serves on IP\n- Later: factory files vault resource request for domain\n- Human buys domain, sets DNS\n- Caddyfile updated with domain, Let's Encrypt auto-provisions TLS\n\n## Affected files\n- `bin/disinto` — `generate_compose()` adds edge + staging services\n- New: default staging page (\"Nothing shipped yet\")\n- New: Caddyfile template in `docker/`\n\n## Related\n- #755 — vault-gated deployment promotion (production comes later)\n- #757 — ops repo (domain is a resource requested through vault)\n\n## Acceptance criteria\n- [ ] `disinto init` generates a `docker-compose.yml` that includes `edge` (Caddy) and `staging` containers\n- [ ] Edge proxy routes `/forgejo/*` → Forgejo, `/ci/*` → Woodpecker, default → staging container\n- [ ] Staging container serves a default \"Nothing shipped yet\" page on first boot\n- [ ] `docker/` directory contains a Caddyfile template generated by `disinto init`\n- [ ] `disinto up` starts all containers including edge and staging without manual steps" - }, - { - "action": "edit_body", - "issue": 761, - "body": "Depends on: #747\n\n## Design\n\nEach agent account on the bundled Forgejo gets a `.profile` repo. 
This repo holds the agent's formula (copied from disinto at creation time) and its journal.\n\n### Structure\n```\n{agent-bot}/.profile/\n├── formula.toml # snapshot of the formula at agent creation time\n├── journal/ # daily logs of what the agent did\n│ ├── 2026-03-26.md\n│ └── ...\n└── knowledge/ # learned patterns, best-practices (optional, agent can evolve)\n```\n\n### Lifecycle\n1. **Create agent** — `disinto init` or `disinto spawn-agent` creates Forgejo account + `.profile` repo\n2. **Copy formula** — current `formulas/{role}.toml` from disinto repo is copied to `.profile/formula.toml`\n3. **Agent reads its own formula** — at session start, agent reads from its `.profile`, not from the disinto repo\n4. **Agent writes journal** — daily entries pushed to `.profile/journal/`\n5. **Agent can evolve knowledge** — best-practices, heuristics, patterns written to `.profile/knowledge/`\n\n### What this enables\n\n**A/B testing formulas:** Create two agents from different formula versions, run both against the same backlog, compare results (cycle time, CI pass rate, review rejection rate).\n\n**Rollback:** New formula worse? Kill agent, spawn from older formula version.\n\n**Audit:** What formula was this agent running when it produced that PR? Check its `.profile` at that git commit.\n\n**Drift tracking:** Diff what an agent learned (`.profile/knowledge/`) vs what it started with. 
Measure formula evolution over time.\n\n**Portability:** Move agent to different box — `git clone` its `.profile`.\n\n### Disinto repo becomes the template\n\n```\ndisinto repo:\n formulas/dev-agent.toml ← canonical template, evolves\n formulas/review-agent.toml\n formulas/planner.toml\n ...\n\nRunning agents:\n dev-bot-v2/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v2\n dev-bot-v3/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v3\n review-bot/.profile/formula.toml ← snapshot from formulas/review-agent.toml\n```\n\nThe formula in the disinto repo is the template. The `.profile` copy is the instance. They can diverge — that's a feature, not a bug.\n\n## Affected files\n- `bin/disinto` — agent creation copies formula to .profile\n- Agent session scripts — read formula from .profile instead of local formulas/ dir\n- Planner/supervisor — can read other agents' journals from their .profile repos\n\n## Related\n- #747 — per-agent Forgejo accounts (prerequisite)\n- #757 — ops repo (shared concerns stay there: vault, portfolio, resources)\n\n## Acceptance criteria\n- [ ] `disinto spawn-agent` (or `disinto init`) creates a Forgejo account + `.profile` repo for each agent bot\n- [ ] Current `formulas/{role}.toml` is copied to `.profile/formula.toml` at agent creation time\n- [ ] Agent session script reads its formula from `.profile/formula.toml`, not from the repo's `formulas/` directory\n- [ ] Agent writes daily journal entries to `.profile/journal/YYYY-MM-DD.md`" - }, - { - "action": "edit_body", - "issue": 742, - "body": "## Problem\n\n`gardener/recipes/*.toml` (4 files: cascade-rebase, chicken-egg-ci, flaky-test, shellcheck-violations) are an older pattern predating `formulas/*.toml`. Two systems for the same thing.\n\n## Fix\n\nMigrate any unique content from recipes to the gardener formula or to new formulas. 
Delete the recipes directory.\n\n## Affected files\n- `gardener/recipes/*.toml` — delete after migration\n- `formulas/run-gardener.toml` — absorb relevant content\n- Gardener scripts that reference recipes/\n\n## Acceptance criteria\n- [ ] Contents of `gardener/recipes/*.toml` are diff'd against `formulas/run-gardener.toml` — any unique content is migrated\n- [ ] `gardener/recipes/` directory is deleted\n- [ ] No scripts in `gardener/` reference the `recipes/` path after migration\n- [ ] ShellCheck passes on all modified scripts" - }, - { - "action": "add_label", - "issue": 742, - "label": "backlog" - }, - { - "action": "add_label", - "issue": 741, - "label": "backlog" + "issue": 356, + "body": "## Problem\n\nThe entrypoint hardcodes `REPRODUCE_FORMULA` to `formulas/reproduce.toml` (line 26) and never checks the `DISINTO_FORMULA` environment variable passed by the dispatcher for triage runs.\n\nThe dispatcher sets `-e DISINTO_FORMULA=triage` for triage dispatch, but the entrypoint ignores it — always running the reproduce formula.\n\n## Fix\n\nAt line 26, select the formula based on `DISINTO_FORMULA`:\n\n```bash\ncase \"${DISINTO_FORMULA:-reproduce}\" in\n triage)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/triage.toml\"\n ;;\n *)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/reproduce.toml\"\n ;;\nesac\n```\n\nThen use `ACTIVE_FORMULA` everywhere `REPRODUCE_FORMULA` is currently used.\n\nAlso update log messages to reflect which formula is running (\"Starting triage-agent\" vs \"Starting reproduce-agent\").\n\n## Affected files\n\n- `docker/reproduce/entrypoint-reproduce.sh` — line 26 and all references to REPRODUCE_FORMULA\n\n## Acceptance criteria\n\n- [ ] `DISINTO_FORMULA=triage` selects `formulas/triage.toml` in the entrypoint\n- [ ] `DISINTO_FORMULA=reproduce` (or unset) still runs `formulas/reproduce.toml`\n- [ ] Log messages reflect which formula is active (\"Starting triage-agent\" / \"Starting reproduce-agent\")\n- [ ] All `REPRODUCE_FORMULA` references 
replaced with `ACTIVE_FORMULA`\n" } ] diff --git a/gardener/recipes/cascade-rebase.toml b/gardener/recipes/cascade-rebase.toml deleted file mode 100644 index 1cd09ee..0000000 --- a/gardener/recipes/cascade-rebase.toml +++ /dev/null @@ -1,16 +0,0 @@ -# gardener/recipes/cascade-rebase.toml — PR outdated after main moved -# -# Trigger: PR mergeable=false (stale branch or dismissed approval) -# Playbook: rebase only — merge and re-approval happen on subsequent cycles -# after CI reruns on the rebased branch (rebase is async via Gitea API) - -name = "cascade-rebase" -description = "PR outdated after main moved — mergeable=false or stale approval" -priority = 20 - -[trigger] -pr_mergeable = false - -[[playbook]] -action = "rebase-pr" -description = "Rebase PR onto main (async — CI reruns, merge on next cycle)" diff --git a/gardener/recipes/chicken-egg-ci.toml b/gardener/recipes/chicken-egg-ci.toml deleted file mode 100644 index cc71e02..0000000 --- a/gardener/recipes/chicken-egg-ci.toml +++ /dev/null @@ -1,25 +0,0 @@ -# gardener/recipes/chicken-egg-ci.toml — PR introduces CI step that fails on pre-existing code -# -# Trigger: New .woodpecker/*.yml in PR + lint/check step + failures on unchanged files -# Playbook: make step non-blocking, create per-file issues, create follow-up to remove bypass - -name = "chicken-egg-ci" -description = "PR introduces a CI pipeline/linting step that fails on pre-existing code" -priority = 10 - -[trigger] -pr_files = '\.woodpecker/.*\.yml$' -step_name = '(?i)(lint|shellcheck|check)' -failures_on_unchanged = true - -[[playbook]] -action = "make-step-non-blocking" -description = "Make failing step non-blocking (|| true) in the PR" - -[[playbook]] -action = "lint-per-file" -description = "Create per-file fix issues for pre-existing violations (generic linter support)" - -[[playbook]] -action = "create-followup-remove-bypass" -description = "Create follow-up issue to remove || true once fixes land" diff --git a/gardener/recipes/flaky-test.toml 
b/gardener/recipes/flaky-test.toml deleted file mode 100644 index 5a76940..0000000 --- a/gardener/recipes/flaky-test.toml +++ /dev/null @@ -1,20 +0,0 @@ -# gardener/recipes/flaky-test.toml — CI fails intermittently -# -# Trigger: Test step fails + multiple CI attempts (same step, different output) -# Playbook: retrigger CI (max 2x), quarantine test if still failing - -name = "flaky-test" -description = "CI fails intermittently — same step fails across multiple attempts" -priority = 30 - -[trigger] -step_name = '(?i)test' -min_attempts = 2 - -[[playbook]] -action = "retrigger-ci" -description = "Retrigger CI (max 2 retries)" - -[[playbook]] -action = "quarantine-test" -description = "If still failing, quarantine test and create fix issue" diff --git a/gardener/recipes/shellcheck-violations.toml b/gardener/recipes/shellcheck-violations.toml deleted file mode 100644 index 0bc9d57..0000000 --- a/gardener/recipes/shellcheck-violations.toml +++ /dev/null @@ -1,20 +0,0 @@ -# gardener/recipes/shellcheck-violations.toml — ShellCheck step fails -# -# Trigger: Step named *shellcheck* fails with SC#### codes in output -# Playbook: parse per-file, create one issue per file, label backlog - -name = "shellcheck-violations" -description = "ShellCheck step fails with SC#### codes in output" -priority = 40 - -[trigger] -step_name = '(?i)shellcheck' -output = 'SC\d{4}' - -[[playbook]] -action = "shellcheck-per-file" -description = "Parse output by file, create one fix issue per file with specific SC codes" - -[[playbook]] -action = "label-backlog" -description = "Label created issues as backlog" diff --git a/lib/AGENTS.md b/lib/AGENTS.md index 520440b..a70e9a7 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -1,4 +1,4 @@ - + # Shared Helpers (`lib/`) All agents source `lib/env.sh` as their first action. Additional helpers are @@ -6,19 +6,29 @@ sourced as needed. 
| File | What it provides | Sourced by | |---|---|---| -| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`, `FORGE_ACTION_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the vault-runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent | -| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this). 
| dev-poll, review-poll, review-pr, supervisor-poll | +| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent | +| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". 
`ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs [--step ]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr | | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | -| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) | -| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. 
| dev-poll, supervisor-poll | -| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh, action-agent.sh | -| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, action-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points | -| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. 
Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh | +| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. ` [--step ]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh | +| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). | env.sh (when `PROJECT_TOML` is set) | +| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll | +| `lib/formula-session.sh` | `acquire_cron_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven cron agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). 
`resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | +| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points | +| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh | | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. 
| review-pr.sh, predictor-run.sh | -| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh | -| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) | +| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh | +| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/<project>-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula | | `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available.
| env.sh (conditional) | -| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, action-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | -| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future), action-agent.sh (future) | -| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. 
| dev-agent.sh (future), action-agent.sh (future) | -| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). 
**OAuth flock**: when `DISINTO_CONTAINER=1`, Claude CLI is wrapped in `flock -w 300 ~/.claude/session.lock` to queue concurrent token refresh attempts and prevent rotation races across agents sharing the same credentials. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh, action-agent.sh | +| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | +| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. 
| dev-agent.sh (future) | +| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) | +| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action-toml>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-name>`, writes `vault/actions/<action-name>.toml`, creates PR targeting `main` with title `vault: <action-name>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher | +| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`.
| bin/disinto (hire-an-agent) | +| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) | +| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE_<BOT>_PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) | +| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) | +| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence). Exports `_ACTUAL_OPS_SLUG`. | bin/disinto (init) | +| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for project agents. `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow.
`_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) | +| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml, `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) | +| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) | +| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) | diff --git a/lib/agent-sdk.sh b/lib/agent-sdk.sh index 41879bf..1c1a69c 100644 --- a/lib/agent-sdk.sh +++ b/lib/agent-sdk.sh @@ -46,9 +46,23 @@ agent_run() { [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") local run_dir="${worktree_dir:-$(pwd)}" - local output + local lock_file="${HOME}/.claude/session.lock" + mkdir -p "$(dirname "$lock_file")" + local output rc log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" - output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true + output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$? 
+ if [ "$rc" -eq 124 ]; then + log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)" + elif [ "$rc" -ne 0 ]; then + log "agent_run: claude exited with code $rc" + # Log last 3 lines of output for diagnostics + if [ -n "$output" ]; then + log "agent_run: last output lines: $(echo "$output" | tail -3)" + fi + fi + if [ -z "$output" ]; then + log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)" + fi # Extract and persist session_id local new_sid @@ -58,4 +72,45 @@ agent_run() { printf '%s' "$new_sid" > "$SID_FILE" log "agent_run: session_id=${new_sid:0:12}..." fi + + # Save output for diagnostics (no_push, crashes) + _AGENT_LAST_OUTPUT="$output" + local diag_file="${DISINTO_LOG_DIR:-/tmp}/dev/agent-run-last.json" + printf '%s' "$output" > "$diag_file" 2>/dev/null || true + + # Nudge: if the model stopped without pushing, resume with encouragement. + # Some models emit end_turn prematurely when confused. A nudge often unsticks them. + if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then + local has_changes + has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true + local has_pushed + has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true + if [ -z "$has_pushed" ]; then + if [ -n "$has_changes" ]; then + # Nudge: there are uncommitted changes + local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push." + log "agent_run: nudging (uncommitted changes)" + local nudge_rc + output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$? 
+ if [ "$nudge_rc" -eq 124 ]; then + log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)" + elif [ "$nudge_rc" -ne 0 ]; then + log "agent_run: nudge claude exited with code $nudge_rc" + # Log last 3 lines of output for diagnostics + if [ -n "$output" ]; then + log "agent_run: nudge last output lines: $(echo "$output" | tail -3)" + fi + fi + new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true + if [ -n "$new_sid" ]; then + _AGENT_SESSION_ID="$new_sid" + printf '%s' "$new_sid" > "$SID_FILE" + fi + printf '%s' "$output" > "$diag_file" 2>/dev/null || true + _AGENT_LAST_OUTPUT="$output" + else + log "agent_run: no push and no changes — skipping nudge" + fi + fi + fi } diff --git a/lib/agent-session.sh b/lib/agent-session.sh deleted file mode 100644 index dbb1e2a..0000000 --- a/lib/agent-session.sh +++ /dev/null @@ -1,486 +0,0 @@ -#!/usr/bin/env bash -# agent-session.sh — Shared tmux + Claude interactive session helpers -# -# Source this into agent orchestrator scripts for reusable session management. -# -# Functions: -# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS] -# agent_inject_into_session SESSION_NAME TEXT -# agent_kill_session SESSION_NAME -# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME] -# session_lock_acquire [TIMEOUT_SECS] -# session_lock_release - -# --- Cooperative session lock (fd-based) --- -# File descriptor for the session lock. Set by create_agent_session(). -# Callers can release/re-acquire via session_lock_release/session_lock_acquire -# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci). -SESSION_LOCK_FD="" - -# Release the session lock without closing the file descriptor. -# The fd stays open so it can be re-acquired later. -session_lock_release() { - if [ -n "${SESSION_LOCK_FD:-}" ]; then - flock -u "$SESSION_LOCK_FD" - fi -} - -# Re-acquire the session lock. Blocks until available or timeout. 
-# Opens the lock fd if not already open (for use by external callers). -# Args: [timeout_secs] (default 300) -# Returns 0 on success, 1 on timeout/error. -# shellcheck disable=SC2120 # timeout arg is used by external callers -session_lock_acquire() { - local timeout="${1:-300}" - if [ -z "${SESSION_LOCK_FD:-}" ]; then - local lock_dir="${HOME}/.claude" - mkdir -p "$lock_dir" - exec {SESSION_LOCK_FD}>>"${lock_dir}/session.lock" - fi - flock -w "$timeout" "$SESSION_LOCK_FD" -} - -# Wait for the Claude ❯ ready prompt in a tmux pane. -# Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise. -agent_wait_for_claude_ready() { - local session="$1" - local timeout="${2:-120}" - local elapsed=0 - while [ "$elapsed" -lt "$timeout" ]; do - if tmux capture-pane -t "$session" -p 2>/dev/null | grep -q '❯'; then - return 0 - fi - sleep 2 - elapsed=$((elapsed + 2)) - done - return 1 -} - -# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter. -agent_inject_into_session() { - local session="$1" - local text="$2" - local tmpfile - # Re-acquire session lock before injecting — Claude will resume working - # shellcheck disable=SC2119 # using default timeout - session_lock_acquire || true - agent_wait_for_claude_ready "$session" 120 || true - # Clear idle marker — new work incoming - rm -f "/tmp/claude-idle-${session}.ts" - tmpfile=$(mktemp /tmp/agent-inject-XXXXXX) - printf '%s' "$text" > "$tmpfile" - tmux load-buffer -b "agent-inject-$$" "$tmpfile" - tmux paste-buffer -t "$session" -b "agent-inject-$$" - sleep 0.5 - tmux send-keys -t "$session" "" Enter - tmux delete-buffer -b "agent-inject-$$" 2>/dev/null || true - rm -f "$tmpfile" -} - -# Create a tmux session running Claude in the given workdir. -# Installs a Stop hook for idle detection (see monitor_phase_loop). -# Installs a PreToolUse hook to guard destructive Bash operations. -# Optionally installs a PostToolUse hook for phase file write detection. 
-# Optionally installs a StopFailure hook for immediate phase file update on API error. -# Args: session workdir [phase_file] -# Returns 0 if session is ready, 1 otherwise. -create_agent_session() { - local session="$1" - local workdir="${2:-.}" - local phase_file="${3:-}" - - # Prepare settings directory for hooks - mkdir -p "${workdir}/.claude" - local settings="${workdir}/.claude/settings.json" - - # Install Stop hook for idle detection: when Claude finishes a response, - # the hook writes a timestamp to a marker file. monitor_phase_loop checks - # this marker instead of fragile tmux pane scraping. - local idle_marker="/tmp/claude-idle-${session}.ts" - local hook_script="${FACTORY_ROOT}/lib/hooks/on-idle-stop.sh" - if [ -x "$hook_script" ]; then - local hook_cmd="${hook_script} ${idle_marker}" - # When a phase file is available, pass it and the session name so the - # hook can nudge Claude if it returns to the prompt without signalling. - if [ -n "$phase_file" ]; then - hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}" - fi - if [ -f "$settings" ]; then - # Append our Stop hook to existing project settings - jq --arg cmd "$hook_cmd" ' - if (.hooks.Stop // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.Stop = (.hooks.Stop // []) + [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$hook_cmd" '{ - hooks: { - Stop: [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - - # Install PostToolUse hook for phase file write detection: when Claude - # writes to the phase file via Bash or Write, the hook writes a marker - # so monitor_phase_loop can react immediately instead of waiting for - # the next mtime-based poll cycle. 
- if [ -n "$phase_file" ]; then - local phase_marker="/tmp/phase-changed-${session}.marker" - local phase_hook_script="${FACTORY_ROOT}/lib/hooks/on-phase-change.sh" - if [ -x "$phase_hook_script" ]; then - local phase_hook_cmd="${phase_hook_script} ${phase_file} ${phase_marker}" - if [ -f "$settings" ]; then - jq --arg cmd "$phase_hook_cmd" ' - if (.hooks.PostToolUse // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.PostToolUse = (.hooks.PostToolUse // []) + [{ - matcher: "Bash|Write", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$phase_hook_cmd" '{ - hooks: { - PostToolUse: [{ - matcher: "Bash|Write", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - rm -f "$phase_marker" - fi - fi - - # Install StopFailure hook for immediate phase file update on API error: - # when Claude hits a rate limit, server error, billing error, or auth failure, - # the hook writes PHASE:failed to the phase file and touches the phase-changed - # marker so monitor_phase_loop picks it up within one poll cycle instead of - # waiting for idle timeout (up to 2 hours). - if [ -n "$phase_file" ]; then - local stop_failure_hook_script="${FACTORY_ROOT}/lib/hooks/on-stop-failure.sh" - if [ -x "$stop_failure_hook_script" ]; then - # phase_marker is defined in the PostToolUse block above; redeclare so - # this block is self-contained if that block is ever removed. - local sf_phase_marker="/tmp/phase-changed-${session}.marker" - local stop_failure_hook_cmd="${stop_failure_hook_script} ${phase_file} ${sf_phase_marker}" - if [ -f "$settings" ]; then - jq --arg cmd "$stop_failure_hook_cmd" ' - if (.hooks.StopFailure // [] | any(.[]; .hooks[]?.command == $cmd)) - then . 
- else .hooks.StopFailure = (.hooks.StopFailure // []) + [{ - matcher: "rate_limit|server_error|authentication_failed|billing_error", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$stop_failure_hook_cmd" '{ - hooks: { - StopFailure: [{ - matcher: "rate_limit|server_error|authentication_failed|billing_error", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - fi - - # Install PreToolUse hook for destructive operation guard: blocks force push - # to primary branch, rm -rf outside worktree, direct API merge calls, and - # checkout/switch to primary branch. Claude sees the denial reason on exit 2 - # and can self-correct. - local guard_hook_script="${FACTORY_ROOT}/lib/hooks/on-pretooluse-guard.sh" - if [ -x "$guard_hook_script" ]; then - local abs_workdir - abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir" - local guard_hook_cmd="${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}" - if [ -f "$settings" ]; then - jq --arg cmd "$guard_hook_cmd" ' - if (.hooks.PreToolUse // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.PreToolUse = (.hooks.PreToolUse // []) + [{ - matcher: "Bash", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$guard_hook_cmd" '{ - hooks: { - PreToolUse: [{ - matcher: "Bash", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - - # Install SessionEnd hook for guaranteed cleanup: when the Claude session - # exits (clean or crash), write a termination marker so monitor_phase_loop - # detects the exit faster than tmux has-session polling alone. 
- local exit_marker="/tmp/claude-exited-${session}.ts" - local session_end_hook_script="${FACTORY_ROOT}/lib/hooks/on-session-end.sh" - if [ -x "$session_end_hook_script" ]; then - local session_end_hook_cmd="${session_end_hook_script} ${exit_marker}" - if [ -f "$settings" ]; then - jq --arg cmd "$session_end_hook_cmd" ' - if (.hooks.SessionEnd // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.SessionEnd = (.hooks.SessionEnd // []) + [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$session_end_hook_cmd" '{ - hooks: { - SessionEnd: [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - rm -f "$exit_marker" - - # Install SessionStart hook for context re-injection after compaction: - # when Claude Code compacts context during long sessions, the phase protocol - # instructions are lost. This hook fires after each compaction and outputs - # the content of a context file so Claude retains critical instructions. - # The context file is written by callers via write_compact_context(). - if [ -n "$phase_file" ]; then - local compact_hook_script="${FACTORY_ROOT}/lib/hooks/on-compact-reinject.sh" - if [ -x "$compact_hook_script" ]; then - local context_file="${phase_file%.phase}.context" - local compact_hook_cmd="${compact_hook_script} ${context_file}" - if [ -f "$settings" ]; then - jq --arg cmd "$compact_hook_cmd" ' - if (.hooks.SessionStart // [] | any(.[]; .hooks[]?.command == $cmd)) - then . 
- else .hooks.SessionStart = (.hooks.SessionStart // []) + [{ - matcher: "compact", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$compact_hook_cmd" '{ - hooks: { - SessionStart: [{ - matcher: "compact", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - fi - - rm -f "$idle_marker" - local model_flag="" - if [ -n "${CLAUDE_MODEL:-}" ]; then - model_flag="--model ${CLAUDE_MODEL}" - fi - - # Acquire a session-level mutex via fd-based flock to prevent concurrent - # Claude sessions from racing on OAuth token refresh. Unlike the previous - # command-wrapper flock, the fd approach allows callers to release the lock - # during idle phases (awaiting_review/awaiting_ci) and re-acquire before - # injecting the next prompt. See #724. - # Use ~/.claude/session.lock so the lock is shared across containers when - # the host ~/.claude directory is bind-mounted. - local lock_dir="${HOME}/.claude" - mkdir -p "$lock_dir" - local claude_lock="${lock_dir}/session.lock" - if [ -z "${SESSION_LOCK_FD:-}" ]; then - exec {SESSION_LOCK_FD}>>"${claude_lock}" - fi - if ! flock -w 300 "$SESSION_LOCK_FD"; then - return 1 - fi - local claude_cmd="claude --dangerously-skip-permissions ${model_flag}" - - tmux new-session -d -s "$session" -c "$workdir" \ - "$claude_cmd" 2>/dev/null - sleep 1 - tmux has-session -t "$session" 2>/dev/null || return 1 - agent_wait_for_claude_ready "$session" 120 || return 1 - return 0 -} - -# Inject a prompt/formula into a session (alias for agent_inject_into_session). -inject_formula() { - agent_inject_into_session "$@" -} - -# Monitor a phase file, calling a callback on changes and handling idle timeout. -# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate). -# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME). 
-# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly. -# Args: phase_file idle_timeout_secs callback_fn [session_name] -# session_name — tmux session to health-check; falls back to $SESSION_NAME global -# -# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh) -# to detect when Claude finishes responding without writing a phase signal. -# If the marker exists for 3 consecutive polls with no phase written, the session -# is killed and the callback invoked with "PHASE:failed". -monitor_phase_loop() { - local phase_file="$1" - local idle_timeout="$2" - local callback="$3" - local _session="${4:-${SESSION_NAME:-}}" - # Export resolved session name so callbacks can reference it regardless of - # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT). - export _MONITOR_SESSION="$_session" - local poll_interval="${PHASE_POLL_INTERVAL:-10}" - local last_mtime=0 - local idle_elapsed=0 - local idle_pane_count=0 - - while true; do - sleep "$poll_interval" - idle_elapsed=$(( idle_elapsed + poll_interval )) - - # Session health check: SessionEnd hook marker provides fast detection, - # tmux has-session is the fallback for unclean exits (e.g. tmux crash). - local exit_marker="/tmp/claude-exited-${_session}.ts" - if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then - local current_phase - current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) - case "$current_phase" in - PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate) - ;; # terminal — fall through to phase handler - *) - # Call callback with "crashed" — let agent-specific code handle recovery - if type "${callback}" &>/dev/null; then - "$callback" "PHASE:crashed" - fi - # If callback didn't restart session, break - if ! 
tmux has-session -t "${_session}" 2>/dev/null; then - _MONITOR_LOOP_EXIT="crashed" - return 1 - fi - idle_elapsed=0 - idle_pane_count=0 - continue - ;; - esac - fi - - # Check phase-changed marker from PostToolUse hook — if present, the hook - # detected a phase file write so we reset last_mtime to force processing - # this cycle instead of waiting for the next mtime change. - local phase_marker="/tmp/phase-changed-${_session}.marker" - if [ -f "$phase_marker" ]; then - rm -f "$phase_marker" - last_mtime=0 - fi - - # Check phase file for changes - local phase_mtime - phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0) - local current_phase - current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) - - if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then - # No phase change — check idle timeout - if [ "$idle_elapsed" -ge "$idle_timeout" ]; then - _MONITOR_LOOP_EXIT="idle_timeout" - agent_kill_session "${_session}" - return 0 - fi - # Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker - # file when Claude finishes a response. If the marker exists and no phase - # has been written, Claude returned to the prompt without following the - # phase protocol. 3 consecutive polls = confirmed idle (not mid-turn). - local idle_marker="/tmp/claude-idle-${_session}.ts" - if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then - idle_pane_count=$(( idle_pane_count + 1 )) - if [ "$idle_pane_count" -ge 3 ]; then - _MONITOR_LOOP_EXIT="idle_prompt" - # Session is killed before the callback is invoked. - # Callbacks that handle PHASE:failed must not assume the session is alive. 
- agent_kill_session "${_session}" - if type "${callback}" &>/dev/null; then - "$callback" "PHASE:failed" - fi - return 0 - fi - else - idle_pane_count=0 - fi - continue - fi - - # Phase changed - last_mtime="$phase_mtime" - # shellcheck disable=SC2034 # read by phase-handler.sh callback - LAST_PHASE_MTIME="$phase_mtime" - idle_elapsed=0 - idle_pane_count=0 - - # Terminal phases - case "$current_phase" in - PHASE:done|PHASE:merged) - _MONITOR_LOOP_EXIT="done" - if type "${callback}" &>/dev/null; then - "$callback" "$current_phase" - fi - return 0 - ;; - PHASE:failed|PHASE:escalate) - _MONITOR_LOOP_EXIT="$current_phase" - if type "${callback}" &>/dev/null; then - "$callback" "$current_phase" - fi - return 0 - ;; - esac - - # Non-terminal phase — call callback - if type "${callback}" &>/dev/null; then - "$callback" "$current_phase" - fi - done -} - -# Write context to a file for re-injection after context compaction. -# The SessionStart compact hook reads this file and outputs it to stdout. -# Args: phase_file content -write_compact_context() { - local phase_file="$1" - local content="$2" - local context_file="${phase_file%.phase}.context" - printf '%s\n' "$content" > "$context_file" -} - -# Kill a tmux session gracefully (no-op if not found). -agent_kill_session() { - local session="${1:-}" - [ -n "$session" ] && tmux kill-session -t "$session" 2>/dev/null || true - rm -f "/tmp/claude-idle-${session}.ts" - rm -f "/tmp/phase-changed-${session}.marker" - rm -f "/tmp/claude-exited-${session}.ts" - rm -f "/tmp/claude-nudge-${session}.count" -} - -# Read the current phase from a phase file, stripped of whitespace. 
-# Usage: read_phase [file] — defaults to $PHASE_FILE -read_phase() { - local file="${1:-${PHASE_FILE:-}}" - { cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]' -} diff --git a/lib/branch-protection.sh b/lib/branch-protection.sh new file mode 100644 index 0000000..e972977 --- /dev/null +++ b/lib/branch-protection.sh @@ -0,0 +1,591 @@ +#!/usr/bin/env bash +# branch-protection.sh — Helper for setting up branch protection on repos +# +# Source after lib/env.sh: +# source "$(dirname "$0")/../lib/env.sh" +# source "$(dirname "$0")/lib/branch-protection.sh" +# +# Required globals: FORGE_TOKEN, FORGE_URL, FORGE_OPS_REPO +# +# Functions: +# setup_vault_branch_protection — Set up admin-only branch protection for main +# verify_branch_protection — Verify protection is configured correctly +# setup_profile_branch_protection — Set up admin-only branch protection for .profile repos +# remove_branch_protection — Remove branch protection (for cleanup/testing) +# +# Branch protection settings: +# - Require 1 approval before merge +# - Restrict merge to admin role (not regular collaborators or bots) +# - Block direct pushes to main (all changes must go through PR) + +set -euo pipefail + +# Internal log helper +_bp_log() { + if declare -f log >/dev/null 2>&1; then + log "branch-protection: $*" + else + printf '[%s] branch-protection: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 + fi +} + +# Get ops repo API URL +_ops_api() { + printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}" +} + +# ----------------------------------------------------------------------------- +# setup_vault_branch_protection — Set up admin-only branch protection for main +# +# Configures the following protection rules: +# - Require 1 approval before merge +# - Restrict merge to admin role (not regular collaborators or bots) +# - Block direct pushes to main (all changes must go through PR) +# +# Returns: 0 on success, 1 on failure +# 
----------------------------------------------------------------------------- +setup_vault_branch_protection() { + local branch="${1:-main}" + local api_url + api_url="$(_ops_api)" + + _bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}" + + # Check if branch exists with retry loop (handles race condition after initial push) + local branch_exists="0" + local max_attempts=3 + local attempt=1 + + while [ "$attempt" -le "$max_attempts" ]; do + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" = "200" ]; then + _bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}" + break + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." + sleep 2 + fi + attempt=$((attempt + 1)) + done + + if [ "$branch_exists" != "200" ]; then + _bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts" + return 1 + fi + + # Check if protection already exists + local protection_exists + protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") + + if [ "$protection_exists" = "200" ]; then + _bp_log "Branch protection already exists for ${branch}" + _bp_log "Updating existing protection rules" + fi + + # Create/update branch protection + # Note: Forgejo API uses "require_signed_commits" and "required_approvals" for approval requirements + # The "admin_enforced" field ensures only admins can merge + local protection_json + protection_json=$(cat </dev/null || true) + + if [ -z "$protection_json" ] || [ "$protection_json" = "null" ]; then + _bp_log "ERROR: No branch protection found for ${branch}" + return 1 + fi + + # Extract and validate settings + local enable_push enable_merge_commit 
required_approvals admin_enforced + enable_push=$(printf '%s' "$protection_json" | jq -r '.enable_push // true') + enable_merge_commit=$(printf '%s' "$protection_json" | jq -r '.enable_merge_commit // false') + required_approvals=$(printf '%s' "$protection_json" | jq -r '.required_approvals // 0') + admin_enforced=$(printf '%s' "$protection_json" | jq -r '.admin_enforced // false') + + local errors=0 + + # Check push is disabled + if [ "$enable_push" = "true" ]; then + _bp_log "ERROR: enable_push should be false" + errors=$((errors + 1)) + else + _bp_log "OK: Pushes are blocked" + fi + + # Check merge commit is enabled + if [ "$enable_merge_commit" != "true" ]; then + _bp_log "ERROR: enable_merge_commit should be true" + errors=$((errors + 1)) + else + _bp_log "OK: Merge commits are allowed" + fi + + # Check required approvals + if [ "$required_approvals" -lt 1 ]; then + _bp_log "ERROR: required_approvals should be at least 1" + errors=$((errors + 1)) + else + _bp_log "OK: Required approvals: ${required_approvals}" + fi + + # Check admin enforced + if [ "$admin_enforced" != "true" ]; then + _bp_log "ERROR: admin_enforced should be true" + errors=$((errors + 1)) + else + _bp_log "OK: Admin enforcement enabled" + fi + + if [ "$errors" -gt 0 ]; then + _bp_log "Verification failed with ${errors} error(s)" + return 1 + fi + + _bp_log "Branch protection verified successfully" + return 0 +} + +# ----------------------------------------------------------------------------- +# setup_profile_branch_protection — Set up admin-only branch protection for .profile repos +# +# Configures the following protection rules: +# - Require 1 approval before merge +# - Restrict merge to admin role (not regular collaborators or bots) +# - Block direct pushes to main (all changes must go through PR) +# +# Also creates a 'journal' branch for direct agent journal pushes +# +# Args: +# $1 - Repo path in format 'owner/repo' (e.g., 'dev-bot/.profile') +# $2 - Branch to protect (default: main) +# 
+# Returns: 0 on success, 1 on failure +# ----------------------------------------------------------------------------- +setup_profile_branch_protection() { + local repo="${1:-}" + local branch="${2:-main}" + + if [ -z "$repo" ]; then + _bp_log "ERROR: repo path required (format: owner/repo)" + return 1 + fi + + _bp_log "Setting up branch protection for ${branch} on ${repo}" + + local api_url + api_url="${FORGE_URL}/api/v1/repos/${repo}" + + # Check if branch exists with retry loop (handles race condition after initial push) + local branch_exists="0" + local max_attempts=3 + local attempt=1 + + while [ "$attempt" -le "$max_attempts" ]; do + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" = "200" ]; then + _bp_log "Branch ${branch} exists on ${repo}" + break + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
+ sleep 2 + fi + attempt=$((attempt + 1)) + done + + if [ "$branch_exists" != "200" ]; then + _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" + return 1 + fi + + # Check if protection already exists + local protection_exists + protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") + + if [ "$protection_exists" = "200" ]; then + _bp_log "Branch protection already exists for ${branch}" + _bp_log "Updating existing protection rules" + fi + + # Create/update branch protection + local protection_json + protection_json=$(cat </dev/null || echo "0") + + if [ "$journal_exists" != "200" ]; then + # Create journal branch from main + # Get the commit hash of main + local main_commit + main_commit=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/refs/heads/${branch}" 2>/dev/null | jq -r '.[0].object.sha' || echo "") + + if [ -n "$main_commit" ]; then + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${api_url}/git/refs" \ + -d "{\"ref\":\"refs/heads/${journal_branch}\",\"sha\":\"${main_commit}\"}" >/dev/null 2>&1 || { + _bp_log "Warning: failed to create journal branch (may already exist)" + } + fi + fi + + _bp_log "Journal branch '${journal_branch}' ready for direct pushes" + + return 0 +} + +# ----------------------------------------------------------------------------- +# remove_branch_protection — Remove branch protection (for cleanup/testing) +# +# Returns: 0 on success, 1 on failure +# ----------------------------------------------------------------------------- +remove_branch_protection() { + local branch="${1:-main}" + local api_url + api_url="$(_ops_api)" + + _bp_log "Removing branch protection for ${branch}" + + # Check if protection exists + local protection_exists + protection_exists=$(curl -s -o /dev/null -w "%{http_code}" 
\ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") + + if [ "$protection_exists" != "200" ]; then + _bp_log "No branch protection found for ${branch}" + return 0 + fi + + # Delete protection + local http_code + http_code=$(curl -s -o /dev/null -w "%{http_code}" \ + -X DELETE \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") + + if [ "$http_code" != "204" ]; then + _bp_log "ERROR: Failed to remove branch protection (HTTP ${http_code})" + return 1 + fi + + _bp_log "Branch protection removed successfully for ${branch}" + return 0 +} + +# ----------------------------------------------------------------------------- +# setup_project_branch_protection — Set up branch protection for project repos +# +# Configures the following protection rules: +# - Block direct pushes to main (all changes must go through PR) +# - Require 1 approval before merge +# - Allow merge only via dev-bot (for auto-merge after review+CI) +# - Allow review-bot to approve PRs +# +# Args: +# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto') +# $2 - Branch to protect (default: main) +# +# Returns: 0 on success, 1 on failure +# ----------------------------------------------------------------------------- +setup_project_branch_protection() { + local repo="${1:-}" + local branch="${2:-main}" + + if [ -z "$repo" ]; then + _bp_log "ERROR: repo path required (format: owner/repo)" + return 1 + fi + + _bp_log "Setting up branch protection for ${branch} on ${repo}" + + local api_url + api_url="${FORGE_URL}/api/v1/repos/${repo}" + + # Check if branch exists with retry loop (handles race condition after initial push) + local branch_exists="0" + local max_attempts=3 + local attempt=1 + + while [ "$attempt" -le "$max_attempts" ]; do + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + 
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" = "200" ]; then + _bp_log "Branch ${branch} exists on ${repo}" + break + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." + sleep 2 + fi + attempt=$((attempt + 1)) + done + + if [ "$branch_exists" != "200" ]; then + _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" + return 1 + fi + + # Check if protection already exists + local protection_exists + protection_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/branches/${branch}/protection" 2>/dev/null || echo "0") + + if [ "$protection_exists" = "200" ]; then + _bp_log "Branch protection already exists for ${branch}" + _bp_log "Updating existing protection rules" + fi + + # Create/update branch protection + # Forgejo API for branch protection (factory mode): + # - enable_push: false (block direct pushes) + # - enable_merge_whitelist: true (only whitelisted users can merge) + # - merge_whitelist_usernames: ["dev-bot"] (dev-bot merges after CI) + # - required_approvals: 1 (review-bot must approve) + local protection_json + protection_json=$(cat <&2 + exit 1 + fi + + if [ -z "${FORGE_URL:-}" ]; then + echo "ERROR: FORGE_URL is required" >&2 + exit 1 + fi + + if [ -z "${FORGE_OPS_REPO:-}" ]; then + echo "ERROR: FORGE_OPS_REPO is required" >&2 + exit 1 + fi + + # Parse command line args + case "${1:-help}" in + setup) + setup_vault_branch_protection "${2:-main}" + ;; + setup-profile) + if [ -z "${2:-}" ]; then + echo "ERROR: repo path required (format: owner/repo)" >&2 + exit 1 + fi + setup_profile_branch_protection "${2}" "${3:-main}" + ;; + setup-project) + if [ -z "${2:-}" ]; then + echo "ERROR: repo path required (format: owner/repo)" >&2 + exit 1 + fi + setup_project_branch_protection "${2}" "${3:-main}" + ;; + verify) + 
verify_branch_protection "${2:-main}" + ;; + remove) + remove_branch_protection "${2:-main}" + ;; + help|*) + echo "Usage: $0 {setup|setup-profile|setup-project|verify|remove} [args...]" + echo "" + echo "Commands:" + echo " setup [branch] Set up branch protection on ops repo (default: main)" + echo " setup-profile [branch] Set up branch protection on .profile repo" + echo " setup-project [branch] Set up branch protection on project repo" + echo " verify [branch] Verify branch protection is configured correctly" + echo " remove [branch] Remove branch protection (for cleanup/testing)" + echo "" + echo "Required environment variables:" + echo " FORGE_TOKEN Forgejo API token (admin user recommended)" + echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)" + echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)" + exit 0 + ;; + esac +fi diff --git a/lib/ci-debug.sh b/lib/ci-debug.sh index 4fa15ba..dd8a0a5 100755 --- a/lib/ci-debug.sh +++ b/lib/ci-debug.sh @@ -17,6 +17,11 @@ REPO="${FORGE_REPO}" API="${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}" api() { + # Validate API URL to prevent URL injection + if ! validate_url "$API"; then + echo "ERROR: API URL validation failed - possible URL injection attempt" >&2 + return 1 + fi curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" "${API}/$1" } diff --git a/lib/ci-helpers.sh b/lib/ci-helpers.sh index 23ebce7..11c668e 100644 --- a/lib/ci-helpers.sh +++ b/lib/ci-helpers.sh @@ -7,27 +7,6 @@ set -euo pipefail # ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh) # classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh) -# ensure_blocked_label_id — look up (or create) the "blocked" label, print its ID. -# Caches the result in _BLOCKED_LABEL_ID to avoid repeated API calls. 
-# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() -ensure_blocked_label_id() { - if [ -n "${_BLOCKED_LABEL_ID:-}" ]; then - printf '%s' "$_BLOCKED_LABEL_ID" - return 0 - fi - _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \ - | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true) - if [ -z "$_BLOCKED_LABEL_ID" ]; then - _BLOCKED_LABEL_ID=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/labels" \ - -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null || true) - fi - printf '%s' "$_BLOCKED_LABEL_ID" -} - # ensure_priority_label — look up (or create) the "priority" label, print its ID. # Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls. # Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() @@ -267,3 +246,42 @@ ci_promote() { echo "$new_num" } + +# ci_get_logs [--step ] +# Reads CI logs from the Woodpecker SQLite database. +# Requires: WOODPECKER_DATA_DIR env var or mounted volume at /woodpecker-data +# Returns: 0 on success, 1 on failure. Outputs log text to stdout. 
+# +# Usage: +# ci_get_logs 346 # Get all failed step logs +# ci_get_logs 346 --step smoke-init # Get logs for specific step +ci_get_logs() { + local pipeline_number="$1" + shift || true + + local step_name="" + while [ $# -gt 0 ]; do + case "$1" in + --step|-s) + step_name="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" >&2 + return 1 + ;; + esac + done + + local log_reader="${FACTORY_ROOT:-/home/agent/disinto}/lib/ci-log-reader.py" + if [ -f "$log_reader" ]; then + if [ -n "$step_name" ]; then + python3 "$log_reader" "$pipeline_number" --step "$step_name" + else + python3 "$log_reader" "$pipeline_number" + fi + else + echo "ERROR: ci-log-reader.py not found at $log_reader" >&2 + return 1 + fi +} diff --git a/lib/ci-log-reader.py b/lib/ci-log-reader.py new file mode 100755 index 0000000..5786e5a --- /dev/null +++ b/lib/ci-log-reader.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +ci-log-reader.py — Read CI logs from Woodpecker SQLite database. + +Usage: + ci-log-reader.py [--step ] + +Reads log entries from the Woodpecker SQLite database and outputs them to stdout. +If --step is specified, filters to that step only. Otherwise returns logs from +all failed steps, truncated to the last 200 lines to avoid context bloat. + +Environment: + WOODPECKER_DATA_DIR - Path to Woodpecker data directory (default: /woodpecker-data) + +The SQLite database is located at: $WOODPECKER_DATA_DIR/woodpecker.sqlite +""" + +import argparse +import sqlite3 +import sys +import os + +DEFAULT_DB_PATH = "/woodpecker-data/woodpecker.sqlite" +DEFAULT_WOODPECKER_DATA_DIR = "/woodpecker-data" +MAX_OUTPUT_LINES = 200 + + +def get_db_path(): + """Determine the path to the Woodpecker SQLite database.""" + env_dir = os.environ.get("WOODPECKER_DATA_DIR", DEFAULT_WOODPECKER_DATA_DIR) + return os.path.join(env_dir, "woodpecker.sqlite") + + +def query_logs(pipeline_number: int, step_name: str | None = None) -> list[str]: + """ + Query log entries from the Woodpecker database. 
+ + Args: + pipeline_number: The pipeline number to query + step_name: Optional step name to filter by + + Returns: + List of log data strings + """ + db_path = get_db_path() + + if not os.path.exists(db_path): + print(f"ERROR: Woodpecker database not found at {db_path}", file=sys.stderr) + print(f"Set WOODPECKER_DATA_DIR or mount volume to {DEFAULT_WOODPECKER_DATA_DIR}", file=sys.stderr) + sys.exit(1) + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + if step_name: + # Query logs for a specific step + query = """ + SELECT le.data + FROM log_entries le + JOIN steps s ON le.step_id = s.id + JOIN pipelines p ON s.pipeline_id = p.id + WHERE p.number = ? AND s.name = ? + ORDER BY le.id + """ + cursor.execute(query, (pipeline_number, step_name)) + else: + # Query logs for all failed steps in the pipeline + query = """ + SELECT le.data + FROM log_entries le + JOIN steps s ON le.step_id = s.id + JOIN pipelines p ON s.pipeline_id = p.id + WHERE p.number = ? 
AND s.state IN ('failure', 'error', 'killed') + ORDER BY le.id + """ + cursor.execute(query, (pipeline_number,)) + + logs = [row["data"] for row in cursor.fetchall()] + conn.close() + return logs + + +def main(): + parser = argparse.ArgumentParser( + description="Read CI logs from Woodpecker SQLite database" + ) + parser.add_argument( + "pipeline_number", + type=int, + help="Pipeline number to query" + ) + parser.add_argument( + "--step", "-s", + dest="step_name", + default=None, + help="Filter to a specific step name" + ) + + args = parser.parse_args() + + logs = query_logs(args.pipeline_number, args.step_name) + + if not logs: + if args.step_name: + print(f"No logs found for pipeline #{args.pipeline_number}, step '{args.step_name}'", file=sys.stderr) + else: + print(f"No failed steps found in pipeline #{args.pipeline_number}", file=sys.stderr) + sys.exit(0) + + # Join all log data and output + full_output = "\n".join(logs) + + # Truncate to last N lines to avoid context bloat + lines = full_output.split("\n") + if len(lines) > MAX_OUTPUT_LINES: + # Keep last N lines + truncated = lines[-MAX_OUTPUT_LINES:] + print("\n".join(truncated)) + else: + print(full_output) + + +if __name__ == "__main__": + main() diff --git a/lib/ci-setup.sh b/lib/ci-setup.sh new file mode 100644 index 0000000..7c4c5dd --- /dev/null +++ b/lib/ci-setup.sh @@ -0,0 +1,455 @@ +#!/usr/bin/env bash +# ============================================================================= +# ci-setup.sh — CI setup functions for Woodpecker and cron configuration +# +# Internal functions (called via _load_ci_context + _*_impl): +# _install_cron_impl() - Install crontab entries for project agents +# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker +# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow +# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker +# +# Globals expected (asserted by _load_ci_context): +# FORGE_URL - Forge instance 
URL (e.g. http://localhost:3000) +# FORGE_TOKEN - Forge API token +# FACTORY_ROOT - Root of the disinto factory +# +# Usage: +# source "${FACTORY_ROOT}/lib/ci-setup.sh" +# ============================================================================= +set -euo pipefail + +# Assert required globals are set before using this module. +_load_ci_context() { + local missing=() + [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") + [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") + [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") + if [ "${#missing[@]}" -gt 0 ]; then + echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2 + exit 1 + fi +} + +# Generate and optionally install cron entries for the project agents. +# Usage: install_cron +_install_cron_impl() { + local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" + + # In compose mode, skip host cron — the agents container runs cron internally + if [ "$bare" = false ]; then + echo "" + echo "Cron: skipped (agents container handles scheduling in compose mode)" + return + fi + + # Bare mode: crontab is required on the host + if ! 
command -v crontab &>/dev/null; then + echo "Error: crontab not found (required for bare-metal mode)" >&2 + echo " Install: apt install cron / brew install cron" >&2 + exit 1 + fi + + # Use absolute path for the TOML in cron entries + local abs_toml + abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" + + local cron_block + cron_block="# disinto: ${name} +2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 +4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 +0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" + + echo "" + echo "Cron entries to install:" + echo "$cron_block" + echo "" + + # Check if cron entries already exist + local current_crontab + current_crontab=$(crontab -l 2>/dev/null || true) + if echo "$current_crontab" | grep -q "# disinto: ${name}"; then + echo "Cron: skipped (entries for ${name} already installed)" + return + fi + + if [ "$auto_yes" = false ] && [ -t 0 ]; then + read -rp "Install these cron entries? [y/N] " confirm + if [[ ! "$confirm" =~ ^[Yy] ]]; then + echo "Skipped cron install. Add manually with: crontab -e" + return + fi + fi + + # Append to existing crontab + if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then + echo "Cron entries installed for ${name}" + else + echo "Error: failed to install cron entries" >&2 + return 1 + fi +} + +# Set up Woodpecker CI to use Forgejo as its forge backend. +# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
+# Usage: create_woodpecker_oauth +_create_woodpecker_oauth_impl() { + local forge_url="$1" + local _repo_slug="$2" # unused but required for signature compatibility + + echo "" + echo "── Woodpecker OAuth2 setup ────────────────────────────" + + # Create OAuth2 application on Forgejo for Woodpecker + local oauth2_name="woodpecker-ci" + local redirect_uri="http://localhost:8000/authorize" + local existing_app client_id client_secret + + # Check if OAuth2 app already exists + existing_app=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ + | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true + + if [ -n "$existing_app" ]; then + echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" + client_id="$existing_app" + else + local oauth2_resp + oauth2_resp=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/applications/oauth2" \ + -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ + 2>/dev/null) || oauth2_resp="" + + if [ -z "$oauth2_resp" ]; then + echo "Warning: failed to create OAuth2 app on Forgejo" >&2 + return + fi + + client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') + client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') + + if [ -z "$client_id" ]; then + echo "Warning: OAuth2 app creation returned no client_id" >&2 + return + fi + + echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" + fi + + # Store Woodpecker forge config in .env + # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references + # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri + local env_file="${FACTORY_ROOT}/.env" + local wp_vars=( + "WOODPECKER_FORGEJO=true" + "WOODPECKER_FORGEJO_URL=${forge_url}" + 
"WOODPECKER_HOST=http://localhost:8000" + ) + if [ -n "${client_id:-}" ]; then + wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") + fi + if [ -n "${client_secret:-}" ]; then + wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") + fi + + for var_line in "${wp_vars[@]}"; do + local var_name="${var_line%%=*}" + if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then + sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" + else + printf '%s\n' "$var_line" >> "$env_file" + fi + done + echo "Config: Woodpecker forge vars written to .env" +} + +# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. +# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). +# Called after compose stack is up, before activate_woodpecker_repo. +# Usage: generate_woodpecker_token +_generate_woodpecker_token_impl() { + local forge_url="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + local env_file="${FACTORY_ROOT}/.env" + local admin_user="disinto-admin" + local admin_pass="${_FORGE_ADMIN_PASS:-}" + + # Skip if already set + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + echo "Config: WOODPECKER_TOKEN already set in .env" + return 0 + fi + + echo "" + echo "── Woodpecker token generation ────────────────────────" + + if [ -z "$admin_pass" ]; then + echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 + echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 + return 1 + fi + + # Wait for Woodpecker to become ready + echo -n "Waiting for Woodpecker" + local retries=0 + while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 30 ]; then + echo "" + echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 + return 1 + fi + echo -n "." 
+ sleep 2 + done + echo " ready" + + # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token + local cookie_jar auth_body_file + cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) + auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) + + # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) + local csrf + csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ + | grep -o 'name="_csrf"[^>]*' | head -1 \ + | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || csrf="" + + if [ -z "$csrf" ]; then + echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ + -o /dev/null \ + "${forge_url}/user/login" \ + --data-urlencode "_csrf=${csrf}" \ + --data-urlencode "user_name=${admin_user}" \ + --data-urlencode "password=${admin_pass}" \ + 2>/dev/null || true + + # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) + local wp_redir + wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ + "${wp_server}/authorize" 2>/dev/null) || wp_redir="" + + if [ -z "$wp_redir" ]; then + echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + # Rewrite internal Docker network URLs to host-accessible URLs. + # Handle both plain and URL-encoded forms of the internal hostnames. 
+ local forge_url_enc wp_server_enc + forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') + wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') + wp_redir=$(printf '%s' "$wp_redir" \ + | sed "s|http://forgejo:3000|${forge_url}|g" \ + | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ + | sed "s|http://woodpecker:8000|${wp_server}|g" \ + | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") + + # Step 3: Hit Forgejo OAuth authorize endpoint with session + # First time: shows consent page. Already approved: redirects with code. + local auth_headers redirect_loc auth_code + auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o "$auth_body_file" \ + "$wp_redir" 2>/dev/null) || auth_headers="" + + redirect_loc=$(printf '%s' "$auth_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + # Auto-approved: extract code from redirect + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + else + # Consent page: extract CSRF and all form fields, POST grant approval + local consent_csrf form_client_id form_state form_redirect_uri + consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ + | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || consent_csrf="" + form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" + form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" + form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" + + if [ -n "$consent_csrf" ]; then + local grant_headers + grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o /dev/null -X POST \ + "${forge_url}/login/oauth/grant" \ 
+ --data-urlencode "_csrf=${consent_csrf}" \ + --data-urlencode "client_id=${form_client_id}" \ + --data-urlencode "state=${form_state}" \ + --data-urlencode "scope=" \ + --data-urlencode "nonce=" \ + --data-urlencode "redirect_uri=${form_redirect_uri}" \ + --data-urlencode "granted=true" \ + 2>/dev/null) || grant_headers="" + + redirect_loc=$(printf '%s' "$grant_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + fi + fi + fi + + rm -f "$auth_body_file" + + if [ -z "${auth_code:-}" ]; then + echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 + rm -f "$cookie_jar" + return 1 + fi + + # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) + local state + state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') + + local wp_headers wp_token + wp_headers=$(curl -sf -c "$cookie_jar" \ + -D - -o /dev/null \ + "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ + 2>/dev/null) || wp_headers="" + + # Extract token from redirect URL (Woodpecker returns ?access_token=...) + redirect_loc=$(printf '%s' "$wp_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + wp_token="" + if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then + wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') + fi + + # Fallback: check for user_sess cookie + if [ -z "$wp_token" ]; then + wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" + fi + + rm -f "$cookie_jar" + + if [ -z "$wp_token" ]; then + echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 + return 1 + fi + + # Step 5: Create persistent personal access token via Woodpecker API + # WP v3 requires CSRF header for POST operations with session tokens. 
+ local wp_csrf + wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ + "${wp_server}/web-config.js" 2>/dev/null \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" + + local pat_resp final_token + pat_resp=$(curl -sf -X POST \ + -b "user_sess=${wp_token}" \ + ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ + "${wp_server}/api/user/token" \ + 2>/dev/null) || pat_resp="" + + final_token="" + if [ -n "$pat_resp" ]; then + final_token=$(printf '%s' "$pat_resp" \ + | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ + 2>/dev/null) || final_token="" + fi + + # Use persistent token if available, otherwise use session token + final_token="${final_token:-$wp_token}" + + # Save to .env + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" + else + printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" + fi + export WOODPECKER_TOKEN="$final_token" + echo "Config: WOODPECKER_TOKEN generated and saved to .env" +} + +# Activate a repo in Woodpecker CI. +# Usage: activate_woodpecker_repo +_activate_woodpecker_repo_impl() { + local forge_repo="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + + # Wait for Woodpecker to become ready after stack start + local retries=0 + while [ $retries -lt 10 ]; do + if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then + break + fi + retries=$((retries + 1)) + sleep 2 + done + + if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then + echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 + return + fi + + echo "" + echo "── Woodpecker repo activation ─────────────────────────" + + local wp_token="${WOODPECKER_TOKEN:-}" + if [ -z "$wp_token" ]; then + echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + return + fi + + local wp_repo_id + wp_repo_id=$(curl -sf \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" + else + # Get Forgejo repo numeric ID for WP activation + local forge_repo_id + forge_repo_id=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" + + local activate_resp + activate_resp=$(curl -sf -X POST \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ + 2>/dev/null) || activate_resp="" + + wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" + + # Set pipeline timeout to 5 minutes (default is 60) + if curl -sf -X PATCH \ + -H "Authorization: Bearer ${wp_token}" \ + -H "Content-Type: application/json" \ + "${wp_server}/api/repos/${wp_repo_id}" \ + -d '{"timeout": 5}' >/dev/null 2>&1; then + echo "Config: pipeline timeout set to 5 minutes" + fi + else + echo "Warning: could not activate repo in Woodpecker" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + fi + 
fi + + # Store repo ID for later TOML generation + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + _WP_REPO_ID="$wp_repo_id" + fi +} diff --git a/lib/env.sh b/lib/env.sh index 6bc181e..1c30632 100755 --- a/lib/env.sh +++ b/lib/env.sh @@ -13,7 +13,7 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" if [ "${DISINTO_CONTAINER:-}" = "1" ]; then DISINTO_DATA_DIR="${HOME}/data" DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs" - mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics} + mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher} else DISINTO_LOG_DIR="${FACTORY_ROOT}" fi @@ -21,15 +21,37 @@ export DISINTO_LOG_DIR # Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env. # Always source .env — cron jobs inside the container do NOT inherit compose -# env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are -# already set and won't be clobbered since env.sh uses ${VAR:-default} patterns -# for derived values. FORGE_URL from .env (localhost:3000) is overridden below -# by the compose-injected value when running via docker exec. +# env vars (FORGE_TOKEN, etc.). Only FORGE_URL is preserved across .env +# sourcing because compose injects http://forgejo:3000 while .env has +# http://localhost:3000. FORGE_TOKEN is NOT preserved so that refreshed +# tokens in .env take effect immediately in running containers. 
if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then set -a _saved_forge_url="${FORGE_URL:-}" - eval "$(sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" 2>/dev/null)" \ - || echo "Warning: failed to decrypt .env.enc — secrets not loaded" >&2 + # Use temp file + validate dotenv format before sourcing (avoids eval injection) + # SOPS -d automatically verifies MAC/GCM authentication tag during decryption + _tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; } + if ! sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then + echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2 + rm -f "$_tmpenv" + exit 1 + fi + # Validate: non-empty, non-comment lines must match KEY=value pattern + # Filter out blank lines and comments before validation + _validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true) + if [ -n "$_validated" ]; then + # Write validated content to a second temp file and source it + _validated_env=$(mktemp) + printf '%s\n' "$_validated" > "$_validated_env" + # shellcheck source=/dev/null + source "$_validated_env" + rm -f "$_validated_env" + else + echo "Error: .env.enc decryption output failed format validation" >&2 + rm -f "$_tmpenv" + exit 1 + fi + rm -f "$_tmpenv" set +a [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" elif [ -f "$FACTORY_ROOT/.env" ]; then @@ -42,6 +64,13 @@ elif [ -f "$FACTORY_ROOT/.env" ]; then [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" fi +# Allow per-container token override (#375): .env sets the default FORGE_TOKEN +# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a +# different Forgejo identity (e.g. dev-qwen). 
+if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then + export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE" +fi + # PATH: foundry, node, system export PATH="${HOME}/.local/bin:${HOME}/.foundry/bin:${HOME}/.nvm/versions/node/v22.20.0/bin:/usr/local/bin:/usr/bin:/bin:${PATH}" export HOME="${HOME:-/home/debian}" @@ -51,16 +80,11 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML" fi -# Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN -if [ -z "${FORGE_TOKEN:-}" ]; then - FORGE_TOKEN="${CODEBERG_TOKEN:-}" -fi -export FORGE_TOKEN -export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat +# Forge token +export FORGE_TOKEN="${FORGE_TOKEN:-}" -# Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN +# Review bot token export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}" -export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat # Per-agent tokens (#747): each agent gets its own Forgejo identity. # Falls back to FORGE_TOKEN for backwards compat with single-token setups. 
@@ -69,20 +93,16 @@ export FORGE_GARDENER_TOKEN="${FORGE_GARDENER_TOKEN:-${FORGE_TOKEN}}" export FORGE_VAULT_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}" -export FORGE_ACTION_TOKEN="${FORGE_ACTION_TOKEN:-${FORGE_TOKEN}}" +export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" -# Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES -export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,action-bot}}" -export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat +# Bot usernames filter +export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}" -# Project config (FORGE_* preferred, CODEBERG_* fallback) -export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}" -export CODEBERG_REPO="${FORGE_REPO}" # backwards compat +# Project config +export FORGE_REPO="${FORGE_REPO:-}" export FORGE_URL="${FORGE_URL:-http://localhost:3000}" export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}" export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}" -export CODEBERG_API="${FORGE_API}" # backwards compat -export CODEBERG_WEB="${FORGE_WEB}" # backwards compat # tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo) if [ -z "${TEA_LOGIN:-}" ]; then case "${FORGE_URL}" in @@ -108,7 +128,7 @@ export CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}" # Vault-only token guard (#745): external-action tokens (GITHUB_TOKEN, CLAWHUB_TOKEN) # must NEVER be available to agents. They live in .env.vault.enc and are injected -# only into the ephemeral vault-runner container at fire time. Unset them here so +# only into the ephemeral runner container at fire time. 
Unset them here so # even an accidental .env inclusion cannot leak them into agent sessions. unset GITHUB_TOKEN 2>/dev/null || true unset CLAWHUB_TOKEN 2>/dev/null || true @@ -118,21 +138,75 @@ unset CLAWHUB_TOKEN 2>/dev/null || true export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1 # Shared log helper +# Usage: log "message" +# Output: [2026-04-03T14:00:00Z] agent: message +# Where agent is set via LOG_AGENT variable (defaults to caller's context) log() { - printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" + local agent="${LOG_AGENT:-agent}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" } -# Forge API helper — usage: forge_api GET /issues?state=open +# ============================================================================= +# URL VALIDATION HELPER +# ============================================================================= +# Validates that a URL variable matches expected patterns to prevent +# URL injection or redirection attacks (OWASP URL Redirection prevention). +# Returns 0 if valid, 1 if invalid. +# ============================================================================= +validate_url() { + local url="$1" + local allowed_hosts="${2:-}" + + # Must start with http:// or https:// + if [[ ! 
"$url" =~ ^https?:// ]]; then + return 1 + fi + + # Extract host and reject if it contains @ (credential injection) + if [[ "$url" =~ ^https?://[^@]+@ ]]; then + return 1 + fi + + # If allowed_hosts is specified, validate against it + if [ -n "$allowed_hosts" ]; then + local host + host=$(echo "$url" | sed -E 's|^https?://([^/:]+).*|\1|') + local valid=false + for allowed in $allowed_hosts; do + if [ "$host" = "$allowed" ]; then + valid=true + break + fi + done + if [ "$valid" = false ]; then + return 1 + fi + fi + + return 0 +} + +# ============================================================================= +# FORGE API HELPER +# ============================================================================= +# Usage: forge_api GET /issues?state=open +# Validates FORGE_API before use to prevent URL injection attacks. +# ============================================================================= forge_api() { local method="$1" path="$2" shift 2 + + # Validate FORGE_API to prevent URL injection + if ! validate_url "$FORGE_API"; then + echo "ERROR: FORGE_API validation failed - possible URL injection attempt" >&2 + return 1 + fi + curl -sf -X "$method" \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ "${FORGE_API}${path}" "$@" } -# Backwards-compat alias -codeberg_api() { forge_api "$@"; } # Paginate a Forge API GET endpoint and return all items as a merged JSON array. 
# Usage: forge_api_all /path (no existing query params) @@ -149,7 +223,8 @@ forge_api_all() { page=1 while true; do page_items=$(forge_api GET "${path_prefix}${sep}limit=50&page=${page}") - count=$(printf '%s' "$page_items" | jq 'length') + count=$(printf '%s' "$page_items" | jq 'length' 2>/dev/null) || count=0 + [ -z "$count" ] && count=0 [ "$count" -eq 0 ] && break all_items=$(printf '%s\n%s' "$all_items" "$page_items" | jq -s 'add') [ "$count" -lt 50 ] && break @@ -157,13 +232,23 @@ forge_api_all() { done printf '%s' "$all_items" } -# Backwards-compat alias -codeberg_api_all() { forge_api_all "$@"; } -# Woodpecker API helper +# ============================================================================= +# WOODPECKER API HELPER +# ============================================================================= +# Usage: woodpecker_api /repos/{id}/pipelines +# Validates WOODPECKER_SERVER before use to prevent URL injection attacks. +# ============================================================================= woodpecker_api() { local path="$1" shift + + # Validate WOODPECKER_SERVER to prevent URL injection + if ! validate_url "$WOODPECKER_SERVER"; then + echo "ERROR: WOODPECKER_SERVER validation failed - possible URL injection attempt" >&2 + return 1 + fi + curl -sfL \ -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ "${WOODPECKER_SERVER}/api${path}" "$@" diff --git a/lib/file-action-issue.sh b/lib/file-action-issue.sh deleted file mode 100644 index abba4c8..0000000 --- a/lib/file-action-issue.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# file-action-issue.sh — File an action issue for a formula run -# -# Usage: source this file, then call file_action_issue. -# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh -# -# file_action_issue <body> -# Sets FILED_ISSUE_NUM on success. 
-# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected - -# Load secret scanner -# shellcheck source=secret-scan.sh -source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh" - -file_action_issue() { - local formula_name="$1" title="$2" body="$3" - FILED_ISSUE_NUM="" - - # Secret scan: reject issue bodies containing embedded secrets - if ! scan_for_secrets "$body"; then - echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2 - return 4 - fi - - # Dedup: skip if an open action issue for this formula already exists - local open_actions - open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true) - if [ -n "$open_actions" ] && [ "$open_actions" != "null" ]; then - local existing - existing=$(printf '%s' "$open_actions" | \ - jq --arg f "$formula_name" '[.[] | select(.title | test($f))] | length' 2>/dev/null || echo 0) - if [ "${existing:-0}" -gt 0 ]; then - return 1 - fi - fi - - # Fetch 'action' label ID - local action_label_id - action_label_id=$(forge_api GET "/labels" 2>/dev/null | \ - jq -r '.[] | select(.name == "action") | .id' 2>/dev/null || true) - if [ -z "$action_label_id" ]; then - return 2 - fi - - # Create the issue - local payload result - payload=$(jq -nc \ - --arg title "$title" \ - --arg body "$body" \ - --argjson labels "[$action_label_id]" \ - '{title: $title, body: $body, labels: $labels}') - - result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true) - FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true) - - if [ -z "$FILED_ISSUE_NUM" ]; then - return 3 - fi -} diff --git a/lib/forge-push.sh b/lib/forge-push.sh new file mode 100644 index 0000000..1da61f7 --- /dev/null +++ b/lib/forge-push.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# ============================================================================= +# forge-push.sh — push_to_forge() 
function +# +# Handles pushing a local clone to the Forgejo remote and verifying the push. +# +# Globals expected: +# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) +# FORGE_TOKEN - API token for Forge operations (used for API verification) +# FORGE_PASS - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x) +# FACTORY_ROOT - Root of the disinto factory +# PRIMARY_BRANCH - Primary branch name (e.g. main) +# +# Usage: +# source "${FACTORY_ROOT}/lib/forge-push.sh" +# push_to_forge <repo_root> <forge_url> <repo_slug> +# ============================================================================= +set -euo pipefail + +# Assert required globals are set before using this module. +_assert_forge_push_globals() { + local missing=() + [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") + [ -z "${FORGE_PASS:-}" ] && missing+=("FORGE_PASS") + [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") + [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") + [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") + if [ "${#missing[@]}" -gt 0 ]; then + echo "Error: forge-push.sh requires these globals to be set: ${missing[*]}" >&2 + exit 1 + fi +} + +# Push local clone to the Forgejo remote. +push_to_forge() { + local repo_root="$1" forge_url="$2" repo_slug="$3" + + # Build authenticated remote URL: http://dev-bot:<password>@host:port/org/repo.git + # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works. 
+ if [ -z "${FORGE_PASS:-}" ]; then + echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2 + return 1 + fi + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_PASS}@|") + local remote_url="${auth_url}/${repo_slug}.git" + # Display URL without token + local display_url="${forge_url}/${repo_slug}.git" + + # Always set the remote URL to ensure credentials are current + if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then + git -C "$repo_root" remote set-url forgejo "$remote_url" + else + git -C "$repo_root" remote add forgejo "$remote_url" + fi + echo "Remote: forgejo -> ${display_url}" + + # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) + if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then + echo "Push: skipped (local repo has no commits)" + return 0 + fi + + # Push all branches and tags + echo "Pushing: branches to forgejo" + if ! git -C "$repo_root" push forgejo --all 2>&1; then + echo "Error: failed to push branches to Forgejo" >&2 + return 1 + fi + echo "Pushing: tags to forgejo" + if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then + echo "Error: failed to push tags to Forgejo" >&2 + return 1 + fi + + # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) + local is_empty="true" + local verify_attempt + for verify_attempt in $(seq 1 5); do + local repo_info + repo_info=$(curl -sf --max-time 10 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" + if [ -z "$repo_info" ]; then + is_empty="skipped" + break # API unreachable, skip verification + fi + is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') + if [ "$is_empty" != "true" ]; then + echo "Verify: repo is not empty (push confirmed)" + break + fi + if [ "$verify_attempt" -lt 5 ]; then + sleep 2 + fi + done + if [ "$is_empty" = "true" ]; then + echo "Warning: Forgejo repo still reports empty after push" >&2 + return 1 + fi +} diff --git a/lib/forge-setup.sh b/lib/forge-setup.sh new file mode 100644 index 0000000..40909c0 --- /dev/null +++ b/lib/forge-setup.sh @@ -0,0 +1,518 @@ +#!/usr/bin/env bash +# ============================================================================= +# forge-setup.sh — setup_forge() and helpers for Forgejo provisioning +# +# Handles admin user creation, bot user creation, token generation, +# password resets, repo creation, and collaborator setup. +# +# Globals expected (asserted by _load_init_context): +# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) +# FACTORY_ROOT - Root of the disinto factory +# PRIMARY_BRANCH - Primary branch name (e.g. main) +# +# Usage: +# source "${FACTORY_ROOT}/lib/forge-setup.sh" +# setup_forge <forge_url> <repo_slug> +# ============================================================================= +set -euo pipefail + +# Assert required globals are set before using this module. 
+_load_init_context() { + local missing=() + [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") + [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") + [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") + if [ "${#missing[@]}" -gt 0 ]; then + echo "Error: forge-setup.sh requires these globals to be set: ${missing[*]}" >&2 + exit 1 + fi +} + +# Execute a command in the Forgejo container (for admin operations) +_forgejo_exec() { + local use_bare="${DISINTO_BARE:-false}" + if [ "$use_bare" = true ]; then + docker exec -u git disinto-forgejo "$@" + else + docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" + fi +} + +# Provision or connect to a local Forgejo instance. +# Creates admin + bot users, generates API tokens, stores in .env. +# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. +setup_forge() { + local forge_url="$1" + local repo_slug="$2" + local use_bare="${DISINTO_BARE:-false}" + + echo "" + echo "── Forge setup ────────────────────────────────────────" + + # Check if Forgejo is already running + if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then + echo "Forgejo: ${forge_url} (already running)" + else + echo "Forgejo not reachable at ${forge_url}" + echo "Starting Forgejo via Docker..." + + if ! 
command -v docker &>/dev/null; then
+      echo "Error: docker not found — needed to provision Forgejo" >&2
+      echo "  Install Docker or start Forgejo manually at ${forge_url}" >&2
+      exit 1
+    fi
+
+    # Extract port from forge_url.
+    # sed -n + p: print ONLY on match, so a port-less URL yields "" and the
+    # :-3000 default below applies (without -n, sed echoes the whole URL
+    # through unchanged and the default is never reached).
+    local forge_port
+    forge_port=$(printf '%s' "$forge_url" | sed -nE 's|.*:([0-9]+)/?$|\1|p')
+    forge_port="${forge_port:-3000}"
+
+    if [ "$use_bare" = true ]; then
+      # Bare-metal mode: standalone docker run.
+      # :? aborts with a clear message if FORGEJO_DATA_DIR is unset/empty —
+      # it is not covered by _load_init_context since it is bare-mode-only.
+      mkdir -p "${FORGEJO_DATA_DIR:?FORGEJO_DATA_DIR must be set in bare mode}"
+
+      if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then
+        docker start disinto-forgejo >/dev/null 2>&1 || true
+      else
+        docker run -d \
+          --name disinto-forgejo \
+          --restart unless-stopped \
+          -p "${forge_port}:3000" \
+          -p 2222:22 \
+          -v "${FORGEJO_DATA_DIR}:/data" \
+          -e "FORGEJO__database__DB_TYPE=sqlite3" \
+          -e "FORGEJO__server__ROOT_URL=${forge_url}/" \
+          -e "FORGEJO__server__HTTP_PORT=3000" \
+          -e "FORGEJO__service__DISABLE_REGISTRATION=true" \
+          codeberg.org/forgejo/forgejo:11.0
+      fi
+    else
+      # Compose mode: start Forgejo via docker compose
+      docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo
+    fi
+
+    # Wait for Forgejo to become healthy
+    echo -n "Waiting for Forgejo to start"
+    local retries=0
+    while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do
+      retries=$((retries + 1))
+      if [ "$retries" -gt 60 ]; then
+        echo ""
+        echo "Error: Forgejo did not become ready within 60s" >&2
+        exit 1
+      fi
+      echo -n "."
+      sleep 1
+    done
+    echo " ready"
+  fi
+
+  # Wait for Forgejo database to accept writes (API may be ready before DB is)
+  echo -n "Waiting for Forgejo database"
+  local db_ready=false
+  for _i in $(seq 1 30); do
+    if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then
+      db_ready=true
+      break
+    fi
+    echo -n "."
+ sleep 1 + done + echo "" + if [ "$db_ready" != true ]; then + echo "Error: Forgejo database not ready after 30s" >&2 + exit 1 + fi + + # Create admin user if it doesn't exist + local admin_user="disinto-admin" + local admin_pass + local env_file="${FACTORY_ROOT}/.env" + + # Re-read persisted admin password if available (#158) + if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then + admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) + fi + # Generate a fresh password only when none was persisted + if [ -z "${admin_pass:-}" ]; then + admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + fi + + if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then + echo "Creating admin user: ${admin_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --admin \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --email "admin@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create admin user '${admin_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --must-change-password=false + + # Verify admin user was actually created + if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
+      echo "Error: admin user '${admin_user}' not found after creation" >&2
+      exit 1
+    fi
+
+    # Persist admin password to .env for idempotent re-runs (#158)
+    if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
+      sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file"
+    else
+      printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file"
+    fi
+  else
+    echo "Admin user: ${admin_user} (already exists)"
+    # Only reset password if basic auth fails (#158, #267)
+    # Forgejo 11.x may ignore --must-change-password=false, blocking token creation
+    if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \
+      "${forge_url}/api/v1/user" >/dev/null 2>&1; then
+      _forgejo_exec forgejo admin user change-password \
+        --username "${admin_user}" \
+        --password "${admin_pass}" \
+        --must-change-password=false
+    fi
+  fi
+  # Preserve password for Woodpecker OAuth2 token generation (#779)
+  _FORGE_ADMIN_PASS="$admin_pass"
+
+  # Create human user (disinto-admin) as site admin if it doesn't exist.
+  local human_user="disinto-admin"
+  local human_pass
+  human_pass="${admin_pass}"  # human_user is the same account as admin_user ("disinto-admin"): reuse its known password — a freshly generated random one is never set on the existing account, so the basic-auth token delete/create calls below would always fail and HUMAN_TOKEN would never be written
+
+  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
+    echo "Creating human user: ${human_user}"
+    local create_output
+    if ! create_output=$(_forgejo_exec forgejo admin user create \
+      --admin \
+      --username "${human_user}" \
+      --password "${human_pass}" \
+      --email "admin@disinto.local" \
+      --must-change-password=false 2>&1); then
+      echo "Error: failed to create human user '${human_user}':" >&2
+      echo "  ${create_output}" >&2
+      exit 1
+    fi
+    # Forgejo 11.x ignores --must-change-password=false on create;
+    # explicitly clear the flag so basic-auth token creation works.
+ _forgejo_exec forgejo admin user change-password \ + --username "${human_user}" \ + --password "${human_pass}" \ + --must-change-password=false + + # Verify human user was actually created + if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then + echo "Error: human user '${human_user}' not found after creation" >&2 + exit 1 + fi + echo " Human user '${human_user}' created as site admin" + else + echo "Human user: ${human_user} (already exists)" + fi + + # Delete existing admin token if present (token sha1 is only returned at creation time) + local existing_token_id + existing_token_id=$(curl -sf \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ + | jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id="" + if [ -n "$existing_token_id" ]; then + curl -sf -X DELETE \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true + fi + + # Create admin token (fresh, so sha1 is returned) + local admin_token + admin_token=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" \ + -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ + | jq -r '.sha1 // empty') || admin_token="" + + if [ -z "$admin_token" ]; then + echo "Error: failed to obtain admin API token" >&2 + exit 1 + fi + + # Get or create human user token + local human_token + if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then + # Delete existing human token if present (token sha1 is only returned at creation time) + local existing_human_token_id + existing_human_token_id=$(curl -sf \ + -u "${human_user}:${human_pass}" \ + "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \ + | jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id="" + if 
[ -n "$existing_human_token_id" ]; then + curl -sf -X DELETE \ + -u "${human_user}:${human_pass}" \ + "${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true + fi + + # Create human token (fresh, so sha1 is returned) + human_token=$(curl -sf -X POST \ + -u "${human_user}:${human_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${human_user}/tokens" \ + -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \ + | jq -r '.sha1 // empty') || human_token="" + + if [ -n "$human_token" ]; then + # Store human token in .env + if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then + sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file" + else + printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file" + fi + export HUMAN_TOKEN="$human_token" + echo " Human token saved (HUMAN_TOKEN)" + fi + fi + + # Create bot users and tokens + # Each agent gets its own Forgejo account for identity and audit trail (#747). + # Map: bot-username -> env-var-name for the token + local -A bot_token_vars=( + [dev-bot]="FORGE_TOKEN" + [review-bot]="FORGE_REVIEW_TOKEN" + [planner-bot]="FORGE_PLANNER_TOKEN" + [gardener-bot]="FORGE_GARDENER_TOKEN" + [vault-bot]="FORGE_VAULT_TOKEN" + [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" + [predictor-bot]="FORGE_PREDICTOR_TOKEN" + [architect-bot]="FORGE_ARCHITECT_TOKEN" + ) + # Map: bot-username -> env-var-name for the password + # Forgejo 11.x API tokens don't work for git HTTP push (#361). + # Store passwords so agents can use password auth for git operations. 
+ local -A bot_pass_vars=( + [dev-bot]="FORGE_PASS" + [review-bot]="FORGE_REVIEW_PASS" + [planner-bot]="FORGE_PLANNER_PASS" + [gardener-bot]="FORGE_GARDENER_PASS" + [vault-bot]="FORGE_VAULT_PASS" + [supervisor-bot]="FORGE_SUPERVISOR_PASS" + [predictor-bot]="FORGE_PREDICTOR_PASS" + [architect-bot]="FORGE_ARCHITECT_PASS" + ) + + local bot_user bot_pass token token_var pass_var + + for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do + bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + token_var="${bot_token_vars[$bot_user]}" + + # Check if bot user exists + local user_exists=false + if curl -sf --max-time 5 \ + -H "Authorization: token ${admin_token}" \ + "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then + user_exists=true + fi + + if [ "$user_exists" = false ]; then + echo "Creating bot user: ${bot_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --email "${bot_user}@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create bot user '${bot_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --must-change-password=false + + # Verify bot user was actually created + if ! curl -sf --max-time 5 \ + -H "Authorization: token ${admin_token}" \ + "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then + echo "Error: bot user '${bot_user}' not found after creation" >&2 + exit 1 + fi + echo " ${bot_user} user created" + else + echo " ${bot_user} user exists (resetting password for token generation)" + # User exists but may not have a known password. 
+ # Use admin API to reset the password so we can generate a new token. + _forgejo_exec forgejo admin user change-password \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --must-change-password=false || { + echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2 + exit 1 + } + fi + + # Generate token via API (basic auth as the bot user — Forgejo requires + # basic auth on POST /users/{username}/tokens, token auth is rejected) + # First, try to delete existing tokens to avoid name collision + # Use bot user's own Basic Auth (we just set the password above) + local existing_token_ids + existing_token_ids=$(curl -sf \ + -u "${bot_user}:${bot_pass}" \ + "${forge_url}/api/v1/users/${bot_user}/tokens" 2>/dev/null \ + | jq -r '.[].id // empty' 2>/dev/null) || existing_token_ids="" + + # Delete any existing tokens for this user + if [ -n "$existing_token_ids" ]; then + while IFS= read -r tid; do + [ -n "$tid" ] && curl -sf -X DELETE \ + -u "${bot_user}:${bot_pass}" \ + "${forge_url}/api/v1/users/${bot_user}/tokens/${tid}" >/dev/null 2>&1 || true + done <<< "$existing_token_ids" + fi + + token=$(curl -sf -X POST \ + -u "${bot_user}:${bot_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${bot_user}/tokens" \ + -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || token="" + + if [ -z "$token" ]; then + echo "Error: failed to create API token for '${bot_user}'" >&2 + exit 1 + fi + + # Store token in .env under the per-agent variable name + if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then + sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" + else + printf '%s=%s\n' "$token_var" "$token" >> "$env_file" + fi + export "${token_var}=${token}" + echo " ${bot_user} token generated and saved (${token_var})" + + # Store password in .env for git HTTP push (#361) + # Forgejo 11.x API tokens don't work for git push; password auth does. 
+ pass_var="${bot_pass_vars[$bot_user]}" + if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then + sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file" + else + printf '%s=%s\n' "$pass_var" "$bot_pass" >> "$env_file" + fi + export "${pass_var}=${bot_pass}" + echo " ${bot_user} password saved (${pass_var})" + + # Backwards-compat aliases for dev-bot and review-bot + if [ "$bot_user" = "dev-bot" ]; then + export CODEBERG_TOKEN="$token" + elif [ "$bot_user" = "review-bot" ]; then + export REVIEW_BOT_TOKEN="$token" + fi + done + + # Store FORGE_URL in .env if not already present + if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then + printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" + fi + + # Create the repo on Forgejo if it doesn't exist + local org_name="${repo_slug%%/*}" + local repo_name="${repo_slug##*/}" + + # Check if repo already exists + if ! curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then + + # Try creating org first (ignore if exists) + curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs" \ + -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true + + # Create repo under org + if ! curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + # Fallback: create under the human user namespace using admin endpoint + if [ -n "${admin_token:-}" ]; then + if ! 
curl -sf -X POST \ + -H "Authorization: token ${admin_token}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${org_name}/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2 + exit 1 + fi + elif [ -n "${HUMAN_TOKEN:-}" ]; then + if ! curl -sf -X POST \ + -H "Authorization: token ${HUMAN_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2 + exit 1 + fi + else + echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2 + exit 1 + fi + fi + + # Add all bot users as collaborators with appropriate permissions + # dev-bot: write (PR creation via lib/vault.sh) + # review-bot: read (PR review) + # planner-bot: write (prerequisites.md, memory) + # gardener-bot: write (backlog grooming) + # vault-bot: write (vault items) + # supervisor-bot: read (health monitoring) + # predictor-bot: read (pattern detection) + # architect-bot: write (sprint PRs) + local bot_perm + declare -A bot_permissions=( + [dev-bot]="write" + [review-bot]="read" + [planner-bot]="write" + [gardener-bot]="write" + [vault-bot]="write" + [supervisor-bot]="read" + [predictor-bot]="read" + [architect-bot]="write" + ) + for bot_user in "${!bot_permissions[@]}"; do + bot_perm="${bot_permissions[$bot_user]}" + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ + -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true + done + + # Add disinto-admin as admin collaborator + curl -sf -X PUT \ + -H "Authorization: token 
${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \ + -d '{"permission":"admin"}' >/dev/null 2>&1 || true + + echo "Repo: ${repo_slug} created on Forgejo" + else + echo "Repo: ${repo_slug} (already exists on Forgejo)" + fi + + echo "Forge: ${forge_url} (ready)" +} diff --git a/lib/formula-session.sh b/lib/formula-session.sh index 7c52035..1b2b884 100644 --- a/lib/formula-session.sh +++ b/lib/formula-session.sh @@ -1,23 +1,34 @@ #!/usr/bin/env bash # formula-session.sh — Shared helpers for formula-driven cron agents # -# Provides reusable functions for the common cron-wrapper + tmux-session -# pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. +# Provides reusable utility functions for the common cron-wrapper pattern +# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. # # Functions: # acquire_cron_lock LOCK_FILE — PID lock with stale cleanup -# check_memory [MIN_MB] — skip if available RAM too low # load_formula FORMULA_FILE — sets FORMULA_CONTENT # build_context_block FILE [FILE ...] 
— sets CONTEXT_BLOCK -# start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude -# build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase) -# run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log -# formula_phase_callback PHASE — standard crash-recovery callback +# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env) +# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode) +# formula_worktree_setup WORKTREE — isolated worktree for formula execution +# formula_prepare_profile_context — load lessons from .profile repo (pre-session) +# formula_lessons_block — return lessons block for prompt +# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal +# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT +# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo +# _profile_has_repo — check if agent has .profile repo +# _count_undigested_journals — count journal entries to digest +# _profile_digest_journals — digest journals into lessons +# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo +# resolve_agent_identity — resolve agent user login from FORGE_TOKEN +# build_graph_section — run build-graph.py and set GRAPH_SECTION +# build_scratch_instruction SCRATCH_FILE — return context scratch instruction +# read_scratch_context SCRATCH_FILE — return scratch file content block +# ensure_ops_repo — clone/pull ops repo +# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo +# cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale # -# Requires: lib/agent-session.sh sourced first (for create_agent_session, -# agent_kill_session, agent_inject_into_session). -# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE, -# PROJECT_REPO_ROOT, PROMPT (set by the calling script). +# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers. 
# ── Cron guards ────────────────────────────────────────────────────────── @@ -39,16 +50,431 @@ acquire_cron_lock() { trap 'rm -f "$_CRON_LOCK_FILE"' EXIT } -# check_memory [MIN_MB] -# Exits 0 (skip) if available memory is below MIN_MB (default 2000). -check_memory() { - local min_mb="${1:-2000}" - local avail_mb - avail_mb=$(free -m | awk '/Mem:/{print $7}') - if [ "${avail_mb:-0}" -lt "$min_mb" ]; then - log "run: skipping — only ${avail_mb}MB available (need ${min_mb})" - exit 0 +# ── Agent identity resolution ──────────────────────────────────────────── + +# resolve_agent_identity +# Resolves the agent identity (user login) from the FORGE_TOKEN. +# Exports AGENT_IDENTITY (user login string). +# Returns 0 on success, 1 on failure. +resolve_agent_identity() { + if [ -z "${FORGE_TOKEN:-}" ]; then + log "WARNING: FORGE_TOKEN not set, cannot resolve agent identity" + return 1 fi + local forge_url="${FORGE_URL:-http://localhost:3000}" + AGENT_IDENTITY=$(curl -sf --max-time 10 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null) || true + if [ -z "$AGENT_IDENTITY" ]; then + log "WARNING: failed to resolve agent identity from FORGE_TOKEN" + return 1 + fi + log "Resolved agent identity: ${AGENT_IDENTITY}" + return 0 +} + +# ── Forge remote resolution ────────────────────────────────────────────── + +# resolve_forge_remote +# Resolves FORGE_REMOTE by matching FORGE_URL hostname against git remotes. +# Falls back to "origin" if no match found. +# Requires: FORGE_URL, git repo with remotes configured. +# Exports: FORGE_REMOTE (always set). 
+resolve_forge_remote() { + # Extract hostname from FORGE_URL (e.g., https://codeberg.org/user/repo -> codeberg.org) + _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||; s|:.*||') + # Find git remote whose push URL matches the forge host + FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') + # Fallback to origin if no match found + FORGE_REMOTE="${FORGE_REMOTE:-origin}" + export FORGE_REMOTE + log "forge remote: ${FORGE_REMOTE}" +} + +# ── .profile repo management ────────────────────────────────────────────── + +# ensure_profile_repo [AGENT_IDENTITY] +# Clones or pulls the agent's .profile repo to a local cache dir. +# Requires: FORGE_TOKEN, FORGE_URL. +# Exports PROFILE_REPO_PATH (local cache path) and PROFILE_FORMULA_PATH. +# Returns 0 on success, 1 on failure (falls back gracefully). +ensure_profile_repo() { + local agent_identity="${1:-${AGENT_IDENTITY:-}}" + + if [ -z "$agent_identity" ]; then + # Try to resolve from FORGE_TOKEN + if ! 
resolve_agent_identity; then + log "WARNING: cannot resolve agent identity, skipping .profile repo" + return 1 + fi + agent_identity="$AGENT_IDENTITY" + fi + + # Define cache directory: /home/agent/data/.profile/{agent-name} + PROFILE_REPO_PATH="${HOME:-/home/agent}/data/.profile/${agent_identity}" + + # Build clone URL from FORGE_URL and agent identity + local forge_url="${FORGE_URL:-http://localhost:3000}" + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://$(whoami):${FORGE_TOKEN}@|") + local clone_url="${auth_url}/${agent_identity}/.profile.git" + + # Check if already cached and up-to-date + if [ -d "${PROFILE_REPO_PATH}/.git" ]; then + log "Pulling .profile repo: ${agent_identity}/.profile" + if git -C "$PROFILE_REPO_PATH" fetch origin --quiet 2>/dev/null; then + git -C "$PROFILE_REPO_PATH" checkout main --quiet 2>/dev/null || \ + git -C "$PROFILE_REPO_PATH" checkout master --quiet 2>/dev/null || true + git -C "$PROFILE_REPO_PATH" pull --ff-only origin main --quiet 2>/dev/null || \ + git -C "$PROFILE_REPO_PATH" pull --ff-only origin master --quiet 2>/dev/null || true + log ".profile repo pulled: ${PROFILE_REPO_PATH}" + else + log "WARNING: failed to pull .profile repo, using cached version" + fi + else + log "Cloning .profile repo: ${agent_identity}/.profile -> ${PROFILE_REPO_PATH}" + if git clone --quiet "$clone_url" "$PROFILE_REPO_PATH" 2>/dev/null; then + log ".profile repo cloned: ${PROFILE_REPO_PATH}" + else + log "WARNING: failed to clone .profile repo ${agent_identity}/.profile — falling back to formulas/" + return 1 + fi + fi + + # Set formula path from .profile + PROFILE_FORMULA_PATH="${PROFILE_REPO_PATH}/formula.toml" + return 0 +} + +# _profile_has_repo +# Checks if the agent has a .profile repo by querying Forgejo API. +# Returns 0 if repo exists, 1 otherwise. +_profile_has_repo() { + local agent_identity="${AGENT_IDENTITY:-}" + + if [ -z "$agent_identity" ]; then + if ! 
resolve_agent_identity; then + return 1 + fi + agent_identity="$AGENT_IDENTITY" + fi + + local forge_url="${FORGE_URL:-http://localhost:3000}" + local api_url="${forge_url}/api/v1/repos/${agent_identity}/.profile" + + # Check if repo exists via API (returns 200 if exists, 404 if not) + if curl -sf -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "$api_url" >/dev/null 2>&1; then + return 0 + fi + return 1 +} + +# _count_undigested_journals +# Counts journal entries in .profile/journal/ excluding archive/ +# Returns count via stdout. +_count_undigested_journals() { + if [ ! -d "${PROFILE_REPO_PATH:-}/journal" ]; then + echo "0" + return + fi + find "${PROFILE_REPO_PATH}/journal" -maxdepth 1 -name "*.md" -type f ! -path "*/archive/*" 2>/dev/null | wc -l +} + +# _profile_digest_journals +# Runs a claude -p one-shot to digest undigested journals into lessons-learned.md +# Returns 0 on success, 1 on failure. +_profile_digest_journals() { + local agent_identity="${AGENT_IDENTITY:-}" + local model="${CLAUDE_MODEL:-opus}" + + if [ -z "$agent_identity" ]; then + if ! 
resolve_agent_identity; then + return 1 + fi + agent_identity="$AGENT_IDENTITY" + fi + + local journal_dir="${PROFILE_REPO_PATH}/journal" + local knowledge_dir="${PROFILE_REPO_PATH}/knowledge" + local lessons_file="${knowledge_dir}/lessons-learned.md" + + # Collect undigested journal entries + local journal_entries="" + if [ -d "$journal_dir" ]; then + for jf in "$journal_dir"/*.md; do + [ -f "$jf" ] || continue + # Skip archived entries + [[ "$jf" == */archive/* ]] && continue + local basename + basename=$(basename "$jf") + journal_entries="${journal_entries} +### ${basename} +$(cat "$jf") +" + done + fi + + if [ -z "$journal_entries" ]; then + log "profile: no undigested journals to digest" + return 0 + fi + + # Read existing lessons if available + local existing_lessons="" + if [ -f "$lessons_file" ]; then + existing_lessons=$(cat "$lessons_file") + fi + + # Build prompt for digestion + local digest_prompt="You are digesting journal entries from a developer agent's work sessions. + +## Task +Condense these journal entries into abstract, transferable lessons. Rewrite lessons-learned.md entirely. + +## Constraints +- Hard cap: 2KB maximum +- Abstract: patterns and heuristics, not specific issues or file paths +- Transferable: must help with future unseen work, not just recall past work +- Drop the least transferable lessons if over limit + +## Existing lessons-learned.md (if any) +${existing_lessons:-<none>} + +## Journal entries to digest +${journal_entries} + +## Output +Write the complete, rewritten lessons-learned.md content below. No preamble, no explanation — just the file content." 
+ + # Run claude -p one-shot with same model as agent + local output + output=$(claude -p "$digest_prompt" \ + --output-format json \ + --dangerously-skip-permissions \ + ${model:+--model "$model"} \ + 2>>"$LOGFILE" || echo '{"result":"error"}') + + # Extract content from JSON response + local lessons_content + lessons_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "") + + if [ -z "$lessons_content" ]; then + log "profile: failed to digest journals" + return 1 + fi + + # Ensure knowledge directory exists + mkdir -p "$knowledge_dir" + + # Write the lessons file (full rewrite) + printf '%s\n' "$lessons_content" > "$lessons_file" + log "profile: wrote lessons-learned.md (${#lessons_content} bytes)" + + # Move digested journals to archive (if any were processed) + if [ -d "$journal_dir" ]; then + mkdir -p "${journal_dir}/archive" + local archived=0 + for jf in "$journal_dir"/*.md; do + [ -f "$jf" ] || continue + [[ "$jf" == */archive/* ]] && continue + local basename + basename=$(basename "$jf") + mv "$jf" "${journal_dir}/archive/${basename}" 2>/dev/null && archived=$((archived + 1)) + done + if [ "$archived" -gt 0 ]; then + log "profile: archived ${archived} journal entries" + fi + fi + + return 0 +} + +# _profile_commit_and_push MESSAGE [FILE ...] +# Commits and pushes changes to .profile repo. +_profile_commit_and_push() { + local msg="$1" + shift + local files=("$@") + + if [ ! -d "${PROFILE_REPO_PATH:-}/.git" ]; then + return 1 + fi + + ( + cd "$PROFILE_REPO_PATH" || return 1 + + if [ ${#files[@]} -gt 0 ]; then + git add "${files[@]}" + else + git add -A + fi + + if ! 
git diff --cached --quiet 2>/dev/null; then + git config user.name "${AGENT_IDENTITY}" || true + git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true + git commit -m "$msg" --no-verify 2>/dev/null || true + git push origin main --quiet 2>/dev/null || git push origin master --quiet 2>/dev/null || true + fi + ) +} + +# profile_load_lessons +# Pre-session: loads lessons-learned.md into LESSONS_CONTEXT for prompt injection. +# Lazy digestion: if >10 undigested journals exist, runs claude -p to digest them. +# Returns 0 on success, 1 if agent has no .profile repo (silent no-op). +# Requires: ensure_profile_repo() called, AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, CLAUDE_MODEL. +# Exports: LESSONS_CONTEXT (the lessons file content, hard-capped at 2KB). +profile_load_lessons() { + # Check if agent has .profile repo + if ! _profile_has_repo; then + return 0 # Silent no-op + fi + + # Pull .profile repo + if ! ensure_profile_repo; then + return 0 # Silent no-op + fi + + # Check journal count for lazy digestion trigger + local journal_count + journal_count=$(_count_undigested_journals) + + if [ "${journal_count:-0}" -gt 10 ]; then + log "profile: digesting ${journal_count} undigested journals" + if ! 
_profile_digest_journals; then
+      log "profile: warning — journal digestion failed"
+    fi
+  fi
+
+  # Read lessons-learned.md (hard cap at 2KB)
+  local lessons_file="${PROFILE_REPO_PATH}/knowledge/lessons-learned.md"
+  LESSONS_CONTEXT=""
+
+  if [ -f "$lessons_file" ]; then
+    local lessons_content
+    lessons_content=$(head -c 2048 "$lessons_file" 2>/dev/null) || lessons_content=""
+    if [ -n "$lessons_content" ]; then
+      # shellcheck disable=SC2034 # exported to caller for prompt injection
+      LESSONS_CONTEXT="## Lessons learned (from .profile/knowledge/lessons-learned.md)
+${lessons_content}"
+      log "profile: loaded lessons-learned.md (${#lessons_content} bytes)"
+    fi
+  fi
+
+  return 0
+}
+
+# formula_prepare_profile_context
+# Pre-session: loads lessons from .profile repo and sets LESSONS_CONTEXT for prompt injection.
+# Single shared function to avoid duplicate boilerplate across agent scripts.
+# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL (via profile_load_lessons).
+# Exports: LESSONS_CONTEXT (set by profile_load_lessons).
+# Returns 0 on success, 1 if agent has no .profile repo (silent no-op).
+formula_prepare_profile_context() {
+  profile_load_lessons || true
+  LESSONS_INJECTION="${LESSONS_CONTEXT:-}"
+}
+
+# formula_lessons_block
+# Returns a formatted lessons block for prompt injection.
+# Usage: LESSONS_BLOCK=$(formula_lessons_block)
+# Expects: LESSONS_INJECTION to be set by formula_prepare_profile_context.
+# Returns: formatted block or empty string.
+formula_lessons_block() {
+  if [ -n "${LESSONS_INJECTION:-}" ]; then
+    # LESSONS_CONTEXT already carries the "## Lessons learned" heading
+    printf '\n%s' "$LESSONS_INJECTION"
+  fi
+}
+
+# profile_write_journal ISSUE_NUM ISSUE_TITLE OUTCOME [FILES_CHANGED]
+# Post-session: writes a reflection journal entry after work completes.
+# Returns 0 on success, 1 on failure.
+# Requires: AGENT_IDENTITY, FORGE_TOKEN, FORGE_URL, CLAUDE_MODEL.
+# Args: +# $1 - ISSUE_NUM: The issue number worked on +# $2 - ISSUE_TITLE: The issue title +# $3 - OUTCOME: Session outcome (merged, blocked, failed, etc.) +# $4 - FILES_CHANGED: Optional comma-separated list of files changed +profile_write_journal() { + local issue_num="$1" + local issue_title="$2" + local outcome="$3" + local files_changed="${4:-}" + + # Check if agent has .profile repo + if ! _profile_has_repo; then + return 0 # Silent no-op + fi + + # Pull .profile repo + if ! ensure_profile_repo; then + return 0 # Silent no-op + fi + + # Build session summary + local session_summary="" + if [ -n "$files_changed" ]; then + session_summary="Files changed: ${files_changed} +" + fi + session_summary="${session_summary}Outcome: ${outcome}" + + # Build reflection prompt + local reflection_prompt="You are reflecting on a development session. Write a concise journal entry about transferable lessons learned. + +## Session context +- Issue: #${issue_num} — ${issue_title} +- Outcome: ${outcome} + +${session_summary} + +## Task +Write a journal entry focused on what you learned that would help you do similar work better next time. + +## Constraints +- Be concise (100-200 words) +- Focus on transferable lessons, not a summary of what you did +- Abstract patterns and heuristics, not specific issue/file references +- One concise entry, not a list + +## Output +Write the journal entry below. Use markdown format." 
+ + # Run claude -p one-shot with same model as agent + local output + output=$(claude -p "$reflection_prompt" \ + --output-format json \ + --dangerously-skip-permissions \ + ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \ + 2>>"$LOGFILE" || echo '{"result":"error"}') + + # Extract content from JSON response + local journal_content + journal_content=$(printf '%s' "$output" | jq -r '.result // empty' 2>/dev/null || echo "") + + if [ -z "$journal_content" ]; then + log "profile: failed to write journal entry" + return 1 + fi + + # Ensure journal directory exists + local journal_dir="${PROFILE_REPO_PATH}/journal" + mkdir -p "$journal_dir" + + # Write journal entry (append if exists) + local journal_file="${journal_dir}/issue-${issue_num}.md" + if [ -f "$journal_file" ]; then + printf '\n---\n\n' >> "$journal_file" + fi + printf '%s\n' "$journal_content" >> "$journal_file" + log "profile: wrote journal entry for issue #${issue_num}" + + # Commit and push to .profile repo + _profile_commit_and_push "journal: issue #${issue_num} reflection" "journal/issue-${issue_num}.md" + + return 0 } # ── Formula loading ────────────────────────────────────────────────────── @@ -65,6 +491,60 @@ load_formula() { FORMULA_CONTENT=$(cat "$formula_file") } +# load_formula_or_profile [ROLE] [FORMULA_FILE] +# Tries to load formula from .profile repo first, falls back to formulas/<role>.toml. +# Requires: AGENT_IDENTITY, ensure_profile_repo() available. +# Exports: FORMULA_CONTENT, FORMULA_SOURCE (either ".profile" or "formulas/"). +# Returns 0 on success, 1 on failure. 
+load_formula_or_profile() {
+  local role="${1:-}"
+  local fallback_formula="${2:-}"
+
+  # Try to load from .profile repo (guard AGENT_IDENTITY for set -u, like siblings)
+  if [ -n "${AGENT_IDENTITY:-}" ] && ensure_profile_repo "$AGENT_IDENTITY"; then
+    if [ -f "$PROFILE_FORMULA_PATH" ]; then
+      log "formula source: .profile (${PROFILE_FORMULA_PATH})"
+      # shellcheck disable=SC2034
+      FORMULA_CONTENT="$(cat "$PROFILE_FORMULA_PATH")"
+      FORMULA_SOURCE=".profile"
+      return 0
+    else
+      log "WARNING: .profile repo exists but formula.toml not found at ${PROFILE_FORMULA_PATH}"
+    fi
+  fi
+
+  # Fallback to formulas/<role>.toml
+  if [ -n "$fallback_formula" ]; then
+    if [ -f "$fallback_formula" ]; then
+      log "formula source: formulas/ (fallback) — ${fallback_formula}"
+      # shellcheck disable=SC2034
+      FORMULA_CONTENT="$(cat "$fallback_formula")"
+      FORMULA_SOURCE="formulas/"
+      return 0
+    else
+      log "ERROR: formula not found in .profile and fallback file not found: $fallback_formula"
+      return 1
+    fi
+  fi
+
+  # No fallback specified but role provided — construct fallback path
+  if [ -n "$role" ]; then
+    fallback_formula="${FACTORY_ROOT}/formulas/${role}.toml"
+    if [ -f "$fallback_formula" ]; then
+      log "formula source: formulas/ (fallback) — ${fallback_formula}"
+      # shellcheck disable=SC2034
+      FORMULA_CONTENT="$(cat "$fallback_formula")"
+      # shellcheck disable=SC2034
+      FORMULA_SOURCE="formulas/"
+      return 0
+    fi
+  fi
+
+  # No fallback specified
+  log "ERROR: formula not found in .profile and no fallback specified"
+  return 1
+}
+
 # build_context_block FILE [FILE ...]
 # Reads each file from $PROJECT_REPO_ROOT and builds CONTEXT_BLOCK.
 # Files prefixed with "ops:" are read from $OPS_REPO_ROOT instead.
@@ -91,7 +571,7 @@ $(cat "$ctx_path")
 done
 }
 
-# ── Ops repo helpers ─────────────────────────────────
+# ── Ops repo helpers ────────────────────────────────────
 
 # ensure_ops_repo
 # Clones or pulls the ops repo so agents can read/write operational data.
@@ -154,90 +634,6 @@ ops_commit_and_push() { ) } -# ── Session management ─────────────────────────────────────────────────── - -# start_formula_session SESSION WORKDIR PHASE_FILE -# Kills stale session, resets phase file, creates a per-agent git worktree -# for session isolation, and creates a new tmux + claude session in it. -# Sets _FORMULA_SESSION_WORKDIR to the worktree path (or original workdir -# on fallback). Callers must clean up via remove_formula_worktree after -# the session ends. -# Returns 0 on success, 1 on failure. -start_formula_session() { - local session="$1" workdir="$2" phase_file="$3" - agent_kill_session "$session" - rm -f "$phase_file" - - # Create per-agent git worktree for session isolation. - # Each agent gets its own CWD so Claude Code treats them as separate - # projects — no resume collisions between sequential formula runs. - _FORMULA_SESSION_WORKDIR="/tmp/disinto-${session}" - # Clean up any stale worktree from a previous run - git -C "$workdir" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true - if git -C "$workdir" worktree add "$_FORMULA_SESSION_WORKDIR" HEAD --detach 2>/dev/null; then - log "Created worktree: ${_FORMULA_SESSION_WORKDIR}" - else - log "WARNING: worktree creation failed — falling back to ${workdir}" - _FORMULA_SESSION_WORKDIR="$workdir" - fi - - log "Creating tmux session: ${session}" - if ! create_agent_session "$session" "$_FORMULA_SESSION_WORKDIR" "$phase_file"; then - log "ERROR: failed to create tmux session ${session}" - return 1 - fi -} - -# remove_formula_worktree -# Removes the worktree created by start_formula_session if it differs from -# PROJECT_REPO_ROOT. Safe to call multiple times. No-op if no worktree was created. 
-remove_formula_worktree() { - if [ -n "${_FORMULA_SESSION_WORKDIR:-}" ] \ - && [ "$_FORMULA_SESSION_WORKDIR" != "${PROJECT_REPO_ROOT:-}" ]; then - git -C "$PROJECT_REPO_ROOT" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true - log "Removed worktree: ${_FORMULA_SESSION_WORKDIR}" - fi -} - -# formula_phase_callback PHASE -# Standard crash-recovery phase callback for formula sessions. -# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT. -# Uses _FORMULA_CRASH_COUNT (auto-initialized) for single-retry limit. -# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller -formula_phase_callback() { - local phase="$1" - log "phase: ${phase}" - case "$phase" in - PHASE:crashed) - if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then - log "ERROR: session crashed again after recovery — giving up" - return 0 - fi - _FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 )) - log "WARNING: tmux session died unexpectedly — attempting recovery" - if create_agent_session "${_MONITOR_SESSION:-$SESSION_NAME}" "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" "$PROMPT" - log "Recovery session started" - else - log "ERROR: could not restart session after crash" - fi - ;; - PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged) - agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}" - ;; - esac -} - -# ── Stale crashed worktree cleanup ───────────────────────────────────────── - -# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] -# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. -# Kept for backwards compatibility with existing callers. -# Requires: lib/worktree.sh sourced. 
-cleanup_stale_crashed_worktrees() { - worktree_cleanup_stale "${1:-24}" -} - # ── Scratch file helpers (compaction survival) ──────────────────────────── # build_scratch_instruction SCRATCH_FILE @@ -283,8 +679,14 @@ build_graph_section() { --project-root "$PROJECT_REPO_ROOT" \ --output "$report" 2>>"$LOG_FILE"; then # shellcheck disable=SC2034 - GRAPH_SECTION=$(printf '\n## Structural analysis\n```json\n%s\n```\n' \ - "$(cat "$report")") + local report_content + report_content="$(cat "$report")" + # shellcheck disable=SC2034 + GRAPH_SECTION=" +## Structural analysis +\`\`\`json +${report_content} +\`\`\`" log "graph report generated: $(jq -r '.stats | "\(.nodes) nodes, \(.edges) edges"' "$report")" else log "WARN: build-graph.py failed — continuing without structural analysis" @@ -307,25 +709,26 @@ build_sdk_prompt_footer() { # Creates an isolated worktree for synchronous formula execution. # Fetches primary branch, cleans stale worktree, creates new one, and # sets an EXIT trap for cleanup. -# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH. +# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE. +# Ensure resolve_forge_remote() is called before this function. 
formula_worktree_setup() { local worktree="$1" cd "$PROJECT_REPO_ROOT" || return - git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true + git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true worktree_cleanup "$worktree" - git worktree add "$worktree" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null + git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null # shellcheck disable=SC2064 # expand worktree now, not at trap time trap "worktree_cleanup '$worktree'" EXIT } -# ── Prompt + monitor helpers ────────────────────────────────────────────── +# ── Prompt helpers ────────────────────────────────────────────────────── # build_prompt_footer [EXTRA_API_LINES] -# Assembles the common forge API reference + environment + phase protocol -# block for formula prompts. Sets PROMPT_FOOTER. +# Assembles the common forge API reference + environment block for formula prompts. +# Sets PROMPT_FOOTER. # Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1. # Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT, -# PRIMARY_BRANCH, PHASE_FILE. +# PRIMARY_BRANCH. build_prompt_footer() { local extra_api="${1:-}" # shellcheck disable=SC2034 # consumed by the calling script's PROMPT @@ -341,66 +744,15 @@ NEVER echo or include the actual token value in output — always reference \${F FACTORY_ROOT=${FACTORY_ROOT} PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT} OPS_REPO_ROOT=${OPS_REPO_ROOT} -PRIMARY_BRANCH=${PRIMARY_BRANCH} -PHASE_FILE=${PHASE_FILE} - -## Phase protocol (REQUIRED) -When all work is done: - echo 'PHASE:done' > '${PHASE_FILE}' -On unrecoverable error: - printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'" +PRIMARY_BRANCH=${PRIMARY_BRANCH}" } -# run_formula_and_monitor AGENT_NAME [TIMEOUT] -# Starts the formula session, injects PROMPT, monitors phase, and logs result. -# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT, -# FORGE_REPO, CLAUDE_MODEL (exported). 
-# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller -run_formula_and_monitor() { - local agent_name="$1" - local timeout="${2:-7200}" - local callback="${3:-formula_phase_callback}" +# ── Stale crashed worktree cleanup ──────────────────────────────────────── - if ! start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then - exit 1 - fi - - # Write phase protocol to context file for compaction survival - if [ -n "${PROMPT_FOOTER:-}" ]; then - write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER" - fi - - agent_inject_into_session "$SESSION_NAME" "$PROMPT" - log "Prompt sent to tmux session" - - log "Monitoring phase file: ${PHASE_FILE}" - _FORMULA_CRASH_COUNT=0 - - monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback" - - FINAL_PHASE=$(read_phase "$PHASE_FILE") - log "Final phase: ${FINAL_PHASE:-none}" - - if [ "$FINAL_PHASE" != "PHASE:done" ]; then - case "${_MONITOR_LOOP_EXIT:-}" in - idle_prompt) - log "${agent_name}: Claude returned to prompt without writing phase signal" - ;; - idle_timeout) - log "${agent_name}: timed out with no phase signal" - ;; - *) - log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})" - ;; - esac - fi - - # Preserve worktree on crash for debugging; clean up on success - if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then - worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})" - else - remove_formula_worktree - fi - - log "--- ${agent_name^} run done ---" +# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] +# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. +# Kept for backwards compatibility with existing callers. +# Requires: lib/worktree.sh sourced. 
+cleanup_stale_crashed_worktrees() { + worktree_cleanup_stale "${1:-24}" } diff --git a/lib/generators.sh b/lib/generators.sh new file mode 100644 index 0000000..75e5e18 --- /dev/null +++ b/lib/generators.sh @@ -0,0 +1,432 @@ +#!/usr/bin/env bash +# ============================================================================= +# generators — template generation functions for disinto init +# +# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and +# deployment pipeline configs. +# +# Globals expected (must be set before sourcing): +# FACTORY_ROOT - Root of the disinto factory +# PROJECT_NAME - Project name for the project repo (defaults to 'project') +# PRIMARY_BRANCH - Primary branch name (defaults to 'main') +# +# Usage: +# source "${FACTORY_ROOT}/lib/generators.sh" +# generate_compose "$forge_port" +# generate_caddyfile +# generate_staging_index +# generate_deploy_pipelines "$repo_root" "$project_name" +# ============================================================================= +set -euo pipefail + +# Assert required globals are set +: "${FACTORY_ROOT:?FACTORY_ROOT must be set}" +# PROJECT_NAME defaults to 'project' if not set (env.sh may have set it from FORGE_REPO) +PROJECT_NAME="${PROJECT_NAME:-project}" +# PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master') +PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}" + +# Generate docker-compose.yml in the factory root. +_generate_compose_impl() { + local forge_port="${1:-3000}" + local compose_file="${FACTORY_ROOT}/docker-compose.yml" + + # Check if compose file already exists + if [ -f "$compose_file" ]; then + echo "Compose: ${compose_file} (already exists, skipping)" + return 0 + fi + + cat > "$compose_file" <<'COMPOSEEOF' +# docker-compose.yml — generated by disinto init +# Brings up Forgejo, Woodpecker, and the agent runtime. 
+ +services: + forgejo: + image: codeberg.org/forgejo/forgejo:1 + container_name: disinto-forgejo + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - forgejo-data:/data + environment: + FORGEJO__database__DB_TYPE: sqlite3 + FORGEJO__server__ROOT_URL: http://forgejo:3000/ + FORGEJO__server__HTTP_PORT: "3000" + FORGEJO__security__INSTALL_LOCK: "true" + FORGEJO__service__DISABLE_REGISTRATION: "true" + FORGEJO__webhook__ALLOWED_HOST_LIST: "private" + networks: + - disinto-net + + woodpecker: + image: woodpeckerci/woodpecker-server:v3 + container_name: disinto-woodpecker + restart: unless-stopped + security_opt: + - apparmor=unconfined + ports: + - "8000:8000" + - "9000:9000" + volumes: + - woodpecker-data:/var/lib/woodpecker + environment: + WOODPECKER_FORGEJO: "true" + WOODPECKER_FORGEJO_URL: http://forgejo:3000 + WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} + WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} + WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000} + WOODPECKER_OPEN: "true" + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_DATABASE_DRIVER: sqlite3 + WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite + WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}" + depends_on: + - forgejo + networks: + - disinto-net + + woodpecker-agent: + image: woodpeckerci/woodpecker-agent:v3 + container_name: disinto-woodpecker-agent + restart: unless-stopped + network_mode: host + privileged: true + volumes: + - /var/run/docker.sock:/var/run/docker.sock + environment: + WOODPECKER_SERVER: localhost:9000 + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_GRPC_SECURE: "false" + WOODPECKER_HEALTHCHECK_ADDR: ":3333" + WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net + WOODPECKER_MAX_WORKFLOWS: 1 + depends_on: + - woodpecker + + agents: + build: + context: . 
+ dockerfile: docker/agents/Dockerfile + container_name: disinto-agents + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + - project-repos:/home/agent/repos + - ${HOME}/.claude:/home/agent/.claude + - ${HOME}/.claude.json:/home/agent/.claude.json:ro + - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro + - ${HOME}/.ssh:/home/agent/.ssh:ro + - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro + - woodpecker-data:/woodpecker-data:ro + environment: + FORGE_URL: http://forgejo:3000 + WOODPECKER_SERVER: http://woodpecker:8000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + WOODPECKER_DATA_DIR: /woodpecker-data + env_file: + - .env + # IMPORTANT: agents get .env only (forge tokens, CI tokens, config). + # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in + # .env.vault.enc and are NEVER injected here — only the runner + # container receives them at fire time (AD-006, #745). + depends_on: + - forgejo + - woodpecker + networks: + - disinto-net + + runner: + build: + context: . 
+ dockerfile: docker/agents/Dockerfile + profiles: ["vault"] + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + environment: + FORGE_URL: http://forgejo:3000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + # Vault redesign in progress (PR-based approval, see #73-#77) + # This container is being replaced — entrypoint will be updated in follow-up + networks: + - disinto-net + + # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging + # Serves on ports 80/443, routes based on path + edge: + build: ./docker/edge + container_name: disinto-edge + ports: + - "80:80" + - "443:443" + environment: + - DISINTO_VERSION=${DISINTO_VERSION:-main} + - FORGE_URL=http://forgejo:3000 + - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto} + - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops} + - FORGE_TOKEN=${FORGE_TOKEN:-} + - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin} + - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-} + - OPS_REPO_ROOT=/opt/disinto-ops + - PROJECT_REPO_ROOT=/opt/disinto + - PRIMARY_BRANCH=main + volumes: + - ./docker/Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + - /var/run/docker.sock:/var/run/docker.sock + depends_on: + - forgejo + - woodpecker + - staging + networks: + - disinto-net + + # Staging container — static file server for staging artifacts + # Edge proxy routes to this container for default requests + staging: + image: caddy:alpine + command: ["caddy", "file-server", "--root", "/srv/site"] + volumes: + - ./docker:/srv/site:ro + networks: + - disinto-net + + # Staging deployment slot — activated by Woodpecker staging pipeline (#755). + # Profile-gated: only starts when explicitly targeted by deploy commands. + # Customize image/ports/volumes for your project after init. 
+ staging-deploy: + image: alpine:3 + profiles: ["staging"] + security_opt: + - apparmor=unconfined + environment: + DEPLOY_ENV: staging + networks: + - disinto-net + command: ["echo", "staging slot — replace with project image"] + +volumes: + forgejo-data: + woodpecker-data: + agent-data: + project-repos: + caddy_data: + +networks: + disinto-net: + driver: bridge +COMPOSEEOF + + # Patch the Claude CLI binary path — resolve from host PATH at init time. + local claude_bin + claude_bin="$(command -v claude 2>/dev/null || true)" + if [ -n "$claude_bin" ]; then + # Resolve symlinks to get the real binary path + claude_bin="$(readlink -f "$claude_bin")" + sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" + else + echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 + sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" + fi + + # Patch the forgejo port mapping into the file if non-default + if [ "$forge_port" != "3000" ]; then + # Add port mapping to forgejo service so it's reachable from host during init + sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" + else + sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\ ports:\\n - \"3000:3000\"" "$compose_file" + fi + + echo "Created: ${compose_file}" +} + +# Generate docker/agents/ files if they don't already exist. +_generate_agent_docker_impl() { + local docker_dir="${FACTORY_ROOT}/docker/agents" + mkdir -p "$docker_dir" + + if [ ! -f "${docker_dir}/Dockerfile" ]; then + echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 + fi + if [ ! -f "${docker_dir}/entrypoint.sh" ]; then + echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 + fi +} + +# Generate docker/Caddyfile template for edge proxy. 
+_generate_caddyfile_impl() { + local docker_dir="${FACTORY_ROOT}/docker" + local caddyfile="${docker_dir}/Caddyfile" + + if [ -f "$caddyfile" ]; then + echo "Caddyfile: ${caddyfile} (already exists, skipping)" + return + fi + + cat > "$caddyfile" <<'CADDYFILEEOF' +# Caddyfile — edge proxy configuration +# IP-only binding at bootstrap; domain + TLS added later via vault resource request + +:80 { + # Reverse proxy to Forgejo + handle /forgejo/* { + reverse_proxy forgejo:3000 + } + + # Reverse proxy to Woodpecker CI + handle /ci/* { + reverse_proxy woodpecker:8000 + } + + # Default: proxy to staging container + handle { + reverse_proxy staging:80 + } +} +CADDYFILEEOF + + echo "Created: ${caddyfile}" +} + +# Generate docker/index.html default page. +_generate_staging_index_impl() { + local docker_dir="${FACTORY_ROOT}/docker" + local index_file="${docker_dir}/index.html" + + if [ -f "$index_file" ]; then + echo "Staging: ${index_file} (already exists, skipping)" + return + fi + + cat > "$index_file" <<'INDEXEOF' +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Nothing shipped yet + + + +
+

Nothing shipped yet

+

CI pipelines will update this page with your staging artifacts.

+
+ + +INDEXEOF + + echo "Created: ${index_file}" +} + +# Generate template .woodpecker/ deployment pipeline configs in a project repo. +# Creates staging.yml and production.yml alongside the project's existing CI config. +# These pipelines trigger on Woodpecker's deployment event with environment filters. +_generate_deploy_pipelines_impl() { + local repo_root="$1" + local project_name="$2" + : "${project_name// /}" # Silence SC2034 - variable used in heredoc + local wp_dir="${repo_root}/.woodpecker" + + mkdir -p "$wp_dir" + + # Skip if deploy pipelines already exist + if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then + echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" + return + fi + + if [ ! -f "${wp_dir}/staging.yml" ]; then + cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' +# .woodpecker/staging.yml — Staging deployment pipeline +# Triggered by runner via Woodpecker promote API. +# Human approves promotion in vault → runner calls promote → this runs. + +when: + event: deployment + environment: staging + +steps: + - name: deploy-staging + image: docker:27 + commands: + - echo "Deploying to staging environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" + # Pull the image built by CI and deploy to staging + # Customize these commands for your project: + # - docker compose -f docker-compose.yml --profile staging up -d + - echo "Staging deployment complete" + + - name: verify-staging + image: alpine:3 + commands: + - echo "Verifying staging deployment..." + # Add health checks, smoke tests, or integration tests here: + # - curl -sf http://staging:8080/health || exit 1 + - echo "Staging verification complete" +STAGINGEOF + echo "Created: ${wp_dir}/staging.yml" + fi + + if [ ! -f "${wp_dir}/production.yml" ]; then + cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' +# .woodpecker/production.yml — Production deployment pipeline +# Triggered by runner via Woodpecker promote API. 
+# Human approves promotion in vault → runner calls promote → this runs. + +when: + event: deployment + environment: production + +steps: + - name: deploy-production + image: docker:27 + commands: + - echo "Deploying to production environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" + # Pull the verified image and deploy to production + # Customize these commands for your project: + # - docker compose -f docker-compose.yml up -d + - echo "Production deployment complete" + + - name: verify-production + image: alpine:3 + commands: + - echo "Verifying production deployment..." + # Add production health checks here: + # - curl -sf http://production:8080/health || exit 1 + - echo "Production verification complete" +PRODUCTIONEOF + echo "Created: ${wp_dir}/production.yml" + fi +} diff --git a/lib/hire-agent.sh b/lib/hire-agent.sh new file mode 100644 index 0000000..3ccc1c4 --- /dev/null +++ b/lib/hire-agent.sh @@ -0,0 +1,464 @@ +#!/usr/bin/env bash +# ============================================================================= +# hire-agent — disinto_hire_an_agent() function +# +# Handles user creation, .profile repo setup, formula copying, branch protection, +# and state marker creation for hiring a new agent. 
+# +# Globals expected: +# FORGE_URL - Forge instance URL +# FORGE_TOKEN - Admin token for Forge operations +# FACTORY_ROOT - Root of the disinto factory +# PROJECT_NAME - Project name for email/domain generation +# +# Usage: +# source "${FACTORY_ROOT}/lib/hire-agent.sh" +# disinto_hire_an_agent [--formula ] [--local-model ] [--poll-interval ] +# ============================================================================= +set -euo pipefail + +disinto_hire_an_agent() { + local agent_name="${1:-}" + local role="${2:-}" + local formula_path="" + local local_model="" + local poll_interval="" + + if [ -z "$agent_name" ] || [ -z "$role" ]; then + echo "Error: agent-name and role required" >&2 + echo "Usage: disinto hire-an-agent [--formula ] [--local-model ] [--poll-interval ]" >&2 + exit 1 + fi + shift 2 + + # Parse flags + while [ $# -gt 0 ]; do + case "$1" in + --formula) + formula_path="$2" + shift 2 + ;; + --local-model) + local_model="$2" + shift 2 + ;; + --poll-interval) + poll_interval="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" >&2 + exit 1 + ;; + esac + done + + # Default formula path — try both naming conventions + if [ -z "$formula_path" ]; then + formula_path="${FACTORY_ROOT}/formulas/${role}.toml" + if [ ! -f "$formula_path" ]; then + formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml" + fi + fi + + # Validate formula exists + if [ ! 
-f "$formula_path" ]; then + echo "Error: formula not found at ${formula_path}" >&2 + exit 1 + fi + + echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────" + echo "Formula: ${formula_path}" + if [ -n "$local_model" ]; then + echo "Local model: ${local_model}" + echo "Poll interval: ${poll_interval:-300}s" + fi + + # Ensure FORGE_TOKEN is set + if [ -z "${FORGE_TOKEN:-}" ]; then + echo "Error: FORGE_TOKEN not set" >&2 + exit 1 + fi + + # Get Forge URL + local forge_url="${FORGE_URL:-http://localhost:3000}" + echo "Forge: ${forge_url}" + + # Step 1: Create user via API (skip if exists) + echo "" + echo "Step 1: Creating user '${agent_name}' (if not exists)..." + + local user_pass="" + local admin_pass="" + + # Read admin password from .env for standalone runs (#184) + local env_file="${FACTORY_ROOT}/.env" + if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then + admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) + fi + + # Get admin token early (needed for both user creation and password reset) + local admin_user="disinto-admin" + admin_pass="${admin_pass:-admin}" + local admin_token="" + local admin_token_name + admin_token_name="temp-token-$(date +%s)" + admin_token=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" \ + -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || admin_token="" + if [ -z "$admin_token" ]; then + # Token might already exist — try listing + admin_token=$(curl -sf \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || admin_token="" + fi + if [ -z "$admin_token" ]; then + echo "Error: failed to obtain admin API token" >&2 + echo " Cannot proceed without admin privileges" >&2 + exit 1 + fi + + if curl -sf --max-time 5 
"${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then + echo " User '${agent_name}' already exists" + # Reset user password so we can get a token (#184) + user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + # Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x) + if _forgejo_exec forgejo admin user change-password \ + --username "${agent_name}" \ + --password "${user_pass}" \ + --must-change-password=false >/dev/null 2>&1; then + echo " Reset password for existing user '${agent_name}'" + else + echo " Warning: could not reset password for existing user" >&2 + fi + else + # Create user using basic auth (admin token fallback would poison subsequent calls) + # Create the user + user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + if curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users" \ + -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then + echo " Created user '${agent_name}'" + else + echo " Warning: failed to create user via admin API" >&2 + # Try alternative: user might already exist + if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then + echo " User '${agent_name}' exists (confirmed)" + else + echo " Error: failed to create user '${agent_name}'" >&2 + exit 1 + fi + fi + fi + + # Step 1.5: Generate Forge token for the new/existing user + echo "" + echo "Step 1.5: Generating Forge token for '${agent_name}'..." 
+ + # Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN) + local role_upper + role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]') + local token_var="FORGE_${role_upper}_TOKEN" + + # Generate token using the user's password (basic auth) + local agent_token="" + agent_token=$(curl -sf -X POST \ + -u "${agent_name}:${user_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${agent_name}/tokens" \ + -d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || agent_token="" + + if [ -z "$agent_token" ]; then + # Token name collision — create with timestamp suffix + agent_token=$(curl -sf -X POST \ + -u "${agent_name}:${user_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${agent_name}/tokens" \ + -d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || agent_token="" + fi + + if [ -z "$agent_token" ]; then + echo " Warning: failed to create API token for '${agent_name}'" >&2 + else + # Store token in .env under the role-specific variable name + if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then + # Use sed with alternative delimiter and proper escaping for special chars in token + local escaped_token + escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g') + sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file" + echo " ${agent_name} token updated (${token_var})" + else + printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file" + echo " ${agent_name} token saved (${token_var})" + fi + export "${token_var}=${agent_token}" + fi + + # Step 2: Create .profile repo on Forgejo + echo "" + echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." 
+ + if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then + echo " Repo '${agent_name}/.profile' already exists" + else + # Create the repo using the admin API to ensure it's created in the agent's namespace. + # Using POST /api/v1/user/repos with a user token would create the repo under the + # authenticated user, which could be wrong if the token belongs to a different user. + # The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the + # specified user's namespace. + local create_output + create_output=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${agent_name}/repos" \ + -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true + + if echo "$create_output" | grep -q '"id":\|[0-9]'; then + echo " Created repo '${agent_name}/.profile' (via admin API)" + else + echo " Error: failed to create repo '${agent_name}/.profile'" >&2 + echo " Response: ${create_output}" >&2 + exit 1 + fi + fi + + # Step 3: Clone repo and create initial commit + echo "" + echo "Step 3: Cloning repo and creating initial commit..." + + local clone_dir="/tmp/.profile-clone-${agent_name}" + rm -rf "$clone_dir" + mkdir -p "$clone_dir" + + # Build authenticated clone URL using basic auth (user_pass is always set in Step 1) + if [ -z "${user_pass:-}" ]; then + echo " Error: no user password available for cloning" >&2 + exit 1 + fi + + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|") + auth_url="${auth_url}/${agent_name}/.profile.git" + + # Display unauthenticated URL (auth token only in actual git clone command) + echo " Cloning: ${forge_url}/${agent_name}/.profile.git" + + # Try authenticated clone first (required for private repos) + if ! 
git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then + echo " Error: failed to clone repo with authentication" >&2 + echo " Note: Ensure the user has a valid API token with repository access" >&2 + rm -rf "$clone_dir" + exit 1 + fi + + # Configure git + git -C "$clone_dir" config user.name "disinto-admin" + git -C "$clone_dir" config user.email "disinto-admin@localhost" + + # Create directory structure + echo " Creating directory structure..." + mkdir -p "${clone_dir}/journal" + mkdir -p "${clone_dir}/knowledge" + touch "${clone_dir}/journal/.gitkeep" + touch "${clone_dir}/knowledge/.gitkeep" + + # Copy formula + echo " Copying formula..." + cp "$formula_path" "${clone_dir}/formula.toml" + + # Create README + if [ ! -f "${clone_dir}/README.md" ]; then + cat > "${clone_dir}/README.md" </dev/null; then + git -C "$clone_dir" commit -m "chore: initial .profile setup" -q + git -C "$clone_dir" push origin main >/dev/null 2>&1 || \ + git -C "$clone_dir" push origin master >/dev/null 2>&1 || true + echo " Committed: initial .profile setup" + else + echo " No changes to commit" + fi + + rm -rf "$clone_dir" + + # Step 4: Set up branch protection + echo "" + echo "Step 4: Setting up branch protection..." 
+ + # Source branch-protection.sh helper + local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh" + if [ -f "$bp_script" ]; then + # Source required environment + if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then + source "${FACTORY_ROOT}/lib/env.sh" + fi + + # Set up branch protection for .profile repo + if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then + echo " Branch protection configured for main branch" + echo " - Requires 1 approval before merge" + echo " - Admin-only merge enforcement" + echo " - Journal branch created for direct agent pushes" + else + echo " Warning: could not configure branch protection (Forgejo API may not be available)" + echo " Note: Branch protection can be set up manually later" + fi + else + echo " Warning: branch-protection.sh not found at ${bp_script}" + fi + + # Step 5: Create state marker + echo "" + echo "Step 5: Creating state marker..." + + local state_dir="${FACTORY_ROOT}/state" + mkdir -p "$state_dir" + local state_file="${state_dir}/.${role}-active" + + if [ ! -f "$state_file" ]; then + touch "$state_file" + echo " Created: ${state_file}" + else + echo " State marker already exists: ${state_file}" + fi + + # Step 6: Set up local model agent (if --local-model specified) + if [ -n "$local_model" ]; then + echo "" + echo "Step 6: Configuring local model agent..." + + local override_file="${FACTORY_ROOT}/docker-compose.override.yml" + local override_dir + override_dir=$(dirname "$override_file") + mkdir -p "$override_dir" + + # Validate model endpoint is reachable + echo " Validating model endpoint: ${local_model}" + if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then + # Try /v1/chat/completions as fallback endpoint check + if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then + echo " Warning: model endpoint may not be reachable at ${local_model}" + echo " Continuing with configuration..." 
+ fi + else + echo " Model endpoint is reachable" + fi + + # Generate service name from agent name (lowercase) + local service_name="agents-${agent_name}" + service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]') + + # Set default poll interval + local interval="${poll_interval:-300}" + + # Generate the override compose file + # Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time + # \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion + cat > "$override_file" </dev/null || true) fi if [ -n "$label_id" ]; then - eval "${varname}=\"${label_id}\"" + _ILC_LABEL_IDS["$name"]="$label_id" fi printf '%s' "$label_id" } -_ilc_backlog_id() { _ilc_ensure_label_id _ILC_BACKLOG_ID "backlog" "#0075ca"; } -_ilc_in_progress_id() { _ilc_ensure_label_id _ILC_IN_PROGRESS_ID "in-progress" "#1d76db"; } -_ilc_blocked_id() { _ilc_ensure_label_id _ILC_BLOCKED_ID "blocked" "#e11d48"; } +_ilc_backlog_id() { _ilc_ensure_label_id "backlog" "#0075ca"; } +_ilc_in_progress_id() { _ilc_ensure_label_id "in-progress" "#1d76db"; } +_ilc_blocked_id() { _ilc_ensure_label_id "blocked" "#e11d48"; } # --------------------------------------------------------------------------- -# issue_claim — add "in-progress" label, remove "backlog" label. +# issue_claim — assign issue to bot, add "in-progress" label, remove "backlog". 
# Args: issue_number +# Returns: 0 on success, 1 if already assigned to another agent # --------------------------------------------------------------------------- issue_claim() { local issue="$1" + + # Get current bot identity + local me + me=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/user" | jq -r '.login') || return 1 + + # Check current assignee + local current + current=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue}" | jq -r '.assignee.login // ""') || return 1 + + if [ -n "$current" ] && [ "$current" != "$me" ]; then + _ilc_log "issue #${issue} already assigned to ${current} — skipping" + return 1 + fi + + # Assign to self (Forgejo rejects if already assigned differently) + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}" \ + -d "{\"assignees\":[\"${me}\"]}" >/dev/null 2>&1 || return 1 + local ip_id bl_id ip_id=$(_ilc_in_progress_id) bl_id=$(_ilc_backlog_id) @@ -102,14 +125,23 @@ issue_claim() { "${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true fi _ilc_log "claimed issue #${issue}" + return 0 } # --------------------------------------------------------------------------- -# issue_release — remove "in-progress" label, add "backlog" label. +# issue_release — remove "in-progress" label, add "backlog" label, clear assignee. 
# Args: issue_number # --------------------------------------------------------------------------- issue_release() { local issue="$1" + + # Clear assignee + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}" \ + -d '{"assignees":[]}' >/dev/null 2>&1 || true + local ip_id bl_id ip_id=$(_ilc_in_progress_id) bl_id=$(_ilc_backlog_id) @@ -128,6 +160,27 @@ issue_release() { _ilc_log "released issue #${issue}" } +# --------------------------------------------------------------------------- +# _ilc_post_comment — Post a comment to an issue (internal helper) +# Args: issue_number body_text +# Uses a temp file to avoid large inline strings. +# --------------------------------------------------------------------------- +_ilc_post_comment() { + local issue="$1" body="$2" + + local tmpfile tmpjson + tmpfile=$(mktemp /tmp/ilc-comment-XXXXXX.md) + tmpjson="${tmpfile}.json" + printf '%s' "$body" > "$tmpfile" + jq -Rs '{body:.}' < "$tmpfile" > "$tmpjson" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}/comments" \ + --data-binary @"$tmpjson" 2>/dev/null || true + rm -f "$tmpfile" "$tmpjson" +} + # --------------------------------------------------------------------------- # issue_block — add "blocked" label, post diagnostic comment, remove in-progress. 
# Args: issue_number reason [result_text] @@ -154,14 +207,9 @@ issue_block() { fi } > "$tmpfile" - # Post comment - jq -Rs '{body:.}' < "$tmpfile" > "${tmpfile}.json" - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}/comments" \ - --data-binary @"${tmpfile}.json" 2>/dev/null || true - rm -f "$tmpfile" "${tmpfile}.json" + # Post comment using shared helper + _ilc_post_comment "$issue" "$(cat "$tmpfile")" + rm -f "$tmpfile" # Remove in-progress, add blocked local ip_id bk_id @@ -184,11 +232,19 @@ issue_block() { } # --------------------------------------------------------------------------- -# issue_close — PATCH state to closed. +# issue_close — clear assignee, PATCH state to closed. # Args: issue_number # --------------------------------------------------------------------------- issue_close() { local issue="$1" + + # Clear assignee before closing + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}" \ + -d '{"assignees":[]}' >/dev/null 2>&1 || true + curl -sf -X PATCH \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ diff --git a/lib/load-project.sh b/lib/load-project.sh index dcddc94..9d7afaf 100755 --- a/lib/load-project.sh +++ b/lib/load-project.sh @@ -10,7 +10,6 @@ # PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT, # CHECK_PIPELINE_STALL, CI_STALE_MINUTES, # MIRROR_NAMES, MIRROR_URLS, MIRROR_ (per configured mirror) -# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB) # # If no argument given, does nothing (allows poll scripts to work with # plain .env fallback for backwards compatibility). @@ -83,7 +82,7 @@ if mirrors: # Export parsed variables. # Inside the agents container (DISINTO_CONTAINER=1), compose already sets the # correct FORGE_URL (http://forgejo:3000) and path vars for the container -# environment. 
The TOML carries host-perspective values (localhost, /home/johba/…) +# environment. The TOML carries host-perspective values (localhost, /home/admin/…) # that would break container API calls and path resolution. Skip overriding # any env var that is already set when running inside the container. while IFS='=' read -r _key _val; do @@ -100,11 +99,9 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}" if [ -n "$FORGE_REPO" ]; then export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}" export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}" + # Extract repo owner (first path segment of owner/repo) + export FORGE_REPO_OWNER="${FORGE_REPO%%/*}" fi -# Backwards-compat aliases -export CODEBERG_REPO="${FORGE_REPO}" -export CODEBERG_API="${FORGE_API:-}" -export CODEBERG_WEB="${FORGE_WEB:-}" # Derive PROJECT_REPO_ROOT if not explicitly set if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then diff --git a/lib/mirrors.sh b/lib/mirrors.sh index e6dfba1..3ba561d 100644 --- a/lib/mirrors.sh +++ b/lib/mirrors.sh @@ -13,7 +13,16 @@ mirror_push() { local name url for name in $MIRROR_NAMES; do - url=$(eval "echo \"\$MIRROR_$(echo "$name" | tr '[:lower:]' '[:upper:]')\"") || true + # Convert name to uppercase env var name safely (only alphanumeric allowed) + local upper_name + upper_name=$(printf '%s' "$name" | tr '[:lower:]' '[:upper:]') + # Validate: only allow alphanumeric + underscore in var name + if [[ ! 
"$upper_name" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then + continue + fi + # Use indirect expansion safely (no eval) — MIRROR_ prefix required + local varname="MIRROR_${upper_name}" + url="${!varname:-}" [ -z "$url" ] && continue # Ensure remote exists with correct URL diff --git a/lib/ops-setup.sh b/lib/ops-setup.sh new file mode 100644 index 0000000..c55f1b1 --- /dev/null +++ b/lib/ops-setup.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash +# ops-setup.sh — Setup ops repository (disinto-ops) +# +# Source from bin/disinto: +# source "$(dirname "$0")/../lib/ops-setup.sh" +# +# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT +# Optional: admin_token (falls back to FORGE_TOKEN for admin operations) +# +# Functions: +# setup_ops_repo [primary_branch] +# - Create ops repo on Forgejo if it doesn't exist +# - Configure bot collaborators with appropriate permissions +# - Clone or initialize ops repo locally +# - Seed directory structure (vault, knowledge, evidence) +# - Export _ACTUAL_OPS_SLUG for caller to use +# +# Globals modified: +# _ACTUAL_OPS_SLUG - resolved ops repo slug after function completes + +set -euo pipefail + +setup_ops_repo() { + + local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" + local org_name="${ops_slug%%/*}" + local ops_name="${ops_slug##*/}" + + echo "" + echo "── Ops repo setup ─────────────────────────────────────" + + # Determine the actual ops repo location by searching across possible namespaces + # This handles cases where the repo was created under a different namespace + # due to past bugs (e.g., dev-bot/disinto-ops instead of disinto-admin/disinto-ops) + local actual_ops_slug="" + local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" ) + local http_code + + for ns in "${possible_namespaces[@]}"; do + slug="${ns}/${ops_name}" + if curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then + actual_ops_slug="$slug" + echo "Ops repo: ${slug} 
(found at ${slug})" + break + fi + done + + # If not found, try to create it in the configured namespace + if [ -z "$actual_ops_slug" ]; then + echo "Creating ops repo in namespace: ${org_name}" + # Create org if it doesn't exist + curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs" \ + -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true + if curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then + actual_ops_slug="${org_name}/${ops_name}" + echo "Ops repo: ${actual_ops_slug} created on Forgejo" + else + # Fallback: use admin API to create repo under the target namespace + http_code=$(curl -s -o /dev/null -w "%{http_code}" \ + -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${org_name}/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" 2>/dev/null || echo "0") + if [ "$http_code" = "201" ]; then + actual_ops_slug="${org_name}/${ops_name}" + echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)" + else + echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2 + return 1 + fi + fi + fi + + # Configure collaborators on the ops repo + local bot_user bot_perm + declare -A bot_permissions=( + [dev-bot]="write" + [review-bot]="read" + [planner-bot]="write" + [gardener-bot]="write" + [vault-bot]="write" + [supervisor-bot]="read" + [predictor-bot]="read" + [architect-bot]="write" + ) + 
+ # Add all bot users as collaborators with appropriate permissions + # vault branch protection (#77) requires: + # - Admin-only merge to main (enforced by admin_enforced: true) + # - Bots can push branches and create PRs, but cannot merge + for bot_user in "${!bot_permissions[@]}"; do + bot_perm="${bot_permissions[$bot_user]}" + if curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \ + -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then + echo " + ${bot_user} = ${bot_perm} collaborator" + else + echo " ! ${bot_user} = ${bot_perm} (already set or failed)" + fi + done + + # Add disinto-admin as admin collaborator + if curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \ + -d '{"permission":"admin"}' >/dev/null 2>&1; then + echo " + disinto-admin = admin collaborator" + else + echo " ! disinto-admin = admin (already set or failed)" + fi + + # Clone ops repo locally if not present + if [ ! 
-d "${ops_root}/.git" ]; then + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") + local clone_url="${auth_url}/${actual_ops_slug}.git" + echo "Cloning: ops repo -> ${ops_root}" + if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then + echo "Ops repo: ${actual_ops_slug} cloned successfully" + else + echo "Initializing: ops repo at ${ops_root}" + mkdir -p "$ops_root" + git -C "$ops_root" init --initial-branch="${primary_branch}" -q + # Set remote to the actual ops repo location + git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git" + echo "Ops repo: ${actual_ops_slug} initialized locally" + fi + else + echo "Ops repo: ${ops_root} (already exists locally)" + # Verify remote is correct + local current_remote + current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true) + local expected_remote="${forge_url}/${actual_ops_slug}.git" + if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then + echo " Fixing: remote URL from ${current_remote} to ${expected_remote}" + git -C "$ops_root" remote set-url origin "$expected_remote" + fi + fi + + # Seed directory structure + local seeded=false + mkdir -p "${ops_root}/vault/pending" + mkdir -p "${ops_root}/vault/approved" + mkdir -p "${ops_root}/vault/fired" + mkdir -p "${ops_root}/vault/rejected" + mkdir -p "${ops_root}/knowledge" + mkdir -p "${ops_root}/evidence/engagement" + + if [ ! -f "${ops_root}/README.md" ]; then + cat > "${ops_root}/README.md" < **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo. 
+ +## Branch protection + +- \`main\`: 2 reviewers required for vault items +- Journal/evidence commits may use lighter rules +OPSEOF + seeded=true + fi + + # Create stub files if they don't exist + [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; } + [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } + [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } + + # Commit and push seed content + if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then + # Auto-configure repo-local git identity if missing (#778) + if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then + git -C "$ops_root" config user.name "disinto-admin" + fi + if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then + git -C "$ops_root" config user.email "disinto-admin@localhost" + fi + + git -C "$ops_root" add -A + if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then + git -C "$ops_root" commit -m "chore: seed ops repo structure" -q + # Push if remote exists + if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then + if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then + echo "Seeded: ops repo with initial structure" + else + echo "Warning: failed to push seed content to ops repo" >&2 + fi + fi + fi + fi + + # Export resolved slug for the caller to write back to the project TOML + _ACTUAL_OPS_SLUG="${actual_ops_slug}" +} diff --git a/lib/pr-lifecycle.sh b/lib/pr-lifecycle.sh index ad6f0de..e097f34 100644 --- a/lib/pr-lifecycle.sh +++ b/lib/pr-lifecycle.sh @@ -61,13 +61,15 @@ _prl_log() { # --------------------------------------------------------------------------- # pr_create — Create a PR via forge API. 
-# Args: branch title body [base_branch] +# Args: branch title body [base_branch] [api_url] # Stdout: PR number # Returns: 0=created (or found existing), 1=failed +# api_url defaults to FORGE_API if not provided # --------------------------------------------------------------------------- pr_create() { local branch="$1" title="$2" body="$3" local base="${4:-${PRIMARY_BRANCH:-main}}" + local api_url="${5:-${FORGE_API}}" local tmpfile resp http_code resp_body pr_num tmpfile=$(mktemp /tmp/prl-create-XXXXXX.json) @@ -77,7 +79,7 @@ pr_create() { resp=$(curl -s -w "\n%{http_code}" -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ - "${FORGE_API}/pulls" \ + "${api_url}/pulls" \ --data-binary @"$tmpfile") || true rm -f "$tmpfile" @@ -92,7 +94,7 @@ pr_create() { return 0 ;; 409) - pr_num=$(pr_find_by_branch "$branch") || true + pr_num=$(pr_find_by_branch "$branch" "$api_url") || true if [ -n "$pr_num" ]; then _prl_log "PR already exists: #${pr_num}" printf '%s' "$pr_num" @@ -110,15 +112,17 @@ pr_create() { # --------------------------------------------------------------------------- # pr_find_by_branch — Find an open PR by head branch name. -# Args: branch +# Args: branch [api_url] # Stdout: PR number # Returns: 0=found, 1=not found +# api_url defaults to FORGE_API if not provided # --------------------------------------------------------------------------- pr_find_by_branch() { local branch="$1" + local api_url="${2:-${FORGE_API}}" local pr_num pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/pulls?state=open&limit=20" | \ + "${api_url}/pulls?state=open&limit=20" | \ jq -r --arg b "$branch" '.[] | select(.head.ref == $b) | .number' \ | head -1) || true if [ -n "$pr_num" ]; then @@ -344,6 +348,29 @@ pr_is_merged() { [ "$merged" = "true" ] } +# --------------------------------------------------------------------------- +# pr_close — Close a PR via forge API. 
+# Args: pr_number +# Returns: 0=closed, 1=error +# --------------------------------------------------------------------------- +pr_close() { + local pr_num="$1" + + _prl_log "closing PR #${pr_num}" + local resp http_code + resp=$(curl -sf -w "\n%{http_code}" -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/pulls/${pr_num}" \ + -d '{"state":"closed"}' 2>/dev/null) || true + http_code=$(printf '%s\n' "$resp" | tail -1) + if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then + _prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}" + return 1 + fi + _prl_log "PR #${pr_num} closed" +} + # --------------------------------------------------------------------------- # pr_walk_to_merge — Walk a PR through CI, review, and merge. # @@ -378,11 +405,18 @@ pr_walk_to_merge() { if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then ci_retry_count=$((ci_retry_count + 1)) _prl_log "infra failure — retriggering CI (retry ${ci_retry_count})" + local rebase_output rebase_rc ( cd "$worktree" && \ git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \ git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \ git rebase "${remote}/${PRIMARY_BRANCH}" && \ - git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true + git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1 + rebase_rc=$? 
+ rebase_output=$(cat /tmp/rebase-output-$$) + rm -f /tmp/rebase-output-$$ + if [ "$rebase_rc" -ne 0 ]; then + _prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)" + fi continue fi @@ -394,6 +428,23 @@ pr_walk_to_merge() { fi _prl_log "CI failed — invoking agent (attempt ${ci_fix_count}/${max_ci_fixes})" + + # Get CI logs from SQLite database if available + local ci_logs="" + if [ -n "$_PR_CI_PIPELINE" ] && [ -n "${FACTORY_ROOT:-}" ]; then + ci_logs=$(ci_get_logs "$_PR_CI_PIPELINE" 2>/dev/null | tail -50) || ci_logs="" + fi + + local logs_section="" + if [ -n "$ci_logs" ]; then + logs_section=" +CI Log Output (last 50 lines): +\`\`\` +${ci_logs} +\`\`\` +" + fi + agent_run --resume "$session_id" --worktree "$worktree" \ "CI failed on PR #${pr_num} (attempt ${ci_fix_count}/${max_ci_fixes}). @@ -401,7 +452,7 @@ Pipeline: #${_PR_CI_PIPELINE:-?} Failure type: ${_PR_CI_FAILURE_TYPE:-unknown} Error log: -${_PR_CI_ERROR_LOG:-No logs available.} +${_PR_CI_ERROR_LOG:-No logs available.}${logs_section} Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: git fetch ${remote} ${PRIMARY_BRANCH} && git rebase ${remote}/${PRIMARY_BRANCH} @@ -437,11 +488,7 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: _PR_WALK_EXIT_REASON="merged" return 0 fi - if [ "$rc" -eq 2 ]; then - _PR_WALK_EXIT_REASON="merge_blocked" - return 1 - fi - # Merge failed (conflict) — ask agent to rebase + # Merge failed (conflict or HTTP 405) — ask agent to rebase _prl_log "merge failed — invoking agent to rebase" agent_run --resume "$session_id" --worktree "$worktree" \ "PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown} @@ -487,8 +534,7 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push: # build_phase_protocol_prompt — Generate push/commit instructions for Claude. # # For the synchronous agent_run architecture: tells Claude how to commit and -# push (no phase files). 
For the tmux session architecture, use the -# build_phase_protocol_prompt in dev/phase-handler.sh instead. +# push (no phase files). # # Args: branch [remote] # Stdout: instruction text diff --git a/lib/release.sh b/lib/release.sh new file mode 100644 index 0000000..6eb03ee --- /dev/null +++ b/lib/release.sh @@ -0,0 +1,178 @@ +#!/usr/bin/env bash +# ============================================================================= +# release.sh — disinto_release() function +# +# Handles vault TOML creation, branch setup on ops repo, PR creation, +# and auto-merge request for a versioned release. +# +# Globals expected: +# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) +# FORGE_TOKEN - API token for Forge operations +# FORGE_OPS_REPO - Ops repo slug (e.g. disinto-admin/myproject-ops) +# FACTORY_ROOT - Root of the disinto factory +# PRIMARY_BRANCH - Primary branch name (e.g. main) +# +# Usage: +# source "${FACTORY_ROOT}/lib/release.sh" +# disinto_release +# ============================================================================= +set -euo pipefail + +# Source vault.sh for _vault_log helper +source "${FACTORY_ROOT}/lib/vault.sh" + +# Assert required globals are set before using this module. 
+_assert_release_globals() { + local missing=() + [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") + [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") + [ -z "${FORGE_OPS_REPO:-}" ] && missing+=("FORGE_OPS_REPO") + [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") + [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") + if [ "${#missing[@]}" -gt 0 ]; then + echo "Error: release.sh requires these globals to be set: ${missing[*]}" >&2 + exit 1 + fi +} + +disinto_release() { + _assert_release_globals + + local version="${1:-}" + local formula_path="${FACTORY_ROOT}/formulas/release.toml" + + if [ -z "$version" ]; then + echo "Error: version required" >&2 + echo "Usage: disinto release " >&2 + echo "Example: disinto release v1.2.0" >&2 + exit 1 + fi + + # Validate version format (must start with 'v' followed by semver) + if ! echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2 + exit 1 + fi + + # Load project config to get FORGE_OPS_REPO + if [ -z "${PROJECT_NAME:-}" ]; then + # PROJECT_NAME is unset - detect project TOML from projects/ directory + local found_toml + found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1) + if [ -n "$found_toml" ]; then + source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml" + fi + else + local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml" + if [ -f "$project_toml" ]; then + source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml" + fi + fi + + # Check formula exists + if [ ! -f "$formula_path" ]; then + echo "Error: release formula not found at ${formula_path}" >&2 + exit 1 + fi + + # Get the ops repo root + local ops_root="${FACTORY_ROOT}/../disinto-ops" + if [ ! 
-d "${ops_root}/.git" ]; then + echo "Error: ops repo not found at ${ops_root}" >&2 + echo " Run 'disinto init' to set up the ops repo first" >&2 + exit 1 + fi + + # Generate a unique ID for the vault item + local id="release-${version//./}" + local vault_toml="${ops_root}/vault/actions/${id}.toml" + + # Create vault TOML with the specific version + cat > "$vault_toml" </dev/null || true + + # Push branch + git push -u origin "$branch_name" 2>/dev/null || { + echo "Error: failed to push branch" >&2 + exit 1 + } + ) + + # Create PR + local pr_response + pr_response=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \ + -d "{\"title\":\"${pr_title}\",\"head\":\"${branch_name}\",\"base\":\"${PRIMARY_BRANCH}\",\"body\":\"$(echo "$pr_body" | sed ':a;N;$!ba;s/\n/\\n/g')\"}" 2>/dev/null) || { + echo "Error: failed to create PR" >&2 + echo "Response: ${pr_response}" >&2 + exit 1 + } + + local pr_number + pr_number=$(echo "$pr_response" | jq -r '.number') + + local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}" + + # Enable auto-merge on the PR — Forgejo will auto-merge after approval + _vault_log "Enabling auto-merge for PR #${pr_number}" + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \ + -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { + echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2 + } + + echo "" + echo "Release PR created: ${pr_url}" + echo "" + echo "Next steps:" + echo " 1. Review the PR" + echo " 2. Approve the PR (auto-merge will trigger after approval)" + echo " 3. The vault runner will execute the release formula" + echo "" + echo "After merge, the release will:" + echo " 1. Tag Forgejo main with ${version}" + echo " 2. 
Push tag to mirrors (Codeberg, GitHub)" + echo " 3. Build and tag the agents Docker image" + echo " 4. Restart agent containers" +} diff --git a/lib/stack-lock.sh b/lib/stack-lock.sh new file mode 100644 index 0000000..6c8c1ed --- /dev/null +++ b/lib/stack-lock.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# stack-lock.sh — File-based lock protocol for singleton project stack access +# +# Prevents CI pipelines and the reproduce-agent from stepping on each other +# when sharing a single project stack (e.g. harb docker compose). +# +# Lock file: /home/agent/data/locks/-stack.lock +# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."} +# +# Protocol: +# 1. stack_lock_check — inspect current lock state +# 2. stack_lock_acquire — wait until lock is free, then claim it +# 3. stack_lock_release — delete lock file when done +# +# Heartbeat: callers must update the heartbeat every 2 minutes while holding +# the lock by calling stack_lock_heartbeat. A heartbeat older than 10 minutes +# is considered stale — the next acquire will break it. +# +# Usage: +# source "$(dirname "$0")/../lib/stack-lock.sh" +# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject" +# trap 'stack_lock_release "myproject"' EXIT +# # ... do work ... +# stack_lock_release "myproject" + +set -euo pipefail + +STACK_LOCK_DIR="${HOME}/data/locks" +STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls +STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale +STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds + +# _stack_lock_path +# Print the path of the lock file for the given project. +_stack_lock_path() { + local project="$1" + echo "${STACK_LOCK_DIR}/${project}-stack.lock" +} + +# _stack_lock_now +# Print current UTC timestamp in ISO-8601 format. +_stack_lock_now() { + date -u +"%Y-%m-%dT%H:%M:%SZ" +} + +# _stack_lock_epoch +# Convert an ISO-8601 UTC timestamp to a Unix epoch integer. 
+_stack_lock_epoch() { + local ts="$1" + # Strip trailing Z, replace T with space for `date -d` + date -u -d "${ts%Z}" +%s 2>/dev/null || date -u -j -f "%Y-%m-%dT%H:%M:%S" "${ts%Z}" +%s 2>/dev/null +} + +# stack_lock_check +# Print lock status to stdout: "free", "held:", or "stale:". +# Returns 0 in all cases (status is in stdout). +stack_lock_check() { + local project="$1" + local lock_file + lock_file="$(_stack_lock_path "$project")" + + if [ ! -f "$lock_file" ]; then + echo "free" + return 0 + fi + + local holder heartbeat + holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$lock_file" 2>/dev/null || echo "unknown") + heartbeat=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$lock_file" 2>/dev/null || echo "") + + if [ -z "$heartbeat" ]; then + echo "stale:${holder}" + return 0 + fi + + local hb_epoch now_epoch age + hb_epoch=$(_stack_lock_epoch "$heartbeat" 2>/dev/null || echo "0") + now_epoch=$(date -u +%s) + age=$(( now_epoch - hb_epoch )) + + if [ "$age" -gt "$STACK_LOCK_STALE_SECONDS" ]; then + echo "stale:${holder}" + else + echo "held:${holder}" + fi +} + +# stack_lock_acquire [max_wait_seconds] +# Acquire the lock for on behalf of . +# Polls every STACK_LOCK_POLL_INTERVAL seconds. +# Breaks stale locks automatically. +# Exits non-zero if the lock cannot be acquired within max_wait_seconds. 
+stack_lock_acquire() { + local holder="$1" + local project="$2" + local max_wait="${3:-$STACK_LOCK_MAX_WAIT}" + local lock_file + lock_file="$(_stack_lock_path "$project")" + local deadline + deadline=$(( $(date -u +%s) + max_wait )) + + mkdir -p "$STACK_LOCK_DIR" + + while true; do + local status + status=$(stack_lock_check "$project") + + case "$status" in + free) + # Write to temp file then rename to avoid partial reads by other processes + local tmp_lock + tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") + local now + now=$(_stack_lock_now) + printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ + "$holder" "$now" "$now" > "$tmp_lock" + mv "$tmp_lock" "$lock_file" + echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2 + return 0 + ;; + stale:*) + local stale_holder="${status#stale:}" + echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2 + rm -f "$lock_file" + # Loop back immediately to re-check and claim + ;; + held:*) + local cur_holder="${status#held:}" + local remaining + remaining=$(( deadline - $(date -u +%s) )) + if [ "$remaining" -le 0 ]; then + echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2 + return 1 + fi + echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2 + sleep "$STACK_LOCK_POLL_INTERVAL" + ;; + *) + echo "[stack-lock] unexpected status '${status}' for ${project}" >&2 + return 1 + ;; + esac + done +} + +# stack_lock_heartbeat +# Update the heartbeat timestamp in the lock file. +# Should be called every 2 minutes while holding the lock. +# No-op if the lock file is absent or held by a different holder. 
+stack_lock_heartbeat() { + local holder="$1" + local project="$2" + local lock_file + lock_file="$(_stack_lock_path "$project")" + + [ -f "$lock_file" ] || return 0 + + local current_holder + current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") + [ "$current_holder" = "$holder" ] || return 0 + + local since + since=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lock_file" 2>/dev/null || echo "") + local now + now=$(_stack_lock_now) + + local tmp_lock + tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") + printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ + "$holder" "$since" "$now" > "$tmp_lock" + mv "$tmp_lock" "$lock_file" +} + +# stack_lock_release [holder_id] +# Release the lock for . +# If holder_id is provided, only releases if the lock is held by that holder +# (prevents accidentally releasing someone else's lock). +stack_lock_release() { + local project="$1" + local holder="${2:-}" + local lock_file + lock_file="$(_stack_lock_path "$project")" + + [ -f "$lock_file" ] || return 0 + + if [ -n "$holder" ]; then + local current_holder + current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") + if [ "$current_holder" != "$holder" ]; then + echo "[stack-lock] refusing to release: lock held by '${current_holder}', not '${holder}'" >&2 + return 1 + fi + fi + + rm -f "$lock_file" + echo "[stack-lock] released lock for ${project}" >&2 +} diff --git a/lib/vault.sh b/lib/vault.sh new file mode 100644 index 0000000..812d464 --- /dev/null +++ b/lib/vault.sh @@ -0,0 +1,232 @@ +#!/usr/bin/env bash +# vault.sh — Helper for agents to create vault PRs on ops repo +# +# Source after lib/env.sh: +# source "$(dirname "$0")/../lib/env.sh" +# source "$(dirname "$0")/lib/vault.sh" +# +# Required globals: FORGE_TOKEN, FORGE_URL, FORGE_REPO, 
FORGE_OPS_REPO +# Optional: OPS_REPO_ROOT (local path for ops repo) +# +# Functions: +# vault_request — Create vault PR, return PR number +# +# The function: +# 1. Validates TOML content using validate_vault_action() from vault/vault-env.sh +# 2. Creates a branch on the ops repo: vault/ +# 3. Writes TOML to vault/actions/.toml on that branch +# 4. Creates PR targeting main with title "vault: " +# 5. Body includes context field from TOML +# 6. Returns PR number (existing or newly created) +# +# Idempotent: if PR for same action-id exists, returns its number +# +# Uses Forgejo REST API (not git push) — works from containers without SSH + +set -euo pipefail + +# Internal log helper +_vault_log() { + if declare -f log >/dev/null 2>&1; then + log "vault: $*" + else + printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 + fi +} + +# Get ops repo API URL +_vault_ops_api() { + printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}" +} + +# ----------------------------------------------------------------------------- +# vault_request — Create a vault PR or return existing one +# Args: action_id toml_content +# Stdout: PR number +# Returns: 0=success, 1=validation failed, 2=API error +# ----------------------------------------------------------------------------- +vault_request() { + local action_id="$1" + local toml_content="$2" + + if [ -z "$action_id" ]; then + echo "ERROR: action_id is required" >&2 + return 1 + fi + + if [ -z "$toml_content" ]; then + echo "ERROR: toml_content is required" >&2 + return 1 + fi + + # Check if PR already exists for this action + local existing_pr + existing_pr=$(pr_find_by_branch "vault/${action_id}" "$(_vault_ops_api)") || true + if [ -n "$existing_pr" ]; then + _vault_log "PR already exists for action $action_id: #${existing_pr}" + printf '%s' "$existing_pr" + return 0 + fi + + # Validate TOML content + local tmp_toml + tmp_toml=$(mktemp /tmp/vault-XXXXXX.toml) + trap 'rm -f "$tmp_toml"' RETURN + + printf '%s' 
"$toml_content" > "$tmp_toml" + + # Source vault-env.sh for validate_vault_action + local vault_env="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/vault-env.sh" + if [ ! -f "$vault_env" ]; then + echo "ERROR: vault-env.sh not found at $vault_env" >&2 + return 1 + fi + + # Save caller's FORGE_TOKEN, source vault-env.sh for validate_vault_action, + # then restore caller's token so PR creation uses agent's identity (not vault-bot) + local _saved_forge_token="${FORGE_TOKEN:-}" + if ! source "$vault_env"; then + FORGE_TOKEN="${_saved_forge_token:-}" + echo "ERROR: failed to source vault-env.sh" >&2 + return 1 + fi + # Restore caller's FORGE_TOKEN after validation + FORGE_TOKEN="${_saved_forge_token:-}" + + # Run validation + if ! validate_vault_action "$tmp_toml"; then + echo "ERROR: TOML validation failed" >&2 + return 1 + fi + + # Extract values for PR creation + local pr_title pr_body + pr_title="vault: ${action_id}" + pr_body="Vault action: ${action_id} + +Context: ${VAULT_ACTION_CONTEXT:-No context provided} + +Formula: ${VAULT_ACTION_FORMULA:-} +Secrets: ${VAULT_ACTION_SECRETS:-} + +--- +This vault action has been created by an agent and requires admin approval +before execution. See the TOML file for details." 
+ + # Get ops repo API URL + local ops_api + ops_api="$(_vault_ops_api)" + + # Create branch + local branch="vault/${action_id}" + local branch_exists + + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${ops_api}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" != "200" ]; then + # Branch doesn't exist, create it from main + _vault_log "Creating branch ${branch} on ops repo" + + # Get the commit SHA of main branch + local main_sha + main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ + jq -r '.commit.id // empty' || true) + + if [ -z "$main_sha" ]; then + # Fallback: get from refs + main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ + jq -r '.object.sha // empty' || true) + fi + + if [ -z "$main_sha" ]; then + echo "ERROR: could not get main branch SHA" >&2 + return 1 + fi + + # Create the branch + if ! curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${ops_api}/git/branches" \ + -d "{\"ref\":\"${branch}\",\"sha\":\"${main_sha}\"}" >/dev/null 2>&1; then + echo "ERROR: failed to create branch ${branch}" >&2 + return 1 + fi + else + _vault_log "Branch ${branch} already exists" + fi + + # Write TOML file to branch via API + local file_path="vault/actions/${action_id}.toml" + _vault_log "Writing ${file_path} to branch ${branch}" + + # Encode TOML content as base64 + local encoded_content + encoded_content=$(printf '%s' "$toml_content" | base64 -w 0) + + # Upload file using Forgejo content API + if ! 
curl -sf -X PUT \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${ops_api}/contents/${file_path}" \ + -d "{\"message\":\"vault: add ${action_id}\",\"branch\":\"${branch}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then + echo "ERROR: failed to write ${file_path} to branch ${branch}" >&2 + return 1 + fi + + # Create PR + _vault_log "Creating PR for ${branch}" + + local pr_num + pr_num=$(pr_create "$branch" "$pr_title" "$pr_body" "$PRIMARY_BRANCH" "$ops_api") || { + echo "ERROR: failed to create PR" >&2 + return 1 + } + + # Enable auto-merge on the PR — Forgejo will auto-merge after approval + _vault_log "Enabling auto-merge for PR #${pr_num}" + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${ops_api}/pulls/${pr_num}/merge" \ + -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { + _vault_log "Warning: failed to enable auto-merge (may already be enabled or not supported)" + } + + # Add labels to PR (vault, pending-approval) + _vault_log "PR #${pr_num} created, adding labels" + + # Get label IDs + local vault_label_id pending_label_id + vault_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${ops_api}/labels" 2>/dev/null | \ + jq -r --arg n "vault" '.[] | select(.name == $n) | .id // empty' || true) + + pending_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${ops_api}/labels" 2>/dev/null | \ + jq -r --arg n "pending-approval" '.[] | select(.name == $n) | .id // empty' || true) + + # Add labels if they exist + if [ -n "$vault_label_id" ]; then + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${ops_api}/issues/${pr_num}/labels" \ + -d "[{\"id\":${vault_label_id}}]" >/dev/null 2>&1 || true + fi + + if [ -n "$pending_label_id" ]; then + curl -sf -X 
POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${ops_api}/issues/${pr_num}/labels" \ + -d "[{\"id\":${pending_label_id}}]" >/dev/null 2>&1 || true + fi + + printf '%s' "$pr_num" + return 0 +} diff --git a/planner/AGENTS.md b/planner/AGENTS.md index 9749afd..7343b7c 100644 --- a/planner/AGENTS.md +++ b/planner/AGENTS.md @@ -1,4 +1,4 @@ - + # Planner Agent **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints), @@ -22,12 +22,13 @@ to detect issues ping-ponging between backlog and underspecified. Issues that need human decisions or external resources are filed as vault procurement items (`$OPS_REPO_ROOT/vault/pending/*.md`) instead of being escalated. Phase 3 (file-at-constraints): identify the top 3 unresolved prerequisites that block -the most downstream objectives — file issues as either `backlog` (code changes, -dev-agent) or `action` (run existing formula, action-agent). **Stuck issues -(detected BOUNCED/LABEL_CHURN) are dispatched to the `groom-backlog` formula -in breakdown mode instead of being re-promoted** — this breaks the ping-pong -loop by splitting them into dev-agent-sized sub-issues. **Human-blocked issues -are routed through the vault** — the planner files an actionable procurement +the most downstream objectives — file issues using a **template-or-vision gate**: +read issue templates from `.codeberg/ISSUE_TEMPLATE/*.yaml`, attempt to fill +template fields (affected_files ≤3, acceptance_criteria ≤5, single clear approach), +then apply complexity test: if work touches one subsystem with no design forks, +file as `backlog` using matching template (bug/feature/refactor); otherwise +label `vision` with problem statement and why it's vision-sized. 
**Human-blocked +issues are routed through the vault** — the planner files an actionable procurement item (`$OPS_REPO_ROOT/vault/pending/-.md` with What/Why/Human action/Factory will then sections) and marks the prerequisite as blocked-on-vault in the tree. Deduplication: checks pending/ + approved/ + fired/ before creating. @@ -56,15 +57,15 @@ component, not work. prediction-triage, update-prerequisite-tree, file-at-constraints, journal-and-memory, commit-and-pr) with `needs` dependencies. Claude executes all steps in a single interactive session with tool access -- `formulas/groom-backlog.toml` — Dual-mode formula: grooming (default) or - breakdown (dispatched by planner for bounced/stuck issues — splits the issue - into dev-agent-sized sub-issues, removes `underspecified` label) +- `formulas/groom-backlog.toml` — Grooming formula for backlog triage and + grooming. (Note: the planner no longer dispatches breakdown mode — complex + issues are labeled `vision` instead.) - `$OPS_REPO_ROOT/prerequisites.md` — Prerequisite tree: versioned constraint map linking VISION.md objectives to their prerequisites. Planner owns the tree, humans steer by editing VISION.md. Tree grows organically as the planner discovers new prerequisites during runs - `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo) -- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo) + **Constraint focus**: The planner uses Theory of Constraints to avoid premature issue filing. 
Only the top 3 unresolved prerequisites that block the most diff --git a/planner/planner-run.sh b/planner/planner-run.sh index 313f6ef..3c71d44 100755 --- a/planner/planner-run.sh +++ b/planner/planner-run.sh @@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="$SCRIPT_DIR/planner.log" +LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -43,17 +43,32 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-planner-run" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="planner" + +# Override log() to append to planner-specific log file +# shellcheck disable=SC2034 +log() { + local agent="${LOG_AGENT:-planner}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" +} # ── Guards ──────────────────────────────────────────────────────────────── check_active planner acquire_cron_lock "/tmp/planner-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Planner run start ---" +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + +# ── Resolve agent identity for .profile repo ──────────────────────────── +resolve_agent_identity || true + # ── Load formula + context ─────────────────────────────────────────────── -load_formula "$FACTORY_ROOT/formulas/run-planner.toml" +load_formula_or_profile "planner" "$FACTORY_ROOT/formulas/run-planner.toml" || exit 1 build_context_block VISION.md AGENTS.md ops:RESOURCES.md ops:prerequisites.md # ── Build structural analysis graph ────────────────────────────────────── @@ -72,24 +87,8 @@ $(cat 
"$MEMORY_FILE") " fi -# ── Read recent journal files ────────────────────────────────────────── -JOURNAL_BLOCK="" -JOURNAL_DIR="$OPS_REPO_ROOT/journal/planner" -if [ -d "$JOURNAL_DIR" ]; then - # Load last 5 journal files (most recent first) for run history context - JOURNAL_FILES=$(find "$JOURNAL_DIR" -name '*.md' -type f | sort -r | head -5) - if [ -n "$JOURNAL_FILES" ]; then - JOURNAL_BLOCK=" -### Recent journal entries (journal/planner/) -" - while IFS= read -r jf; do - JOURNAL_BLOCK="${JOURNAL_BLOCK} -#### $(basename "$jf") -$(cat "$jf") -" - done <<< "$JOURNAL_FILES" - fi -fi +# ── Prepare .profile context (lessons injection) ───────────────────────── +formula_prepare_profile_context # ── Read scratch file (compaction survival) ─────────────────────────────── SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") @@ -105,7 +104,7 @@ build_sdk_prompt_footer " PROMPT="You are the strategic planner for ${FORGE_REPO}. Work through the formula below. ## Project context -${CONTEXT_BLOCK}${MEMORY_BLOCK}${JOURNAL_BLOCK} +${CONTEXT_BLOCK}${MEMORY_BLOCK}$(formula_lessons_block) ${GRAPH_SECTION} ${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} } @@ -125,5 +124,8 @@ export CLAUDE_MODEL="opus" agent_run --worktree "$WORKTREE" "$PROMPT" log "agent_run complete" +# Write journal entry post-session +profile_write_journal "planner-run" "Planner run $(date -u +%Y-%m-%d)" "complete" "" || true + rm -f "$SCRATCH_FILE" log "--- Planner run done ---" diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md index 327a842..d0bae51 100644 --- a/predictor/AGENTS.md +++ b/predictor/AGENTS.md @@ -1,4 +1,4 @@ - + # Predictor Agent **Role**: Abstract adversary (the "goblin"). 
Runs a 2-step formula diff --git a/predictor/predictor-run.sh b/predictor/predictor-run.sh index fb9bf51..889fe1c 100755 --- a/predictor/predictor-run.sh +++ b/predictor/predictor-run.sh @@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="$SCRIPT_DIR/predictor.log" +LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -44,22 +44,40 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-predictor-run" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="predictor" + +# Override log() to append to predictor-specific log file +# shellcheck disable=SC2034 +log() { + local agent="${LOG_AGENT:-predictor}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" +} # ── Guards ──────────────────────────────────────────────────────────────── check_active predictor acquire_cron_lock "/tmp/predictor-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Predictor run start ---" +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + +# ── Resolve agent identity for .profile repo ──────────────────────────── +resolve_agent_identity || true + # ── Load formula + context ─────────────────────────────────────────────── -load_formula "$FACTORY_ROOT/formulas/run-predictor.toml" +load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1 build_context_block AGENTS.md ops:RESOURCES.md VISION.md ops:prerequisites.md # ── Build structural analysis graph ────────────────────────────────────── 
build_graph_section +# ── Prepare .profile context (lessons injection) ───────────────────────── +formula_prepare_profile_context + # ── Read scratch file (compaction survival) ─────────────────────────────── SCRATCH_CONTEXT=$(read_scratch_context "$SCRATCH_FILE") SCRATCH_INSTRUCTION=$(build_scratch_instruction "$SCRATCH_FILE") @@ -82,9 +100,10 @@ Use WebSearch for external signal scanning — be targeted (project dependencies and tools only, not general news). Limit to 3 web searches per run. ## Project context -${CONTEXT_BLOCK} +${CONTEXT_BLOCK}$(formula_lessons_block) ${GRAPH_SECTION} -${SCRATCH_CONTEXT} +${SCRATCH_CONTEXT:+${SCRATCH_CONTEXT} +} ## Formula ${FORMULA_CONTENT} @@ -98,5 +117,8 @@ formula_worktree_setup "$WORKTREE" agent_run --worktree "$WORKTREE" "$PROMPT" log "agent_run complete" +# Write journal entry post-session +profile_write_journal "predictor-run" "Predictor run $(date -u +%Y-%m-%d)" "complete" "" || true + rm -f "$SCRATCH_FILE" log "--- Predictor run done ---" diff --git a/projects/disinto.toml.example b/projects/disinto.toml.example index ea0b8c5..61781e5 100644 --- a/projects/disinto.toml.example +++ b/projects/disinto.toml.example @@ -5,7 +5,7 @@ name = "disinto" repo = "johba/disinto" -ops_repo = "johba/disinto-ops" +ops_repo = "disinto-admin/disinto-ops" forge_url = "http://localhost:3000" repo_root = "/home/YOU/dark-factory" ops_repo_root = "/home/YOU/disinto-ops" diff --git a/review/AGENTS.md b/review/AGENTS.md index e010ff5..6976c04 100644 --- a/review/AGENTS.md +++ b/review/AGENTS.md @@ -1,4 +1,4 @@ - + # Review Agent **Role**: AI-powered PR review — post structured findings and formal @@ -9,8 +9,8 @@ whose CI has passed and that lack a review for the current HEAD SHA, then spawns `review-pr.sh `. **Key files**: -- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. 
-- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. +- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. **Circuit breaker**: counts existing `` comments; skips a PR if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures). +- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids hardcoded 'origin'). Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. 
**Environment variables consumed**: - `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN) diff --git a/review/review-poll.sh b/review/review-poll.sh index 57a647c..72a6e85 100755 --- a/review/review-poll.sh +++ b/review/review-poll.sh @@ -23,8 +23,15 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log" MAX_REVIEWS=3 REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="review" + +# Override log() to append to review-specific log file +# shellcheck disable=SC2034 log() { - printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" + local agent="${LOG_AGENT:-review}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE" } # Log rotation @@ -126,10 +133,11 @@ if [ -n "$REVIEW_SIDS" ]; then log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})" - if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then + review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$? 
+ if [ "$review_rc" -eq 0 ]; then REVIEWED=$((REVIEWED + 1)) else - log " #${pr_num} re-review failed" + log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" fi [ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break @@ -166,10 +174,25 @@ while IFS= read -r line; do log " #${PR_NUM} needs review (CI=success, SHA=${PR_SHA:0:7})" - if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then + # Circuit breaker: count existing review-error comments for this SHA + ERROR_COMMENTS=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API_BASE}/issues/${PR_NUM}/comments" | \ + jq --arg sha "$PR_SHA" \ + '[.[] | select(.body | contains(""))] | length') + + if [ "${ERROR_COMMENTS:-0}" -ge 3 ]; then + log " #${PR_NUM} blocked: ${ERROR_COMMENTS} consecutive error comments for ${PR_SHA:0:7}, skipping" + SKIPPED=$((SKIPPED + 1)) + continue + fi + + log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}" + + review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$? 
+ if [ "$review_rc" -eq 0 ]; then REVIEWED=$((REVIEWED + 1)) else - log " #${PR_NUM} review failed" + log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" fi if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then diff --git a/review/review-pr.sh b/review/review-pr.sh index 0ae0fdb..08ce653 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -27,6 +27,8 @@ source "$(dirname "$0")/../lib/env.sh" source "$(dirname "$0")/../lib/ci-helpers.sh" source "$(dirname "$0")/../lib/worktree.sh" source "$(dirname "$0")/../lib/agent-sdk.sh" +# shellcheck source=../lib/formula-session.sh +source "$(dirname "$0")/../lib/formula-session.sh" # Auto-pull factory code to pick up merged fixes before any logic runs git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true @@ -56,6 +58,16 @@ if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 10 mv "$LOGFILE" "$LOGFILE.old" fi +# ============================================================================= +# RESOLVE FORGE REMOTE FOR GIT OPERATIONS +# ============================================================================= +resolve_forge_remote + +# ============================================================================= +# RESOLVE AGENT IDENTITY FOR .PROFILE REPO +# ============================================================================= +resolve_agent_identity || true + # ============================================================================= # MEMORY GUARD # ============================================================================= @@ -121,7 +133,7 @@ PREV_REV=$(printf '%s' "$ALL_COMMENTS" | jq -r --arg s "$PR_SHA" \ if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body') PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP '
-

Vault — quality gate

+

Vault — being redesigned

-
How it works
-

The vault sits between agents and dangerous actions. Before an agent can execute a risky operation (force push, deploy, delete), the vault reviews the request.

-

Auto-approve — safe, well-understood operations pass through instantly. Escalate — risky or novel operations get sent to a human via Matrix. Reject — clearly unsafe actions are blocked.

-

You define the boundaries. The vault enforces them. This is what lets you sleep while the factory runs.

+
Redesign in progress
+

The vault is being redesigned as a PR-based approval workflow on the ops repo. Instead of polling pending files, vault items will be created as PRs that require admin approval before execution.

+

See issues #73-#77 for the design: #75 defines the vault.sh helper for creating vault PRs, #76 rewrites the dispatcher to poll for merged vault PRs, #77 adds branch protection requiring admin approval.

@@ -524,8 +518,7 @@ disinto/ ├── predictor/ predictor-run.sh (daily cron executor) ├── planner/ planner-run.sh (weekly cron executor) ├── supervisor/ supervisor-run.sh (health monitoring) -├── vault/ vault-poll.sh, vault-agent.sh, vault-fire.sh -├── action/ action-poll.sh, action-agent.sh +├── vault/ vault-env.sh (vault redesign in progress, see #73-#77) ├── lib/ env.sh, agent-session.sh, ci-helpers.sh ├── projects/ *.toml per-project config ├── formulas/ TOML specs for multi-step agent tasks diff --git a/skill/SKILL.md b/skill/SKILL.md deleted file mode 100644 index 4077ae0..0000000 --- a/skill/SKILL.md +++ /dev/null @@ -1,350 +0,0 @@ ---- -name: disinto -description: >- - Operate the disinto autonomous code factory. Use when bootstrapping a new - project with `disinto init`, managing factory agents, filing issues on the - forge, reading agent journals, querying CI pipelines, checking the dependency - graph, or inspecting factory health. -license: AGPL-3.0 -metadata: - author: johba - version: "0.2.0" -env_vars: - required: - - FORGE_TOKEN - - FORGE_API - - PROJECT_REPO_ROOT - optional: - - WOODPECKER_SERVER - - WOODPECKER_TOKEN - - WOODPECKER_REPO_ID -tools: - - bash - - curl - - jq - - git ---- - -# Disinto Factory Skill - -You are the human's assistant for operating the disinto autonomous code factory. -You ask the questions, explain the choices, and run the commands on the human's -behalf. The human makes decisions; you execute. - -Disinto manages eight agents that implement issues, review PRs, plan from a -vision, predict risks, groom the backlog, gate actions, and keep the system -healthy — all driven by cron and Claude. 
- -## System requirements - -Before bootstrapping, verify the target machine meets these minimums: - -| Requirement | Detail | -|-------------|--------| -| **VPS** | 8 GB+ RAM (4 GB swap recommended) | -| **Docker + Docker Compose** | Required for the default containerized stack | -| **Claude Code CLI** | Authenticated with API access (`claude --version`) | -| **`CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1`** | Set in the factory environment — prevents auto-update pings in production | -| **Disk** | Sufficient for CI images, git mirrors, and agent worktrees (40 GB+ recommended) | -| **tmux** | Required for persistent dev sessions | -| **git, jq, python3, curl** | Used by agents and helper scripts | - -Optional but recommended: - -| Tool | Purpose | -|------|---------| -| **sops + age** | Encrypt secrets at rest (`.env.enc`) | - -## Bootstrapping with `disinto init` - -The primary setup path. Walk the human through each step. - -### Step 1 — Check prerequisites - -Confirm Docker, Claude Code CLI, and required tools are installed: - -```bash -docker --version && docker compose version -claude --version -tmux -V && git --version && jq --version && python3 --version -``` - -### Step 2 — Run `disinto init` - -```bash -disinto init -``` - -Accepts GitHub, Codeberg, or any git URL. Common variations: - -```bash -disinto init https://github.com/org/repo # default (docker compose) -disinto init org/repo --forge-url http://forge:3000 # custom forge URL -disinto init org/repo --bare # bare-metal, no compose -disinto init org/repo --yes # skip confirmation prompts -``` - -### What `disinto init` does - -1. **Generates `docker-compose.yml`** with four services: Forgejo, Woodpecker - server, Woodpecker agent, and the agents container. -2. **Starts a local Forgejo instance** via Docker (at `http://localhost:3000`). -3. **Creates admin + bot users** (dev-bot, review-bot) with API tokens. -4. **Creates the repo** on Forgejo and pushes the code. -5. 
**Sets up Woodpecker CI** — OAuth2 app on Forgejo, activates the repo. -6. **Generates `projects/.toml`** — per-project config with paths, CI IDs, - and forge URL. -7. **Creates standard labels** (backlog, in-progress, blocked, etc.). -8. **Configures git mirror remotes** if `[mirrors]` is set in the TOML. -9. **Encrypts secrets** to `.env.enc` if sops + age are available. -10. **Brings up the full docker compose stack**. - -### Step 3 — Set environment variable - -Ensure `CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1` is set in the factory -environment (`.env` or the agents container). This prevents Claude Code from -making auto-update and telemetry requests in production. - -### Step 4 — Verify - -```bash -disinto status -``` - -## Docker stack architecture - -The default deployment is a docker-compose stack with four services: - -``` -┌──────────────────────────────────────────────────┐ -│ disinto-net │ -│ │ -│ ┌──────────┐ ┌─────────────┐ ┌────────────┐ │ -│ │ Forgejo │ │ Woodpecker │ │ Woodpecker │ │ -│ │ (forge) │◀─│ (CI server)│◀─│ (agent) │ │ -│ │ :3000 │ │ :8000 │ │ │ │ -│ └──────────┘ └─────────────┘ └────────────┘ │ -│ ▲ │ -│ │ │ -│ ┌─────┴──────────────────────────────────────┐ │ -│ │ agents │ │ -│ │ (cron → dev, review, gardener, planner, │ │ -│ │ predictor, supervisor, action, vault) │ │ -│ │ Claude CLI mounted from host │ │ -│ └────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────┘ -``` - -| Service | Image | Purpose | -|---------|-------|---------| -| **forgejo** | `codeberg.org/forgejo/forgejo:11.0` | Git forge, issue tracker, PR reviews | -| **woodpecker** | `woodpeckerci/woodpecker-server:v3` | CI server, triggers on push | -| **woodpecker-agent** | `woodpeckerci/woodpecker-agent:v3` | Runs CI pipelines in Docker | -| **agents** | `./docker/agents` (custom) | All eight factory agents, driven by cron | - -The agents container mounts the Claude CLI binary and `~/.claude` credentials -from the host. 
Secrets are loaded from `.env` (or decrypted from `.env.enc`). - -## Git mirror - -The factory assumes a local git mirror on the Forgejo instance to avoid -rate limits from upstream forges (GitHub, Codeberg). When `disinto init` runs: - -1. The repo is cloned from the upstream URL. -2. A `forgejo` remote is added pointing to the local Forgejo instance. -3. All branches and tags are pushed to Forgejo. -4. If `[mirrors]` is configured in the project TOML, additional remotes - (e.g. GitHub, Codeberg) are set up and synced via `lib/mirrors.sh`. - -All agent work happens against the local Forgejo forge. This means: -- No GitHub/Codeberg API rate limits on polling. -- CI triggers are local (Woodpecker watches Forgejo webhooks). -- Mirror pushes are fire-and-forget background operations after merge. - -To configure mirrors in the project TOML: - -```toml -[mirrors] -github = "git@github.com:user/repo.git" -codeberg = "git@codeberg.org:user/repo.git" -``` - -## Required environment - -| Variable | Purpose | -|----------|---------| -| `FORGE_TOKEN` | Forgejo/Gitea API token with repo scope | -| `FORGE_API` | Base API URL, e.g. 
`https://forge.example/api/v1/repos/owner/repo` | -| `PROJECT_REPO_ROOT` | Absolute path to the checked-out disinto repository | - -Optional: - -| Variable | Purpose | -|----------|---------| -| `WOODPECKER_SERVER` | Woodpecker CI base URL (for pipeline queries) | -| `WOODPECKER_TOKEN` | Woodpecker API bearer token | -| `WOODPECKER_REPO_ID` | Numeric repo ID in Woodpecker | - -## The eight agents - -| Agent | Role | Runs via | -|-------|------|----------| -| **Dev** | Picks backlog issues, implements in worktrees, opens PRs | `dev/dev-poll.sh` (cron) | -| **Review** | Reviews PRs against conventions, approves or requests changes | `review/review-poll.sh` (cron) | -| **Gardener** | Grooms backlog: dedup, quality gates, dust bundling, stale cleanup | `gardener/gardener-run.sh` (cron 0,6,12,18 UTC) | -| **Planner** | Tracks vision progress, maintains prerequisite tree, files constraint issues | `planner/planner-run.sh` (cron daily 07:00 UTC) | -| **Predictor** | Challenges claims, detects structural risks, files predictions | `predictor/predictor-run.sh` (cron daily 06:00 UTC) | -| **Supervisor** | Monitors health (RAM, disk, CI, agents), auto-fixes, escalates | `supervisor/supervisor-run.sh` (cron */20) | -| **Action** | Executes operational tasks dispatched by planner via formulas | `action/action-poll.sh` (cron) | -| **Vault** | Gates dangerous actions, manages resource procurement | `vault/vault-poll.sh` (cron) | - -### How agents interact - -``` -Planner ──creates-issues──▶ Backlog ◀──grooms── Gardener - │ │ - │ ▼ - │ Dev (implements) - │ │ - │ ▼ - │ Review (approves/rejects) - │ │ - │ ▼ - ▼ Merged -Predictor ──challenges──▶ Planner (triages predictions) -Supervisor ──monitors──▶ All agents (health, escalation) -Vault ──gates──▶ Action, Dev (dangerous operations) -``` - -### Issue lifecycle - -`backlog` → `in-progress` → PR → CI → review → merge → closed. 
- -Key labels: `backlog`, `priority`, `in-progress`, `blocked`, `underspecified`, -`tech-debt`, `vision`, `action`, `prediction/unreviewed`. - -Issues declare dependencies in a `## Dependencies` section listing `#N` -references. Dev-poll only picks issues whose dependencies are all closed. - -## Available scripts - -- **`scripts/factory-status.sh`** — Show agent status, open issues, and CI - pipeline state. Pass `--agents`, `--issues`, or `--ci` for specific sections. -- **`scripts/file-issue.sh`** — Create an issue on the forge with proper labels - and formatting. Pass `--title`, `--body`, and optionally `--labels`. -- **`scripts/read-journal.sh`** — Read agent journal entries. Pass agent name - (`planner`, `supervisor`) and optional `--date YYYY-MM-DD`. - -## Common workflows - -### 1. Bootstrap a new project - -Walk the human through `disinto init`: - -```bash -# 1. Verify prerequisites -docker --version && claude --version - -# 2. Bootstrap -disinto init https://github.com/org/repo - -# 3. Verify -disinto status -``` - -### 2. Check factory health - -```bash -bash scripts/factory-status.sh -``` - -This shows: which agents are active, recent open issues, and CI pipeline -status. Use `--agents` for just the agent status section. - -### 3. Read what the planner decided today - -```bash -bash scripts/read-journal.sh planner -``` - -Returns today's planner journal: predictions triaged, prerequisite tree -updates, top constraints, issues created, and observations. - -### 4. File a new issue - -```bash -bash scripts/file-issue.sh --title "fix: broken auth flow" \ - --body "$(cat scripts/../templates/issue-template.md)" \ - --labels backlog -``` - -Or generate the body inline — the template shows the expected format with -acceptance criteria and affected files sections. - -### 5. 
Check the dependency graph - -```bash -python3 "${PROJECT_REPO_ROOT}/lib/build-graph.py" \ - --project-root "${PROJECT_REPO_ROOT}" \ - --output /tmp/graph-report.json -cat /tmp/graph-report.json | jq '.analyses' -``` - -The graph builder parses VISION.md, the prerequisite tree, formulas, and open -issues. It detects: orphan issues (not referenced), dependency cycles, -disconnected clusters, bottleneck nodes, and thin objectives. - -### 6. Query a specific CI pipeline - -```bash -bash scripts/factory-status.sh --ci -``` - -Or query Woodpecker directly: - -```bash -curl -s -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ - "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines?per_page=5" \ - | jq '.[] | {number, status, commit: .commit[:8], branch}' -``` - -### 7. Manage the docker stack - -```bash -disinto up # start all services -disinto down # stop all services -disinto logs # tail all service logs -disinto logs forgejo # tail specific service -disinto shell # shell into agents container -``` - -### 8. Read and interpret VISION.md progress - -Read `VISION.md` at the repo root for the full vision. Then cross-reference -with the prerequisite tree: - -```bash -cat "${OPS_REPO_ROOT}/prerequisites.md" -``` - -The prerequisite tree maps vision objectives to concrete issues. Items marked -`[x]` are complete; items marked `[ ]` show what blocks progress. The planner -updates this daily. - -## Gotchas - -- **Single-threaded pipeline**: only one issue is in-progress per project at a - time. Don't file issues expecting parallel work. -- **Secrets via env vars only**: never embed secrets in issue bodies, PR - descriptions, or comments. Use `$VAR_NAME` references. -- **Formulas are not skills**: formulas in `formulas/` are TOML issue templates - for multi-step agent tasks. Skills teach assistants; formulas drive agents. -- **Predictor journals**: the predictor does not write journal files. 
Its memory - lives in `prediction/unreviewed` and `prediction/actioned` issues. -- **State files**: agent activity is tracked via `state/.{agent}-active` files. - These are presence files, not logs. -- **ShellCheck required**: all `.sh` files must pass ShellCheck. CI enforces this. -- **Local forge is the source of truth**: all agent work targets the local - Forgejo instance. Upstream mirrors are synced after merge. -- **`CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1`**: must be set in production - to prevent Claude Code from making auto-update requests. diff --git a/skill/scripts/factory-status.sh b/skill/scripts/factory-status.sh deleted file mode 100755 index ee0d683..0000000 --- a/skill/scripts/factory-status.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# factory-status.sh — query agent status, open issues, and CI pipelines -# -# Usage: factory-status.sh [--agents] [--issues] [--ci] [--help] -# No flags: show all sections -# --agents: show only agent activity status -# --issues: show only open issues summary -# --ci: show only CI pipeline status -# -# Required env: FORGE_TOKEN, FORGE_API, PROJECT_REPO_ROOT -# Optional env: WOODPECKER_SERVER, WOODPECKER_TOKEN, WOODPECKER_REPO_ID - -usage() { - sed -n '3,10s/^# //p' "$0" - exit 0 -} - -show_agents=false -show_issues=false -show_ci=false -show_all=true - -while [[ $# -gt 0 ]]; do - case "$1" in - --agents) show_agents=true; show_all=false; shift ;; - --issues) show_issues=true; show_all=false; shift ;; - --ci) show_ci=true; show_all=false; shift ;; - --help|-h) usage ;; - *) echo "Unknown option: $1" >&2; exit 1 ;; - esac -done - -: "${FORGE_TOKEN:?FORGE_TOKEN is required}" -: "${FORGE_API:?FORGE_API is required}" -: "${PROJECT_REPO_ROOT:?PROJECT_REPO_ROOT is required}" - -forge_get() { - curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Accept: application/json" \ - "${FORGE_API}$1" -} - -# --- Agent status --- -print_agent_status() { - echo "## Agent Status" - echo "" - 
local state_dir="${PROJECT_REPO_ROOT}/state" - local agents=(dev review gardener supervisor planner predictor action vault) - for agent in "${agents[@]}"; do - local state_file="${state_dir}/.${agent}-active" - if [[ -f "$state_file" ]]; then - echo " ${agent}: ACTIVE (since $(stat -c '%y' "$state_file" 2>/dev/null | cut -d. -f1 || echo 'unknown'))" - else - echo " ${agent}: idle" - fi - done - echo "" -} - -# --- Open issues --- -print_open_issues() { - echo "## Open Issues" - echo "" - local issues - issues=$(forge_get "/issues?state=open&type=issues&limit=50&sort=created&direction=desc" 2>/dev/null) || { - echo " (failed to fetch issues from forge)" - echo "" - return - } - local count - count=$(echo "$issues" | jq 'length') - echo " Total open: ${count}" - echo "" - - # Group by key labels - for label in backlog priority in-progress blocked; do - local labeled - labeled=$(echo "$issues" | jq --arg l "$label" '[.[] | select(.labels[]?.name == $l)]') - local n - n=$(echo "$labeled" | jq 'length') - if [[ "$n" -gt 0 ]]; then - echo " [${label}] (${n}):" - echo "$labeled" | jq -r '.[] | " #\(.number) \(.title)"' | head -10 - echo "" - fi - done -} - -# --- CI pipelines --- -print_ci_status() { - echo "## CI Pipelines" - echo "" - if [[ -z "${WOODPECKER_SERVER:-}" || -z "${WOODPECKER_TOKEN:-}" || -z "${WOODPECKER_REPO_ID:-}" ]]; then - echo " (Woodpecker not configured — set WOODPECKER_SERVER, WOODPECKER_TOKEN, WOODPECKER_REPO_ID)" - echo "" - return - fi - local pipelines - pipelines=$(curl -sf -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ - "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines?per_page=10" 2>/dev/null) || { - echo " (failed to fetch pipelines from Woodpecker)" - echo "" - return - } - echo "$pipelines" | jq -r '.[] | " #\(.number) [\(.status)] \(.branch) \(.commit[:8]) — \(.message // "" | split("\n")[0])"' | head -10 - echo "" -} - -# --- Output --- -if $show_all || $show_agents; then print_agent_status; fi -if $show_all || 
$show_issues; then print_open_issues; fi -if $show_all || $show_ci; then print_ci_status; fi diff --git a/skill/scripts/file-issue.sh b/skill/scripts/file-issue.sh deleted file mode 100755 index fdcf788..0000000 --- a/skill/scripts/file-issue.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# file-issue.sh — create an issue on the forge with labels -# -# Usage: file-issue.sh --title TITLE --body BODY [--labels LABEL1,LABEL2] [--help] -# -# Required env: FORGE_TOKEN, FORGE_API - -usage() { - sed -n '3,8s/^# //p' "$0" - exit 0 -} - -title="" -body="" -labels="" - -while [[ $# -gt 0 ]]; do - case "$1" in - --title) title="$2"; shift 2 ;; - --body) body="$2"; shift 2 ;; - --labels) labels="$2"; shift 2 ;; - --help|-h) usage ;; - *) printf 'file-issue: unknown option: %s\n' "$1" >&2; exit 1 ;; - esac -done - -: "${FORGE_TOKEN:?FORGE_TOKEN is required}" -: "${FORGE_API:?FORGE_API is required}" - -if [[ -z "$title" ]]; then - echo "Error: --title is required" >&2 - exit 1 -fi -if [[ -z "$body" ]]; then - echo "Error: --body is required" >&2 - exit 1 -fi - -# --- Resolve label names to IDs --- -label_ids="[]" -if [[ -n "$labels" ]]; then - all_labels=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Accept: application/json" \ - "${FORGE_API}/labels?limit=50" 2>/dev/null) || { - echo "Warning: could not fetch labels, creating issue without labels" >&2 - all_labels="[]" - } - label_ids="[" - first=true - IFS=',' read -ra label_arr <<< "$labels" - for lname in "${label_arr[@]}"; do - lname=$(echo "$lname" | xargs) # trim whitespace - lid=$(echo "$all_labels" | jq -r --arg n "$lname" '.[] | select(.name == $n) | .id') - if [[ -n "$lid" ]]; then - if ! 
$first; then label_ids+=","; fi - label_ids+="$lid" - first=false - else - echo "Warning: label '${lname}' not found, skipping" >&2 - fi - done - label_ids+="]" -fi - -# --- Secret scan (refuse to post bodies containing obvious secrets) --- -if echo "$body" | grep -qiE '(sk-[a-zA-Z0-9]{20,}|ghp_[a-zA-Z0-9]{36}|AKIA[A-Z0-9]{16}|-----BEGIN (RSA |EC )?PRIVATE KEY)'; then - echo "Error: body appears to contain a secret — refusing to post" >&2 - exit 1 -fi - -# --- Create the issue --- -payload=$(jq -n \ - --arg t "$title" \ - --arg b "$body" \ - --argjson l "$label_ids" \ - '{title: $t, body: $b, labels: $l}') - -response=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - -d "$payload" \ - "${FORGE_API}/issues") || { - echo "Error: failed to create issue" >&2 - exit 1 -} - -number=$(echo "$response" | jq -r '.number') -url=$(echo "$response" | jq -r '.html_url') -echo "Created issue #${number}: ${url}" diff --git a/skill/scripts/read-journal.sh b/skill/scripts/read-journal.sh deleted file mode 100755 index 78bd787..0000000 --- a/skill/scripts/read-journal.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# read-journal.sh — read agent journal entries -# -# Usage: read-journal.sh AGENT [--date YYYY-MM-DD] [--list] [--help] -# AGENT: planner, supervisor, or predictor -# --date: specific date (default: today) -# --list: list available journal dates instead of reading -# -# Required env: PROJECT_REPO_ROOT - -usage() { - cat <<'USAGE' -read-journal.sh AGENT [--date YYYY-MM-DD] [--list] [--help] - AGENT: planner, supervisor, or predictor - --date: specific date (default: today) - --list: list available journal dates instead of reading -USAGE - exit 0 -} - -agent="" -target_date=$(date +%Y-%m-%d) -list_mode=false - -while [[ $# -gt 0 ]]; do - case "$1" in - --date) target_date="$2"; shift 2 ;; - --list) list_mode=true; shift ;; - --help|-h) usage ;; - -*) echo "Unknown option: $1" >&2; exit 1 
;; - *) - if [[ -z "$agent" ]]; then - agent="$1" - else - echo "Unexpected argument: $1" >&2; exit 1 - fi - shift - ;; - esac -done - -: "${OPS_REPO_ROOT:?OPS_REPO_ROOT is required}" - -if [[ -z "$agent" ]]; then - echo "Error: agent name is required (planner, supervisor, predictor)" >&2 - echo "" >&2 - usage -fi - -# --- Resolve journal directory --- -case "$agent" in - planner) journal_dir="${OPS_REPO_ROOT}/journal/planner" ;; - supervisor) journal_dir="${OPS_REPO_ROOT}/journal/supervisor" ;; - predictor) - echo "The predictor does not write journal files." - echo "Its memory lives in forge issues labeled 'prediction/unreviewed' and 'prediction/actioned'." - echo "" - echo "Query predictions with:" - echo " curl -sH 'Authorization: token \${FORGE_TOKEN}' '\${FORGE_API}/issues?state=open&labels=prediction%2Funreviewed'" - exit 0 - ;; - *) - echo "Error: unknown agent '${agent}'" >&2 - echo "Available: planner, supervisor, predictor" >&2 - exit 1 - ;; -esac - -if [[ ! -d "$journal_dir" ]]; then - echo "No journal directory found at ${journal_dir}" >&2 - exit 1 -fi - -# --- List mode --- -if $list_mode; then - echo "Available journal dates for ${agent}:" - find "$journal_dir" -maxdepth 1 -name '*.md' -printf '%f\n' 2>/dev/null | sed 's|\.md$||' | sort -r | head -20 - exit 0 -fi - -# --- Read specific date --- -journal_file="${journal_dir}/${target_date}.md" -if [[ -f "$journal_file" ]]; then - cat "$journal_file" -else - echo "No journal entry for ${agent} on ${target_date}" >&2 - echo "" >&2 - echo "Recent entries:" >&2 - find "$journal_dir" -maxdepth 1 -name '*.md' -printf '%f\n' 2>/dev/null | sed 's|\.md$||' | sort -r | head -5 >&2 - exit 1 -fi diff --git a/skill/templates/issue-template.md b/skill/templates/issue-template.md deleted file mode 100644 index 2399bc7..0000000 --- a/skill/templates/issue-template.md +++ /dev/null @@ -1,21 +0,0 @@ -## Summary - - - -## Acceptance criteria - -- [ ] -- [ ] -- [ ] - -## Affected files - - - -- `path/to/file.sh` - -## 
Dependencies - - - -None diff --git a/state/.gitignore b/state/.gitignore index 0a0c1e8..eb205a2 100644 --- a/state/.gitignore +++ b/state/.gitignore @@ -1,2 +1,4 @@ # Active-state files are runtime state, not committed .*-active +# Supervisor is always active in the edge container — committed guard file +!.supervisor-active diff --git a/vault/.locks/.gitkeep b/state/.supervisor-active similarity index 100% rename from vault/.locks/.gitkeep rename to state/.supervisor-active diff --git a/supervisor/AGENTS.md b/supervisor/AGENTS.md index 322ab4b..3348c86 100644 --- a/supervisor/AGENTS.md +++ b/supervisor/AGENTS.md @@ -1,4 +1,4 @@ - + # Supervisor Agent **Role**: Health monitoring and auto-remediation, executed as a formula-driven @@ -9,19 +9,17 @@ resources or human decisions, files vault items instead of escalating directly. **Trigger**: `supervisor-run.sh` runs every 20 min via cron. Sources `lib/guard.sh` and calls `check_active supervisor` first — skips if -`$FACTORY_ROOT/state/.supervisor-active` is absent. Then creates a tmux session -with `claude --model sonnet`, injects `formulas/run-supervisor.toml` with -pre-collected metrics as context, monitors the phase file, and cleans up on -completion or timeout (20 min max session). No action issues — the supervisor -runs directly from cron like the planner and predictor. +`$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` +via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with +pre-collected metrics as context, and cleans up on completion or timeout (20 min max session). +No action issues — the supervisor runs directly from cron like the planner and predictor. 
**Key files**: - `supervisor/supervisor-run.sh` — Cron wrapper + orchestrator: lock, memory guard, - runs preflight.sh, sources disinto project config, creates tmux session, injects - formula prompt with metrics, monitors phase file, handles crash recovery via - `run_formula_and_monitor` + runs preflight.sh, sources disinto project config, runs claude -p via agent-sdk.sh, + injects formula prompt with metrics, handles crash recovery - `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap, - load), Docker status, active tmux sessions + phase files, lock files, agent log + load), Docker status, active sessions + phase files, lock files, agent log tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase` files for `PHASE:escalate` entries and auto-removes any whose linked issue @@ -31,12 +29,8 @@ runs directly from cron like the planner and predictor. - `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review, health-assessment, decide-actions, report, journal) with `needs` dependencies. Claude evaluates all metrics and takes actions in a single interactive session -- `$OPS_REPO_ROOT/journal/supervisor/*.md` — Daily health logs from each supervisor run -- `supervisor/PROMPT.md` — Best-practices reference for remediation actions - `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory, disk, CI, git, dev-agent, review-agent, forge) -- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by - supervisor-run.sh + formula) **Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled), P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). @@ -47,5 +41,5 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). 
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries **Lifecycle**: supervisor-run.sh (cron */20) → lock + memory guard → run -preflight.sh (collect metrics) → load formula + context → create tmux -session → Claude assesses health, auto-fixes, writes journal → `PHASE:done`. +preflight.sh (collect metrics) → load formula + context → run claude -p via agent-sdk.sh +→ Claude assesses health, auto-fixes, writes journal → `PHASE:done`. diff --git a/supervisor/PROMPT.md b/supervisor/PROMPT.md deleted file mode 100644 index 7381785..0000000 --- a/supervisor/PROMPT.md +++ /dev/null @@ -1,118 +0,0 @@ -# Supervisor Agent - -You are the supervisor agent for `$FORGE_REPO`. You were called because -`supervisor-poll.sh` detected an issue it couldn't auto-fix. - -## Priority Order - -1. **P0 — Memory crisis:** RAM <500MB or swap >3GB -2. **P1 — Disk pressure:** Disk >80% -3. **P2 — Factory stopped:** Dev-agent dead, CI down, git broken, all backlog dep-blocked -4. **P3 — Factory degraded:** Derailed PR, stuck pipeline, unreviewed PRs, circular deps, stale deps -5. **P4 — Housekeeping:** Stale processes, log rotation - -## What You Can Do - -Fix the issue yourself. You have full shell access and `--dangerously-skip-permissions`. - -Before acting, read the relevant knowledge file from the ops repo: -- Memory issues → `cat ${OPS_REPO_ROOT}/knowledge/memory.md` -- Disk issues → `cat ${OPS_REPO_ROOT}/knowledge/disk.md` -- CI issues → `cat ${OPS_REPO_ROOT}/knowledge/ci.md` -- forge / rate limits → `cat ${OPS_REPO_ROOT}/knowledge/forge.md` -- Dev-agent issues → `cat ${OPS_REPO_ROOT}/knowledge/dev-agent.md` -- Review-agent issues → `cat ${OPS_REPO_ROOT}/knowledge/review-agent.md` -- Git issues → `cat ${OPS_REPO_ROOT}/knowledge/git.md` - -## Credentials & API Access - -Environment variables are set. 
Source the helper library for convenience functions: -```bash -source ${FACTORY_ROOT}/lib/env.sh -``` - -This gives you: -- `forge_api GET "/pulls?state=open"` — forge API (uses $FORGE_TOKEN) -- `wpdb -c "SELECT ..."` — Woodpecker Postgres (uses $WOODPECKER_DB_PASSWORD) -- `woodpecker_api "/repos/$WOODPECKER_REPO_ID/pipelines"` — Woodpecker REST API (uses $WOODPECKER_TOKEN) -- `$FORGE_REVIEW_TOKEN` — for posting reviews as the review_bot account -- `$PROJECT_REPO_ROOT` — path to the target project repo -- `$PROJECT_NAME` — short project name (for worktree prefixes, container names) -- `$PRIMARY_BRANCH` — main branch (master or main) -- `$FACTORY_ROOT` — path to the disinto repo - -## Handling Dependency Alerts - -### Circular dependencies (P3) -When you see "Circular dependency deadlock: #A -> #B -> #A", the backlog is permanently -stuck. Your job: figure out the correct dependency direction and fix the wrong one. - -1. Read both issue bodies: `forge_api GET "/issues/A"`, `forge_api GET "/issues/B"` -2. Read the referenced source files in `$PROJECT_REPO_ROOT` to understand which change - actually depends on which -3. Edit the issue that has the incorrect dep to remove the `#NNN` reference from its - `## Dependencies` section (replace with `- None` if it was the only dep) -4. If the correct direction is unclear from code, file a vault item with both issue summaries - -Use the forge API to edit issue bodies: -```bash -# Read current body -BODY=$(forge_api GET "/issues/NNN" | jq -r '.body') -# Edit (remove the circular ref, keep other deps) -NEW_BODY=$(echo "$BODY" | sed 's/- #XXX/- None/') -forge_api PATCH "/issues/NNN" -d "$(jq -nc --arg b "$NEW_BODY" '{body:$b}')" -``` - -### Stale dependencies (P3) -When you see "Stale dependency: #A blocked by #B (open N days)", the dep may be -obsolete or misprioritized. Investigate: - -1. Check if dep #B is still relevant (read its body, check if the code it targets changed) -2. 
If the dep is obsolete → remove it from #A's `## Dependencies` section -3. If the dep is still needed → file a vault item, suggesting to prioritize #B or split #A - -### Dev-agent blocked (P2) -When you see "Dev-agent blocked: last N polls all report 'no ready issues'": - -1. Check if circular deps exist (they'll appear as separate P3 alerts) -2. Check if all backlog issues depend on a single unmerged issue — if so, file a vault - item to prioritize that blocker -3. If no clear blocker, file a vault item with the list of blocked issues and their deps - -## When you cannot fix it - -File a vault procurement item so the human is notified through the vault: -```bash -cat > "${OPS_REPO_ROOT}/vault/pending/supervisor-$(date -u +%Y%m%d-%H%M)-issue.md" <<'VAULT_EOF' -# -## What - -## Why - -## Unblocks -- Factory health: -VAULT_EOF -``` - -The vault-poll will notify the human and track the request. - -Do NOT talk to the human directly. The vault is the factory's only interface -to the human for resources and approvals. Fix first, report after. - -## Output - -``` -FIXED: -``` -or -``` -VAULT: filed $OPS_REPO_ROOT/vault/pending/.md — -``` - -## Learning - -If you discover something new, append it to the relevant knowledge file in the ops repo: -```bash -echo "### Lesson title -Description of what you learned." 
>> "${OPS_REPO_ROOT}/knowledge/.md" -``` diff --git a/supervisor/preflight.sh b/supervisor/preflight.sh index ba740b7..e9e4de2 100755 --- a/supervisor/preflight.sh +++ b/supervisor/preflight.sh @@ -132,8 +132,7 @@ echo "" echo "## Recent Agent Logs" for _log in supervisor/supervisor.log dev/dev-agent.log review/review.log \ - gardener/gardener.log planner/planner.log predictor/predictor.log \ - action/action.log; do + gardener/gardener.log planner/planner.log predictor/predictor.log; do _logpath="${FACTORY_ROOT}/${_log}" if [ -f "$_logpath" ]; then _log_age_min=$(( ($(date +%s) - $(stat -c %Y "$_logpath" 2>/dev/null || echo 0)) / 60 )) diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh deleted file mode 100755 index 1e83966..0000000 --- a/supervisor/supervisor-poll.sh +++ /dev/null @@ -1,808 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes -# -# Two-layer architecture: -# 1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes -# 2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml -# -# Runs every 10min via cron. -# -# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh -# -# Peek: cat /tmp/supervisor-status -# Log: tail -f /path/to/disinto/supervisor/supervisor.log - -source "$(dirname "$0")/../lib/env.sh" -source "$(dirname "$0")/../lib/ci-helpers.sh" - -LOGFILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log" -STATUSFILE="/tmp/supervisor-status" -LOCKFILE="/tmp/supervisor-poll.lock" -PROMPT_FILE="${FACTORY_ROOT}/supervisor/PROMPT.md" -PROJECTS_DIR="${FACTORY_ROOT}/projects" - -METRICS_FILE="${DISINTO_LOG_DIR}/metrics/supervisor-metrics.jsonl" - -emit_metric() { - printf '%s\n' "$1" >> "$METRICS_FILE" -} - -# Count all matching items from a paginated forge API endpoint. 
-# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues" -# Returns total count across all pages (max 20 pages = 1000 items). -codeberg_count_paginated() { - local endpoint="$1" total=0 page=1 count - while true; do - count=$(forge_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null | jq 'length' 2>/dev/null || echo 0) - total=$((total + ${count:-0})) - [ "${count:-0}" -lt 50 ] && break - page=$((page + 1)) - [ "$page" -gt 20 ] && break - done - echo "$total" -} - -rotate_metrics() { - [ -f "$METRICS_FILE" ] || return 0 - local cutoff tmpfile - cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M) - tmpfile="${METRICS_FILE}.tmp" - jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \ - "$METRICS_FILE" > "$tmpfile" 2>/dev/null - # Only replace if jq produced output, or the source is already empty - if [ -s "$tmpfile" ] || [ ! -s "$METRICS_FILE" ]; then - mv "$tmpfile" "$METRICS_FILE" - else - rm -f "$tmpfile" - fi -} - -# Prevent overlapping runs -if [ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) - if kill -0 "$LOCK_PID" 2>/dev/null; then - exit 0 - fi - rm -f "$LOCKFILE" -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT -mkdir -p "$(dirname "$METRICS_FILE")" -rotate_metrics - -flog() { - printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -status() { - printf '[%s] supervisor: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" - flog "$*" -} - -# Alerts by priority -P0_ALERTS="" -P1_ALERTS="" -P2_ALERTS="" -P3_ALERTS="" -P4_ALERTS="" - -p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; } -p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; } -p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; } -p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; } -p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; } - -FIXES="" -fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; } - -# 
############################################################################# -# LAYER 1: FACTORY INFRASTRUCTURE -# (project-agnostic, runs once) -# ############################################################################# - -# ============================================================================= -# P0: MEMORY — check first, fix first -# ============================================================================= -status "P0: checking memory" - -AVAIL_MB=$(free -m | awk '/Mem:/{print $7}') -SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}') - -if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then - flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing" - - # Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions - STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) - if [ -n "$STALE_CLAUDES" ]; then - echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true - fixed "Killed stale claude processes: ${STALE_CLAUDES}" - fi - - # Drop filesystem caches - sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 - fixed "Dropped filesystem caches" - - # Re-check after fixes - AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}') - SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}') - - if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then - p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" - else - flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" - fi -fi - -# P0 alerts already logged — clear so they are not duplicated in the final consolidated log -if [ -n "$P0_ALERTS" ]; then - P0_ALERTS="" -fi - -# ============================================================================= -# P1: DISK -# ============================================================================= -status "P1: checking disk" - -DISK_PERCENT=$(df -h / | awk 
'NR==2{print $5}' | tr -d '%') - -if [ "${DISK_PERCENT:-0}" -gt 80 ]; then - flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning" - - # Docker cleanup (safe — keeps images) - sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune" - - # Truncate logs >10MB - for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do - if [ -f "$logfile" ]; then - SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) - if [ "${SIZE_KB:-0}" -gt 10240 ]; then - truncate -s 0 "$logfile" - fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)" - fi - fi - done - - # Woodpecker log_entries cleanup - LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs) - if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then - SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+') - SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)') - if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then - wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null - fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})" - fi - fi - - DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%') - if [ "${DISK_AFTER:-0}" -gt 80 ]; then - p1 "Disk still ${DISK_AFTER}% after auto-clean" - else - flog "Disk recovered: ${DISK_AFTER}%" - fi -fi - -# P1 alerts already logged — clear so they are not duplicated in the final consolidated log -if [ -n "$P1_ALERTS" ]; then - P1_ALERTS="" -fi - -# Emit infra metric -_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}') -_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? 
(${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 )) -emit_metric "$(jq -nc \ - --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ - --argjson ram "${_RAM_USED_PCT:-0}" \ - --argjson disk "${DISK_PERCENT:-0}" \ - --argjson swap "${SWAP_USED_MB:-0}" \ - '{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true - -# ============================================================================= -# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic) -# ============================================================================= -status "P4: infra housekeeping" - -# Stale agent-spawned claude processes (>3h) — skip interactive sessions -STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) -if [ -n "$STALE_CLAUDES" ]; then - echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true - fixed "Killed stale claude processes: $(echo $STALE_CLAUDES | wc -w) procs" -fi - -# Rotate logs >5MB -for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do - if [ -f "$logfile" ]; then - SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) - if [ "${SIZE_KB:-0}" -gt 5120 ]; then - mv "$logfile" "${logfile}.old" 2>/dev/null - fixed "Rotated $(basename "$logfile")" - fi - fi -done - -# ############################################################################# -# LAYER 2: PER-PROJECT CHECKS -# (iterated over projects/*.toml, config-driven) -# ############################################################################# - -# Infra retry tracking (shared across projects, created once) -_RETRY_DIR="/tmp/supervisor-infra-retries" -mkdir -p "$_RETRY_DIR" - -# Function: run all per-project checks for the currently loaded project config -check_project() { - local proj_name="${PROJECT_NAME:-unknown}" - flog "── checking project: ${proj_name} (${FORGE_REPO}) ──" - - # =========================================================================== - # P2: FACTORY STOPPED — CI, dev-agent, git 
- # =========================================================================== - status "P2: ${proj_name}: checking pipeline" - - # CI stuck - STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true) - [ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null && p2 "${proj_name}: CI: ${STUCK_CI} pipeline(s) running >20min" - - PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true) - [ "${PENDING_CI:-0}" -gt 0 ] && p2 "${proj_name}: CI: ${PENDING_CI} pipeline(s) pending >30min" - - # Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI) - _CI_ROW=$(wpdb -A -F ',' -c "SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1;" 2>/dev/null | grep -E '^[0-9]' | head -1 || true) - if [ -n "$_CI_ROW" ]; then - _CI_ID=$(echo "$_CI_ROW" | cut -d',' -f1 | tr -d ' ') - _CI_DUR=$(echo "$_CI_ROW" | cut -d',' -f2 | tr -d ' ') - _CI_STAT=$(echo "$_CI_ROW" | cut -d',' -f3 | tr -d ' ') - emit_metric "$(jq -nc \ - --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ - --arg proj "$proj_name" \ - --argjson pipeline "${_CI_ID:-0}" \ - --argjson duration "${_CI_DUR:-0}" \ - --arg status "${_CI_STAT:-unknown}" \ - '{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null)" 2>/dev/null || true - fi - - # =========================================================================== - # P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures - # 
=========================================================================== - if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then - status "P2e: ${proj_name}: checking infra failures" - - # Recent failed pipelines (last 6h) - _failed_nums=$(wpdb -A -c " - SELECT number FROM pipelines - WHERE repo_id = ${WOODPECKER_REPO_ID} - AND status IN ('failure', 'error') - AND finished > 0 - AND to_timestamp(finished) > now() - interval '6 hours' - ORDER BY number DESC LIMIT 5;" 2>/dev/null \ - | tr -d ' ' | grep -E '^[0-9]+$' || true) - - # shellcheck disable=SC2086 - for _pip_num in $_failed_nums; do - [ -z "$_pip_num" ] && continue - - # Check retry count; alert if retries exhausted - _retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}" - _retries=0 - [ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0) - if [ "${_retries:-0}" -ge 2 ]; then - p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation" - continue - fi - - # Classify failure type via shared helper - _classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code") - - if [[ "$_classification" == infra* ]]; then - _infra_reason="${_classification#infra }" - _new_retries=$(( _retries + 1 )) - if woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \ - -X POST >/dev/null 2>&1; then - echo "$_new_retries" > "$_retry_file" - fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)" - else - p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed" - flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error" - fi - fi - done - - # Clean up stale retry tracking files (>24h) - find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true - fi - - # Dev-agent health (only if monitoring enabled) - if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then - 
DEV_LOCK="/tmp/dev-agent-${proj_name}.lock" - if [ -f "$DEV_LOCK" ]; then - DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null) - if ! kill -0 "$DEV_PID" 2>/dev/null; then - rm -f "$DEV_LOCK" - fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID} dead)" - else - DEV_STATUS_AGE=$(stat -c %Y "/tmp/dev-agent-status-${proj_name}" 2>/dev/null || echo 0) - NOW_EPOCH=$(date +%s) - STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 )) - if [ "$STATUS_AGE_MIN" -gt 30 ]; then - p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN}min" - fi - fi - fi - fi - - # Git repo health - if [ -d "${PROJECT_REPO_ROOT}" ]; then - cd "${PROJECT_REPO_ROOT}" 2>/dev/null || true - GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") - GIT_REBASE=$([ -d .git/rebase-merge ] || [ -d .git/rebase-apply ] && echo "yes" || echo "no") - - if [ "$GIT_REBASE" = "yes" ]; then - git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ - fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}" || \ - p2 "${proj_name}: Git: stale rebase, auto-abort failed" - fi - if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then - git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ - fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}" || \ - p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}" - fi - fi - - # =========================================================================== - # P2b: FACTORY STALLED — backlog exists but no agent running - # =========================================================================== - if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then - status "P2: ${proj_name}: checking pipeline stall" - - BACKLOG_COUNT=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") - IN_PROGRESS=$(forge_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 
2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") - - if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then - DEV_LOG="${DISINTO_LOG_DIR}/dev/dev-agent.log" - if [ -f "$DEV_LOG" ]; then - LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0) - else - LAST_LOG_EPOCH=0 - fi - NOW_EPOCH=$(date +%s) - IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 )) - - if [ "$IDLE_MIN" -gt 20 ]; then - p2 "${proj_name}: Pipeline stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN}min" - fi - fi - fi - - # =========================================================================== - # P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long - # =========================================================================== - if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then - status "P2: ${proj_name}: checking dev-agent productivity" - - DEV_LOG_FILE="${DISINTO_LOG_DIR}/dev/dev-agent.log" - if [ -f "$DEV_LOG_FILE" ]; then - RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6) - TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." 
|| true) - BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true) - if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then - p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'" - fi - fi - fi - - # =========================================================================== - # P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs - # =========================================================================== - if [ "${CHECK_PRS:-true}" = "true" ]; then - status "P3: ${proj_name}: checking PRs" - - OPEN_PRS=$(forge_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true) - for pr in $OPEN_PRS; do - PR_JSON=$(forge_api GET "/pulls/${pr}" 2>/dev/null || true) - [ -z "$PR_JSON" ] && continue - PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""') - [ -z "$PR_SHA" ] && continue - - CI_STATE=$(ci_commit_status "$PR_SHA" 2>/dev/null || true) - - MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true') - if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then - p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase" - elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then - UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""') - if [ -n "$UPDATED" ]; then - UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0) - NOW_EPOCH=$(date +%s) - AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 )) - [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min" - fi - elif ci_passed "$CI_STATE"; then - HAS_REVIEW=$(forge_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \ - jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains(" + +## What was expected + + + +## Steps to reproduce + + +1. +2. +3. 
+ +## Environment + + +- Browser/Client: +- Wallet (if applicable): +- Network (if applicable): +- Version: diff --git a/tests/mock-forgejo.py b/tests/mock-forgejo.py new file mode 100755 index 0000000..c65b522 --- /dev/null +++ b/tests/mock-forgejo.py @@ -0,0 +1,834 @@ +#!/usr/bin/env python3 +"""Mock Forgejo API server for CI smoke tests. + +Implements 16 Forgejo API endpoints that disinto init calls. +State stored in-memory (dicts), responds instantly. +""" + +import base64 +import hashlib +import json +import os +import re +import signal +import socket +import sys +import threading +import uuid +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn +from urllib.parse import parse_qs, urlparse + +# Global state +state = { + "users": {}, # key: username -> user object + "tokens": {}, # key: token_sha1 -> token object + "repos": {}, # key: "owner/repo" -> repo object + "orgs": {}, # key: orgname -> org object + "labels": {}, # key: "owner/repo" -> list of labels + "collaborators": {}, # key: "owner/repo" -> set of usernames + "protections": {}, # key: "owner/repo" -> list of protections + "oauth2_apps": [], # list of oauth2 app objects +} + +next_ids = {"users": 1, "tokens": 1, "repos": 1, "orgs": 1, "labels": 1, "oauth2_apps": 1} + +SHUTDOWN_REQUESTED = False + + +def log_request(handler, method, path, status): + """Log request details.""" + print(f"[{handler.log_date_time_string()}] {method} {path} {status}", file=sys.stderr) + + +def json_response(handler, status, data): + """Send JSON response.""" + body = json.dumps(data).encode("utf-8") + handler.send_response(status) + handler.send_header("Content-Type", "application/json") + handler.send_header("Content-Length", len(body)) + handler.end_headers() + handler.wfile.write(body) + + +def basic_auth_user(handler): + """Extract username from Basic auth header. 
Returns None if invalid.""" + auth_header = handler.headers.get("Authorization", "") + if not auth_header.startswith("Basic "): + return None + try: + decoded = base64.b64decode(auth_header[6:]).decode("utf-8") + username, _ = decoded.split(":", 1) + return username + except Exception: + return None + + +def token_auth_valid(handler): + """Check if Authorization header contains token. Doesn't validate value.""" + auth_header = handler.headers.get("Authorization", "") + return auth_header.startswith("token ") + + +def require_token(handler): + """Require token auth. Return user or None if invalid.""" + if not token_auth_valid(handler): + return None + return True # Any token is valid for mock purposes + + +def require_basic_auth(handler, required_user=None): + """Require basic auth. Return username or None if invalid.""" + username = basic_auth_user(handler) + if username is None: + return None + # Check user exists in state + if username not in state["users"]: + return None + if required_user and username != required_user: + return None + return username + + +class ForgejoHandler(BaseHTTPRequestHandler): + """HTTP request handler for mock Forgejo API.""" + + def log_message(self, format, *args): + """Override to use our logging.""" + pass # We log in do_request + + def do_request(self, method): + """Route request to appropriate handler.""" + parsed = urlparse(self.path) + path = parsed.path + query = parse_qs(parsed.query) + + log_request(self, method, self.path, "PENDING") + + # Strip /api/v1/ prefix for routing (or leading slash for other routes) + route_path = path + if route_path.startswith("/api/v1/"): + route_path = route_path[8:] + elif route_path.startswith("/"): + route_path = route_path.lstrip("/") + + # Route to handler + try: + # First try exact match (with / replaced by _) + handler_path = route_path.replace("/", "_") + handler_name = f"handle_{method}_{handler_path}" + handler = getattr(self, handler_name, None) + + if handler: + handler(query) + 
else: + # Try pattern matching for routes with dynamic segments + self._handle_patterned_route(method, route_path, query) + except Exception as e: + log_request(self, method, self.path, 500) + json_response(self, 500, {"message": str(e)}) + + def _handle_patterned_route(self, method, route_path, query): + """Handle routes with dynamic segments using pattern matching.""" + # Define patterns: (regex, handler_name) + patterns = [ + # Users patterns + (r"^users/([^/]+)$", f"handle_{method}_users_username"), + (r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"), + (r"^users/([^/]+)/tokens/([^/]+)$", f"handle_{method}_users_username_tokens_token_id"), + (r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"), + # Repos patterns + (r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"), + (r"^repos/([^/]+)/([^/]+)/labels$", f"handle_{method}_repos_owner_repo_labels"), + (r"^repos/([^/]+)/([^/]+)/branch_protections$", f"handle_{method}_repos_owner_repo_branch_protections"), + (r"^repos/([^/]+)/([^/]+)/collaborators/([^/]+)$", f"handle_{method}_repos_owner_repo_collaborators_collaborator"), + # Org patterns + (r"^orgs/([^/]+)/repos$", f"handle_{method}_orgs_org_repos"), + # User patterns + (r"^user/repos$", f"handle_{method}_user_repos"), + (r"^user/applications/oauth2$", f"handle_{method}_user_applications_oauth2"), + # Admin patterns + (r"^admin/users$", f"handle_{method}_admin_users"), + (r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"), + (r"^admin/users/([^/]+)/repos$", f"handle_{method}_admin_users_username_repos"), + # Org patterns + (r"^orgs$", f"handle_{method}_orgs"), + ] + + for pattern, handler_name in patterns: + if re.match(pattern, route_path): + handler = getattr(self, handler_name, None) + if handler: + handler(query) + return + + self.handle_404() + + def do_GET(self): + self.do_request("GET") + + def do_POST(self): + self.do_request("POST") + + def do_PATCH(self): + self.do_request("PATCH") + + def 
do_PUT(self): + self.do_request("PUT") + + def handle_GET_version(self, query): + """GET /api/v1/version""" + json_response(self, 200, {"version": "11.0.0-mock"}) + + def handle_GET_users_username(self, query): + """GET /api/v1/users/{username}""" + # Extract username from path + parts = self.path.split("/") + if len(parts) >= 5: + username = parts[4] + else: + json_response(self, 404, {"message": "user does not exist"}) + return + + if username in state["users"]: + json_response(self, 200, state["users"][username]) + else: + json_response(self, 404, {"message": "user does not exist"}) + + def handle_GET_users_username_repos(self, query): + """GET /api/v1/users/{username}/repos""" + if not require_token(self): + json_response(self, 401, {"message": "invalid authentication"}) + return + + parts = self.path.split("/") + if len(parts) >= 5: + username = parts[4] + else: + json_response(self, 404, {"message": "user not found"}) + return + + if username not in state["users"]: + json_response(self, 404, {"message": "user not found"}) + return + + # Return repos owned by this user + user_repos = [r for r in state["repos"].values() if r["owner"]["login"] == username] + json_response(self, 200, user_repos) + + def handle_GET_repos_owner_repo(self, query): + """GET /api/v1/repos/{owner}/{repo}""" + parts = self.path.split("/") + if len(parts) >= 6: + owner = parts[4] + repo = parts[5] + else: + json_response(self, 404, {"message": "repository not found"}) + return + + key = f"{owner}/{repo}" + if key in state["repos"]: + json_response(self, 200, state["repos"][key]) + else: + json_response(self, 404, {"message": "repository not found"}) + + def handle_GET_repos_owner_repo_labels(self, query): + """GET /api/v1/repos/{owner}/{repo}/labels""" + parts = self.path.split("/") + if len(parts) >= 6: + owner = parts[4] + repo = parts[5] + else: + json_response(self, 404, {"message": "repository not found"}) + return + + require_token(self) + + key = f"{owner}/{repo}" + if key in 
state["labels"]: + json_response(self, 200, state["labels"][key]) + else: + json_response(self, 200, []) + + def handle_GET_user_applications_oauth2(self, query): + """GET /api/v1/user/applications/oauth2""" + require_token(self) + json_response(self, 200, state["oauth2_apps"]) + + def handle_GET_mock_shutdown(self, query): + """GET /mock/shutdown""" + global SHUTDOWN_REQUESTED + SHUTDOWN_REQUESTED = True + json_response(self, 200, {"status": "shutdown"}) + + def handle_POST_admin_users(self, query): + """POST /api/v1/admin/users""" + require_token(self) + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + username = data.get("username") + email = data.get("email") + + if not username or not email: + json_response(self, 400, {"message": "username and email are required"}) + return + + user_id = next_ids["users"] + next_ids["users"] += 1 + + user = { + "id": user_id, + "login": username, + "email": email, + "full_name": data.get("full_name", ""), + "is_admin": data.get("admin", False), + "must_change_password": data.get("must_change_password", False), + "login_name": data.get("login_name", username), + "visibility": data.get("visibility", "public"), + "avatar_url": f"https://seccdn.libravatar.org/avatar/{hashlib.md5(email.encode()).hexdigest()}", + } + + state["users"][username] = user + json_response(self, 201, user) + + def handle_GET_users_username_tokens(self, query): + """GET /api/v1/users/{username}/tokens""" + # Support both token auth (for listing own tokens) and basic auth (for admin listing) + username = require_token(self) + if not username: + username = require_basic_auth(self) + if not username: + json_response(self, 401, {"message": "invalid authentication"}) + return + + # Return list of tokens for this user + tokens = [t for t in state["tokens"].values() if t.get("username") == username] + json_response(self, 200, tokens) + + def 
handle_DELETE_users_username_tokens_token_id(self, query): + """DELETE /api/v1/users/{username}/tokens/{id}""" + # Support both token auth and basic auth + username = require_token(self) + if not username: + username = require_basic_auth(self) + if not username: + json_response(self, 401, {"message": "invalid authentication"}) + return + + parts = self.path.split("/") + if len(parts) >= 8: + token_id_str = parts[7] + else: + json_response(self, 404, {"message": "token not found"}) + return + + # Find and delete token by ID + deleted = False + for tok_sha1, tok in list(state["tokens"].items()): + if tok.get("id") == int(token_id_str) and tok.get("username") == username: + del state["tokens"][tok_sha1] + deleted = True + break + + if deleted: + self.send_response(204) + self.send_header("Content-Length", 0) + self.end_headers() + else: + json_response(self, 404, {"message": "token not found"}) + + def handle_POST_users_username_tokens(self, query): + """POST /api/v1/users/{username}/tokens""" + username = require_basic_auth(self) + if not username: + json_response(self, 401, {"message": "invalid authentication"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + token_name = data.get("name") + if not token_name: + json_response(self, 400, {"message": "name is required"}) + return + + token_id = next_ids["tokens"] + next_ids["tokens"] += 1 + + # Deterministic token: sha256(username + name)[:40] + token_str = hashlib.sha256(f"{username}{token_name}".encode()).hexdigest()[:40] + + token = { + "id": token_id, + "name": token_name, + "sha1": token_str, + "scopes": data.get("scopes", ["all"]), + "created_at": "2026-04-01T00:00:00Z", + "expires_at": None, + "username": username, # Store username for lookup + } + + state["tokens"][token_str] = token + json_response(self, 201, token) + + def handle_GET_orgs(self, query): + """GET /api/v1/orgs""" + if 
not require_token(self): + json_response(self, 401, {"message": "invalid authentication"}) + return + json_response(self, 200, list(state["orgs"].values())) + + def handle_POST_orgs(self, query): + """POST /api/v1/orgs""" + require_token(self) + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + username = data.get("username") + if not username: + json_response(self, 400, {"message": "username is required"}) + return + + org_id = next_ids["orgs"] + next_ids["orgs"] += 1 + + org = { + "id": org_id, + "username": username, + "full_name": username, + "avatar_url": f"https://seccdn.libravatar.org/avatar/{hashlib.md5(username.encode()).hexdigest()}", + "visibility": data.get("visibility", "public"), + } + + state["orgs"][username] = org + json_response(self, 201, org) + + def handle_POST_orgs_org_repos(self, query): + """POST /api/v1/orgs/{org}/repos""" + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 6: + org = parts[4] + else: + json_response(self, 404, {"message": "organization not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + repo_name = data.get("name") + if not repo_name: + json_response(self, 400, {"message": "name is required"}) + return + + repo_id = next_ids["repos"] + next_ids["repos"] += 1 + + key = f"{org}/{repo_name}" + repo = { + "id": repo_id, + "full_name": key, + "name": repo_name, + "owner": {"id": state["orgs"][org]["id"], "login": org}, + "empty": False, + "default_branch": data.get("default_branch", "main"), + "description": data.get("description", ""), + "private": data.get("private", False), + "html_url": f"https://example.com/{key}", + "ssh_url": f"git@example.com:{key}.git", + "clone_url": f"https://example.com/{key}.git", + "created_at": "2026-04-01T00:00:00Z", + } + + 
state["repos"][key] = repo + json_response(self, 201, repo) + + def handle_POST_users_username_repos(self, query): + """POST /api/v1/users/{username}/repos""" + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 5: + username = parts[4] + else: + json_response(self, 400, {"message": "username required"}) + return + + if username not in state["users"]: + json_response(self, 404, {"message": "user not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + repo_name = data.get("name") + if not repo_name: + json_response(self, 400, {"message": "name is required"}) + return + + repo_id = next_ids["repos"] + next_ids["repos"] += 1 + + key = f"{username}/{repo_name}" + repo = { + "id": repo_id, + "full_name": key, + "name": repo_name, + "owner": {"id": state["users"][username]["id"], "login": username}, + "empty": not data.get("auto_init", False), + "default_branch": data.get("default_branch", "main"), + "description": data.get("description", ""), + "private": data.get("private", False), + "html_url": f"https://example.com/{key}", + "ssh_url": f"git@example.com:{key}.git", + "clone_url": f"https://example.com/{key}.git", + "created_at": "2026-04-01T00:00:00Z", + } + + state["repos"][key] = repo + json_response(self, 201, repo) + + def handle_POST_admin_users_username_repos(self, query): + """POST /api/v1/admin/users/{username}/repos + Admin API to create a repo under a specific user namespace. + This allows creating repos in any user's namespace when authenticated as admin. 
+ """ + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 6: + target_user = parts[4] + else: + json_response(self, 400, {"message": "username required"}) + return + + if target_user not in state["users"]: + json_response(self, 404, {"message": "user not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + repo_name = data.get("name") + if not repo_name: + json_response(self, 400, {"message": "name is required"}) + return + + repo_id = next_ids["repos"] + next_ids["repos"] += 1 + + key = f"{target_user}/{repo_name}" + repo = { + "id": repo_id, + "full_name": key, + "name": repo_name, + "owner": {"id": state["users"][target_user]["id"], "login": target_user}, + "empty": not data.get("auto_init", False), + "default_branch": data.get("default_branch", "main"), + "description": data.get("description", ""), + "private": data.get("private", False), + "html_url": f"https://example.com/{key}", + "ssh_url": f"git@example.com:{key}.git", + "clone_url": f"https://example.com/{key}.git", + "created_at": "2026-04-01T00:00:00Z", + } + + state["repos"][key] = repo + json_response(self, 201, repo) + + def handle_POST_user_repos(self, query): + """POST /api/v1/user/repos""" + require_token(self) + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + repo_name = data.get("name") + if not repo_name: + json_response(self, 400, {"message": "name is required"}) + return + + # Get authenticated user from token + auth_header = self.headers.get("Authorization", "") + token = auth_header.split(" ", 1)[1] if " " in auth_header else "" + + # Find user by token (use stored username field) + owner = None + for tok_sha1, tok in state["tokens"].items(): + if tok_sha1 == token: + owner = tok.get("username") + break + + if not owner: + 
json_response(self, 401, {"message": "invalid token"}) + return + + repo_id = next_ids["repos"] + next_ids["repos"] += 1 + + key = f"{owner}/{repo_name}" + repo = { + "id": repo_id, + "full_name": key, + "name": repo_name, + "owner": {"id": state["users"].get(owner, {}).get("id", 0), "login": owner}, + "empty": False, + "default_branch": data.get("default_branch", "main"), + "description": data.get("description", ""), + "private": data.get("private", False), + "html_url": f"https://example.com/{key}", + "ssh_url": f"git@example.com:{key}.git", + "clone_url": f"https://example.com/{key}.git", + "created_at": "2026-04-01T00:00:00Z", + } + + state["repos"][key] = repo + json_response(self, 201, repo) + + def handle_POST_repos_owner_repo_labels(self, query): + """POST /api/v1/repos/{owner}/{repo}/labels""" + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 6: + owner = parts[4] + repo = parts[5] + else: + json_response(self, 404, {"message": "repository not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + label_name = data.get("name") + label_color = data.get("color") + + if not label_name or not label_color: + json_response(self, 400, {"message": "name and color are required"}) + return + + label_id = next_ids["labels"] + next_ids["labels"] += 1 + + key = f"{owner}/{repo}" + label = { + "id": label_id, + "name": label_name, + "color": label_color, + "description": data.get("description", ""), + "url": f"https://example.com/api/v1/repos/{key}/labels/{label_id}", + } + + if key not in state["labels"]: + state["labels"][key] = [] + state["labels"][key].append(label) + json_response(self, 201, label) + + def handle_POST_repos_owner_repo_branch_protections(self, query): + """POST /api/v1/repos/{owner}/{repo}/branch_protections""" + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 6: + owner = 
parts[4] + repo = parts[5] + else: + json_response(self, 404, {"message": "repository not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + branch_name = data.get("branch_name", "main") + key = f"{owner}/{repo}" + + # Generate unique ID for protection + if key in state["protections"]: + protection_id = len(state["protections"][key]) + 1 + else: + protection_id = 1 + + protection = { + "id": protection_id, + "repo_id": state["repos"].get(key, {}).get("id", 0), + "branch_name": branch_name, + "rule_name": data.get("rule_name", branch_name), + "enable_push": data.get("enable_push", False), + "enable_merge_whitelist": data.get("enable_merge_whitelist", True), + "merge_whitelist_usernames": data.get("merge_whitelist_usernames", ["admin"]), + "required_approvals": data.get("required_approvals", 1), + "apply_to_admins": data.get("apply_to_admins", True), + } + + if key not in state["protections"]: + state["protections"][key] = [] + state["protections"][key].append(protection) + json_response(self, 201, protection) + + def handle_POST_user_applications_oauth2(self, query): + """POST /api/v1/user/applications/oauth2""" + require_token(self) + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + app_name = data.get("name") + if not app_name: + json_response(self, 400, {"message": "name is required"}) + return + + app_id = next_ids["oauth2_apps"] + next_ids["oauth2_apps"] += 1 + + app = { + "id": app_id, + "name": app_name, + "client_id": str(uuid.uuid4()), + "client_secret": hashlib.sha256(str(uuid.uuid4()).encode()).hexdigest(), + "redirect_uris": data.get("redirect_uris", []), + "confidential_client": data.get("confidential_client", True), + "created_at": "2026-04-01T00:00:00Z", + } + + state["oauth2_apps"].append(app) + 
json_response(self, 201, app) + + def handle_PATCH_admin_users_username(self, query): + """PATCH /api/v1/admin/users/{username}""" + if not require_token(self): + json_response(self, 401, {"message": "invalid authentication"}) + return + + parts = self.path.split("/") + if len(parts) >= 6: + username = parts[5] + else: + json_response(self, 404, {"message": "user does not exist"}) + return + + if username not in state["users"]: + json_response(self, 404, {"message": "user does not exist"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + user = state["users"][username] + for key, value in data.items(): + # Map 'admin' to 'is_admin' for consistency + update_key = 'is_admin' if key == 'admin' else key + if update_key in user: + user[update_key] = value + + json_response(self, 200, user) + + def handle_PUT_repos_owner_repo_collaborators_collaborator(self, query): + """PUT /api/v1/repos/{owner}/{repo}/collaborators/{collaborator}""" + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 8: + owner = parts[4] + repo = parts[5] + collaborator = parts[7] + else: + json_response(self, 404, {"message": "repository not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + key = f"{owner}/{repo}" + if key not in state["collaborators"]: + state["collaborators"][key] = set() + state["collaborators"][key].add(collaborator) + + self.send_response(204) + self.send_header("Content-Length", 0) + self.end_headers() + + def handle_GET_repos_owner_repo_collaborators_collaborator(self, query): + """GET /api/v1/repos/{owner}/{repo}/collaborators/{collaborator}""" + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 8: + owner = parts[4] + repo = parts[5] + collaborator = parts[7] + else: + 
json_response(self, 404, {"message": "repository not found"}) + return + + key = f"{owner}/{repo}" + if key in state["collaborators"] and collaborator in state["collaborators"][key]: + self.send_response(204) + self.send_header("Content-Length", 0) + self.end_headers() + else: + json_response(self, 404, {"message": "collaborator not found"}) + + def handle_404(self): + """Return 404 for unknown routes.""" + json_response(self, 404, {"message": "route not found"}) + + +class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): + """Threaded HTTP server for handling concurrent requests.""" + daemon_threads = True + + +def main(): + """Start the mock server.""" + global SHUTDOWN_REQUESTED + + port = int(os.environ.get("MOCK_FORGE_PORT", 3000)) + try: + server = ThreadingHTTPServer(("0.0.0.0", port), ForgejoHandler) + try: + server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + except OSError: + pass # Not all platforms support this + except OSError as e: + print(f"Error: Failed to start server on port {port}: {e}", file=sys.stderr) + sys.exit(1) + + print(f"Mock Forgejo server starting on port {port}", file=sys.stderr) + sys.stderr.flush() + + def shutdown_handler(signum, frame): + global SHUTDOWN_REQUESTED + SHUTDOWN_REQUESTED = True + # Can't call server.shutdown() directly from signal handler in threaded server + threading.Thread(target=server.shutdown, daemon=True).start() + + signal.signal(signal.SIGTERM, shutdown_handler) + signal.signal(signal.SIGINT, shutdown_handler) + + try: + server.serve_forever() + except KeyboardInterrupt: + pass + finally: + server.shutdown() + print("Mock Forgejo server stopped", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/tests/smoke-init.sh b/tests/smoke-init.sh index b0a6cf0..a8371bd 100644 --- a/tests/smoke-init.sh +++ b/tests/smoke-init.sh @@ -1,32 +1,34 @@ #!/usr/bin/env bash -# tests/smoke-init.sh — End-to-end smoke test for disinto init +# tests/smoke-init.sh — End-to-end smoke test for 
disinto init with mock Forgejo # -# Expects a running Forgejo at SMOKE_FORGE_URL with a bootstrap admin -# user already created (see .woodpecker/smoke-init.yml for CI setup). -# Validates the full init flow: Forgejo API, user/token creation, -# repo setup, labels, TOML generation, and cron installation. +# Validates the full init flow using mock Forgejo server: +# 1. Verify mock Forgejo is ready +# 2. Set up mock binaries (docker, claude, tmux) +# 3. Run disinto init +# 4. Verify Forgejo state (users, repo) +# 5. Verify local state (TOML, .env, repo clone) +# 6. Verify cron setup # -# Required env: SMOKE_FORGE_URL (default: http://localhost:3000) +# Required env: FORGE_URL (default: http://localhost:3000) # Required tools: bash, curl, jq, python3, git set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)" -FORGE_URL="${SMOKE_FORGE_URL:-http://localhost:3000}" -SETUP_ADMIN="setup-admin" -SETUP_PASS="SetupPass-789xyz" -TEST_SLUG="smoke-org/smoke-repo" +# Always use localhost for mock Forgejo (in case FORGE_URL is set from docker-compose) +export FORGE_URL="http://localhost:3000" MOCK_BIN="/tmp/smoke-mock-bin" -MOCK_STATE="/tmp/smoke-mock-state" +TEST_SLUG="smoke-org/smoke-repo" FAILED=0 fail() { printf 'FAIL: %s\n' "$*" >&2; FAILED=1; } pass() { printf 'PASS: %s\n' "$*"; } cleanup() { - rm -rf "$MOCK_BIN" "$MOCK_STATE" /tmp/smoke-test-repo \ - "${FACTORY_ROOT}/projects/smoke-repo.toml" \ - "${FACTORY_ROOT}/docker-compose.yml" + # Kill any leftover mock-forgejo.py processes by name + pkill -f "mock-forgejo.py" 2>/dev/null || true + rm -rf "$MOCK_BIN" /tmp/smoke-test-repo \ + "${FACTORY_ROOT}/projects/smoke-repo.toml" # Restore .env only if we created the backup if [ -f "${FACTORY_ROOT}/.env.smoke-backup" ]; then mv "${FACTORY_ROOT}/.env.smoke-backup" "${FACTORY_ROOT}/.env" @@ -40,11 +42,11 @@ trap cleanup EXIT if [ -f "${FACTORY_ROOT}/.env" ]; then cp "${FACTORY_ROOT}/.env" "${FACTORY_ROOT}/.env.smoke-backup" fi -# Start with a clean .env (setup_forge 
writes tokens here) +# Start with a clean .env printf '' > "${FACTORY_ROOT}/.env" -# ── 1. Verify Forgejo is ready ────────────────────────────────────────────── -echo "=== 1/6 Verifying Forgejo at ${FORGE_URL} ===" +# ── 1. Verify mock Forgejo is ready ───────────────────────────────────────── +echo "=== 1/6 Verifying mock Forgejo at ${FORGE_URL} ===" retries=0 api_version="" while true; do @@ -55,163 +57,64 @@ while true; do fi retries=$((retries + 1)) if [ "$retries" -gt 30 ]; then - fail "Forgejo API not responding after 30s" + fail "Mock Forgejo API not responding after 30s" exit 1 fi sleep 1 done -pass "Forgejo API v${api_version} (${retries}s)" - -# Verify bootstrap admin user exists -if curl -sf --max-time 5 "${FORGE_URL}/api/v1/users/${SETUP_ADMIN}" >/dev/null 2>&1; then - pass "Bootstrap admin '${SETUP_ADMIN}' exists" -else - fail "Bootstrap admin '${SETUP_ADMIN}' not found — was Forgejo set up?" - exit 1 -fi +pass "Mock Forgejo API v${api_version} (${retries}s)" # ── 2. Set up mock binaries ───────────────────────────────────────────────── echo "=== 2/6 Setting up mock binaries ===" -mkdir -p "$MOCK_BIN" "$MOCK_STATE" - -# Store bootstrap admin credentials for the docker mock -printf '%s:%s' "${SETUP_ADMIN}" "${SETUP_PASS}" > "$MOCK_STATE/bootstrap_creds" +mkdir -p "$MOCK_BIN" # ── Mock: docker ── -# Routes 'docker exec' user-creation calls to the Forgejo admin API, -# using the bootstrap admin's credentials. +# Intercepts docker exec calls that disinto init --bare makes to Forgejo CLI cat > "$MOCK_BIN/docker" << 'DOCKERMOCK' #!/usr/bin/env bash set -euo pipefail - -FORGE_URL="${SMOKE_FORGE_URL:-http://localhost:3000}" -MOCK_STATE="/tmp/smoke-mock-state" - -if [ ! 
-f "$MOCK_STATE/bootstrap_creds" ]; then - echo "mock-docker: bootstrap credentials not found" >&2 - exit 1 -fi -BOOTSTRAP_CREDS="$(cat "$MOCK_STATE/bootstrap_creds")" - -# docker ps — return empty (no containers running) -if [ "${1:-}" = "ps" ]; then - exit 0 -fi - -# docker exec — route to Forgejo API +FORGE_URL="${SMOKE_FORGE_URL:-${FORGE_URL:-http://localhost:3000}}" +if [ "${1:-}" = "ps" ]; then exit 0; fi if [ "${1:-}" = "exec" ]; then - shift # remove 'exec' - - # Skip docker exec flags (-u VALUE, -T, -i, etc.) + shift while [ $# -gt 0 ] && [ "${1#-}" != "$1" ]; do - case "$1" in - -u|-w|-e) shift 2 ;; - *) shift ;; - esac + case "$1" in -u|-w|-e) shift 2 ;; *) shift ;; esac done - shift # remove container name (e.g. disinto-forgejo) - - # $@ is now: forgejo admin user list|create [flags] + shift # container name if [ "${1:-}" = "forgejo" ] && [ "${2:-}" = "admin" ] && [ "${3:-}" = "user" ]; then subcmd="${4:-}" - - if [ "$subcmd" = "list" ]; then - echo "ID Username Email" - exit 0 - fi - + if [ "$subcmd" = "list" ]; then echo "ID Username Email"; exit 0; fi if [ "$subcmd" = "create" ]; then - shift 4 # skip 'forgejo admin user create' - username="" password="" email="" is_admin="false" + shift 4; username="" password="" email="" is_admin="false" while [ $# -gt 0 ]; do case "$1" in - --admin) is_admin="true"; shift ;; - --username) username="$2"; shift 2 ;; - --password) password="$2"; shift 2 ;; - --email) email="$2"; shift 2 ;; - --must-change-password*) shift ;; - *) shift ;; + --admin) is_admin="true"; shift ;; --username) username="$2"; shift 2 ;; + --password) password="$2"; shift 2 ;; --email) email="$2"; shift 2 ;; + --must-change-password*) shift ;; *) shift ;; esac done - - if [ -z "$username" ] || [ -z "$password" ] || [ -z "$email" ]; then - echo "mock-docker: missing required args" >&2 - exit 1 - fi - - # Create user via Forgejo admin API - if ! 
curl -sf -X POST \ - -u "$BOOTSTRAP_CREDS" \ - -H "Content-Type: application/json" \ + curl -sf -X POST -H "Content-Type: application/json" \ "${FORGE_URL}/api/v1/admin/users" \ - -d "{\"username\":\"${username}\",\"password\":\"${password}\",\"email\":\"${email}\",\"must_change_password\":false,\"login_name\":\"${username}\",\"source_id\":0}" \ - >/dev/null 2>&1; then - echo "mock-docker: failed to create user '${username}'" >&2 - exit 1 - fi - - # Patch user: ensure must_change_password is false (Forgejo admin - # API POST may ignore it) and promote to admin if requested - patch_body="{\"must_change_password\":false,\"login_name\":\"${username}\",\"source_id\":0" + -d "{\"username\":\"${username}\",\"password\":\"${password}\",\"email\":\"${email}\",\"must_change_password\":false}" >/dev/null 2>&1 if [ "$is_admin" = "true" ]; then - patch_body="${patch_body},\"admin\":true" + curl -sf -X PATCH -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/admin/users/${username}" \ + -d "{\"admin\":true,\"must_change_password\":false}" >/dev/null 2>&1 || true fi - patch_body="${patch_body}}" - - curl -sf -X PATCH \ - -u "$BOOTSTRAP_CREDS" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/admin/users/${username}" \ - -d "${patch_body}" \ - >/dev/null 2>&1 || true - - echo "New user '${username}' has been successfully created!" 
- exit 0 + echo "New user '${username}' has been successfully created!"; exit 0 fi - if [ "$subcmd" = "change-password" ]; then - shift 4 # skip 'forgejo admin user change-password' - username="" password="" + shift 4; username="" while [ $# -gt 0 ]; do - case "$1" in - --username) username="$2"; shift 2 ;; - --password) password="$2"; shift 2 ;; - --must-change-password*) shift ;; - --config*) shift ;; - *) shift ;; - esac + case "$1" in --username) username="$2"; shift 2 ;; --password) shift 2 ;; --must-change-password*|--config*) shift ;; *) shift ;; esac done - - if [ -z "$username" ]; then - echo "mock-docker: change-password missing --username" >&2 - exit 1 - fi - - # PATCH user via Forgejo admin API to clear must_change_password - patch_body="{\"must_change_password\":false,\"login_name\":\"${username}\",\"source_id\":0" - if [ -n "$password" ]; then - patch_body="${patch_body},\"password\":\"${password}\"" - fi - patch_body="${patch_body}}" - - if ! curl -sf -X PATCH \ - -u "$BOOTSTRAP_CREDS" \ - -H "Content-Type: application/json" \ + curl -sf -X PATCH -H "Content-Type: application/json" \ "${FORGE_URL}/api/v1/admin/users/${username}" \ - -d "${patch_body}" \ - >/dev/null 2>&1; then - echo "mock-docker: failed to change-password for '${username}'" >&2 - exit 1 - fi + -d "{\"must_change_password\":false}" >/dev/null 2>&1 || true exit 0 fi fi - - echo "mock-docker: unhandled exec: $*" >&2 - exit 1 fi - -echo "mock-docker: unhandled command: $*" >&2 exit 1 DOCKERMOCK chmod +x "$MOCK_BIN/docker" @@ -231,11 +134,8 @@ chmod +x "$MOCK_BIN/claude" printf '#!/usr/bin/env bash\nexit 0\n' > "$MOCK_BIN/tmux" chmod +x "$MOCK_BIN/tmux" -# No crontab mock — use real BusyBox crontab (available in the Forgejo -# Alpine image). Cron entries are verified via 'crontab -l' in step 6. - export PATH="$MOCK_BIN:$PATH" -pass "Mock binaries installed (docker, claude, tmux)" +pass "Mock binaries installed" # ── 3. 
Run disinto init ───────────────────────────────────────────────────── echo "=== 3/6 Running disinto init ===" @@ -245,9 +145,26 @@ rm -f "${FACTORY_ROOT}/projects/smoke-repo.toml" git config --global user.email "smoke@test.local" git config --global user.name "Smoke Test" +# USER needs to be set twice: assignment then export (SC2155) +USER=$(whoami) +export USER + +# Create mock git repo to avoid clone failure (mock server has no git support) +mkdir -p "/tmp/smoke-test-repo" +cd "/tmp/smoke-test-repo" +git init --quiet +git config user.email "smoke@test.local" +git config user.name "Smoke Test" +echo "# smoke-repo" > README.md +git add README.md +git commit --quiet -m "Initial commit" + export SMOKE_FORGE_URL="$FORGE_URL" export FORGE_URL +# Skip push to mock server (no git support) +export SKIP_PUSH=true + if bash "${FACTORY_ROOT}/bin/disinto" init \ "${TEST_SLUG}" \ --bare --yes \ @@ -258,6 +175,18 @@ else fail "disinto init exited non-zero" fi +# ── Idempotency test: run init again ─────────────────────────────────────── +echo "=== Idempotency test: running disinto init again ===" +if bash "${FACTORY_ROOT}/bin/disinto" init \ + "${TEST_SLUG}" \ + --bare --yes \ + --forge-url "$FORGE_URL" \ + --repo-root "/tmp/smoke-test-repo"; then + pass "disinto init (re-run) completed successfully" +else + fail "disinto init (re-run) exited non-zero" +fi + # ── 4. 
Verify Forgejo state ───────────────────────────────────────────────── echo "=== 4/6 Verifying Forgejo state ===" @@ -290,35 +219,6 @@ if [ "$repo_found" = false ]; then fail "Repo not found on Forgejo under any expected path" fi -# Labels exist on repo — use bootstrap admin to check -setup_token=$(curl -sf -X POST \ - -u "${SETUP_ADMIN}:${SETUP_PASS}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/users/${SETUP_ADMIN}/tokens" \ - -d '{"name":"smoke-verify","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || setup_token="" - -if [ -n "$setup_token" ]; then - label_count=0 - for repo_path in "${TEST_SLUG}" "dev-bot/smoke-repo" "disinto-admin/smoke-repo"; do - label_count=$(curl -sf \ - -H "Authorization: token ${setup_token}" \ - "${FORGE_URL}/api/v1/repos/${repo_path}/labels?limit=50" 2>/dev/null \ - | jq 'length' 2>/dev/null) || label_count=0 - if [ "$label_count" -gt 0 ]; then - break - fi - done - - if [ "$label_count" -ge 5 ]; then - pass "Labels created on repo (${label_count} labels)" - else - fail "Expected >= 5 labels, found ${label_count}" - fi -else - fail "Could not obtain verification token from bootstrap admin" -fi - # ── 5. Verify local state ─────────────────────────────────────────────────── echo "=== 5/6 Verifying local state ===" @@ -357,7 +257,7 @@ else fail ".env not found" fi -# Repo was cloned +# Repo was cloned (mock git repo created before disinto init) if [ -d "/tmp/smoke-test-repo/.git" ]; then pass "Repo cloned to /tmp/smoke-test-repo" else diff --git a/vault/AGENTS.md b/vault/AGENTS.md deleted file mode 100644 index 5b010ec..0000000 --- a/vault/AGENTS.md +++ /dev/null @@ -1,45 +0,0 @@ - -# Vault Agent - -**Role**: Three-pipeline gate — action safety classification, resource procurement, and human-action drafting. 
- -**Pipeline A — Action Gating (*.json)**: Actions enter a pending queue and are -classified by Claude via `vault-agent.sh`, which can auto-approve (call -`vault-fire.sh` directly), auto-reject (call `vault-reject.sh`), or escalate -to a human by writing `PHASE:escalate` to a phase file — using the same -unified escalation path as dev/action agents. - -**Pipeline B — Procurement (*.md)**: The planner files resource requests as -markdown files in `$OPS_REPO_ROOT/vault/pending/`. `vault-poll.sh` notifies the human via -vault/forge. The human fulfills the request (creates accounts, provisions infra, -adds secrets to `.env`) and moves the file to `$OPS_REPO_ROOT/vault/approved/`. -`vault-fire.sh` then extracts the proposed entry and appends it to -`$OPS_REPO_ROOT/RESOURCES.md`. - -**Pipeline C — Rent-a-Human (outreach drafts)**: Any agent can dispatch the -`run-rent-a-human` formula (via an `action` issue) when a task requires a human -touch — posting on Reddit, commenting on HN, signing up for a service, etc. -Claude drafts copy-paste-ready content to `vault/outreach/{platform}/drafts/` -and notifies the human via vault/forge for one-click execution. No vault approval -needed — the human reviews and publishes directly. - -**Trigger**: `vault-poll.sh` runs every 30 min via cron. 
- -**Key files**: -- `vault/vault-poll.sh` — Processes pending items: retry approved, auto-reject after 48h timeout, invoke vault-agent for JSON actions, notify human for procurement requests -- `vault/vault-agent.sh` — Classifies and routes pending JSON actions via `claude -p`: auto-approve, auto-reject, or escalate to human -- `vault/vault-env.sh` — Shared env setup for vault sub-scripts: sources `lib/env.sh`, overrides `FORGE_TOKEN` with `FORGE_VAULT_TOKEN`, sets `VAULT_TOKEN` for vault-runner container -- `vault/PROMPT.md` — System prompt for the vault agent's Claude invocation -- `vault/vault-fire.sh` — Executes an approved action (JSON) in an **ephemeral Docker container** with vault-only secrets injected (GITHUB_TOKEN, CLAWHUB_TOKEN — never exposed to agents). For deployment actions, calls `lib/ci-helpers.sh:ci_promote()` to gate production promotes via Woodpecker environments. Writes `$OPS_REPO_ROOT/RESOURCES.md` entry for procurement MD approvals. -- `vault/vault-reject.sh` — Marks a JSON action as rejected -- `formulas/run-rent-a-human.toml` — Formula for human-action drafts: Claude researches target platform norms, drafts copy-paste content, writes to `vault/outreach/{platform}/drafts/`, notifies human via vault/forge - -**Procurement flow** (all vault items live in `$OPS_REPO_ROOT/vault/`): -1. Planner drops `$OPS_REPO_ROOT/vault/pending/.md` with what/why/proposed RESOURCES.md entry -2. `vault-poll.sh` notifies human via vault/forge -3. Human fulfills: creates account, adds secrets to `.env`, moves file to `approved/` -4. `vault-fire.sh` extracts proposed entry, appends to `$OPS_REPO_ROOT/RESOURCES.md`, moves to `fired/` -5. 
Next planner run reads RESOURCES.md → new capability available → unblocks prerequisite tree - -**Environment variables consumed**: -- All from `lib/env.sh` diff --git a/vault/PROMPT.md b/vault/PROMPT.md deleted file mode 100644 index 3f93ee5..0000000 --- a/vault/PROMPT.md +++ /dev/null @@ -1,122 +0,0 @@ -# Vault Agent - -You are the vault agent for `$FORGE_REPO`. You were called by -`vault-poll.sh` because one or more actions in `$OPS_REPO_ROOT/vault/pending/` need -classification and routing. - -## Two Pipelines - -The vault handles two kinds of items: - -### A. Action Gating (*.json) -Actions from agents that need safety classification before execution. -You classify and route these: auto-approve, escalate, or reject. - -### B. Procurement Requests (*.md) -Resource requests from the planner. These always escalate to the human — -you do NOT auto-approve or reject procurement requests. The human fulfills -the request (creates accounts, provisions infra, adds secrets to .env) -and moves the file from `$OPS_REPO_ROOT/vault/pending/` to `$OPS_REPO_ROOT/vault/approved/`. -`vault-fire.sh` then writes the RESOURCES.md entry. - -## Your Job (Action Gating only) - -For each pending JSON action, decide: **auto-approve**, **escalate**, or **reject**. - -## Routing Table (risk × reversibility) - -| Risk | Reversible | Route | -|----------|------------|---------------------------------------------| -| low | true | auto-approve → fire immediately | -| low | false | auto-approve → fire, log prominently | -| medium | true | auto-approve → fire, notify via vault/forge | -| medium | false | escalate via vault/forge → wait for human reply | -| high | any | always escalate → wait for human reply | - -## Rules - -1. **Never lower risk.** You may override the source agent's self-assessed - risk *upward*, never downward. If a `blog-post` looks like it contains - pricing claims, bump it to `medium` or `high`. -2. **`requires_human: true` always escalates.** Regardless of risk level. -3. 
**Unknown action types → reject** with reason `unknown_type`. -4. **Malformed JSON → reject** with reason `malformed`. -5. **Payload validation:** Check that the payload has the minimum required - fields for the action type. Missing fields → reject with reason. -6. **Procurement requests (*.md) → skip.** These are handled by the human - directly. Do not attempt to classify, approve, or reject them. - -## Action Type Defaults - -| Type | Default Risk | Default Reversible | -|------------------|-------------|-------------------| -| `blog-post` | low | yes | -| `social-post` | medium | yes | -| `email-blast` | high | no | -| `pricing-change` | high | partial | -| `dns-change` | high | partial | -| `webhook-call` | medium | depends | -| `stripe-charge` | high | no | - -## Procurement Request Format (reference only) - -Procurement requests dropped by the planner look like: - -```markdown -# Procurement Request: - -## What - - -## Why - - -## Unblocks - - -## Proposed RESOURCES.md Entry -## -- type: -- capability: -- env: -``` - -## Available Tools - -You have shell access. Use these for routing decisions: - -```bash -source ${FACTORY_ROOT}/lib/env.sh -``` - -### Auto-approve and fire -```bash -bash ${FACTORY_ROOT}/vault/vault-fire.sh -``` - -### Escalate -```bash -echo "PHASE:escalate" > "$PHASE_FILE" -``` - -### Reject -```bash -bash ${FACTORY_ROOT}/vault/vault-reject.sh "" -``` - -## Output Format - -After processing each action, print exactly: - -``` -ROUTE: -``` - -## Important - -- Process ALL pending JSON actions in the batch. Never skip silently. -- For auto-approved actions, fire them immediately via `vault-fire.sh`. -- For escalated actions, move to `$OPS_REPO_ROOT/vault/approved/` only AFTER human approval. -- Read the action JSON carefully. Check the payload, not just the metadata. -- Ignore `.md` files in pending/ — those are procurement requests handled - separately by vault-poll.sh and the human. 
diff --git a/vault/SCHEMA.md b/vault/SCHEMA.md new file mode 100644 index 0000000..0a465c3 --- /dev/null +++ b/vault/SCHEMA.md @@ -0,0 +1,81 @@ +# Vault Action TOML Schema + +This document defines the schema for vault action TOML files used in the PR-based approval workflow (issue #74). + +## File Location + +Vault actions are stored in `vault/actions/.toml` on the ops repo. + +## Schema Definition + +```toml +# Required +id = "publish-skill-20260331" +formula = "clawhub-publish" +context = "SKILL.md bumped to 0.3.0" + +# Required secrets to inject +secrets = ["CLAWHUB_TOKEN"] + +# Optional +model = "sonnet" +tools = ["clawhub"] +timeout_minutes = 30 +``` + +## Field Specifications + +### Required Fields + +| Field | Type | Description | +|-------|------|-------------| +| `id` | string | Unique identifier for the vault action. Format: `-` (e.g., `publish-skill-20260331`) | +| `formula` | string | Formula name from `formulas/` directory that defines the operational task to execute | +| `context` | string | Human-readable explanation of why this action is needed. Used in PR description | +| `secrets` | array of strings | List of secret names to inject into the execution environment. Only these secrets are passed to the container | + +### Optional Fields + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `model` | string | `sonnet` | Override the default Claude model for this action | +| `tools` | array of strings | `[]` | MCP tools to enable during execution | +| `timeout_minutes` | integer | `60` | Maximum execution time in minutes | + +## Secret Names + +Secret names must be defined in `.env.vault.enc` on the ops repo. The vault validates that requested secrets exist in the allowlist before execution. + +Common secret names: +- `CLAWHUB_TOKEN` - Token for ClawHub skill publishing +- `GITHUB_TOKEN` - GitHub API token for repository operations +- `DEPLOY_KEY` - Infrastructure deployment key + +## Validation Rules + +1. 
**Required fields**: `id`, `formula`, `context`, and `secrets` must be present +2. **Formula validation**: The formula must exist in the `formulas/` directory +3. **Secret validation**: All secrets in the `secrets` array must be in the allowlist +4. **No unknown fields**: The TOML must not contain fields outside the schema +5. **ID uniqueness**: The `id` must be unique across all vault actions + +## Example Files + +See `vault/examples/` for complete examples: +- `webhook-call.toml` - Example of calling an external webhook +- `promote.toml` - Example of promoting a build/artifact +- `publish.toml` - Example of publishing a skill to ClawHub + +## Usage + +Validate a vault action file: + +```bash +./vault/validate.sh vault/actions/.toml +``` + +The validator will check: +- All required fields are present +- Secret names are in the allowlist +- No unknown fields are present +- Formula exists in the formulas directory diff --git a/vault/examples/promote.toml b/vault/examples/promote.toml new file mode 100644 index 0000000..b956c9f --- /dev/null +++ b/vault/examples/promote.toml @@ -0,0 +1,21 @@ +# vault/examples/promote.toml +# Example: Promote a build/artifact to production +# +# This vault action demonstrates promoting a built artifact to a +# production environment with proper authentication. 
+ +id = "promote-20260331" +formula = "run-supervisor" +context = "Promote build v1.2.3 to production environment" + +# Secrets to inject for deployment authentication +secrets = ["DEPLOY_KEY", "DOCKER_HUB_TOKEN"] + +# Optional: use larger model for complex deployment logic +model = "sonnet" + +# Optional: enable MCP tools for container operations +tools = ["docker"] + +# Optional: deployments may take longer +timeout_minutes = 45 diff --git a/vault/examples/publish.toml b/vault/examples/publish.toml new file mode 100644 index 0000000..2373b00 --- /dev/null +++ b/vault/examples/publish.toml @@ -0,0 +1,21 @@ +# vault/examples/publish.toml +# Example: Publish a skill to ClawHub +# +# This vault action demonstrates publishing a skill to ClawHub +# using the clawhub-publish formula. + +id = "publish-site-20260331" +formula = "run-publish-site" +context = "Publish updated site to production" + +# Secrets to inject (only these get passed to the container) +secrets = ["DEPLOY_KEY"] + +# Optional: use sonnet model +model = "sonnet" + +# Optional: enable MCP tools +tools = [] + +# Optional: 30 minute timeout +timeout_minutes = 30 diff --git a/vault/examples/release.toml b/vault/examples/release.toml new file mode 100644 index 0000000..f8af6d1 --- /dev/null +++ b/vault/examples/release.toml @@ -0,0 +1,35 @@ +# vault/examples/release.toml +# Example: Release vault item schema +# +# This example demonstrates the release vault item schema for creating +# versioned releases with vault-gated approval. +# +# The release formula tags Forgejo main, pushes to mirrors, builds and +# tags the agents Docker image, and restarts agent containers. +# +# Example vault item (auto-generated by `disinto release v1.2.0`): +# +# id = "release-v120" +# formula = "release" +# context = "Release v1.2.0" +# secrets = [] +# +# Steps executed by the release formula: +# 1. preflight - Validate prerequisites (version, FORGE_TOKEN, Docker) +# 2. tag-main - Create tag on Forgejo main via API +# 3. 
push-mirrors - Push tag to Codeberg and GitHub mirrors +# 4. build-image - Build agents Docker image with --no-cache +# 5. tag-image - Tag image with version (disinto-agents:v1.2.0) +# 6. restart-agents - Restart agent containers with new image +# 7. commit-result - Write release result to tracking file + +id = "release-v120" +formula = "release" +context = "Release v1.2.0 — includes vault redesign, .profile system, architect agent" +secrets = [] + +# Optional: specify a larger model for complex release logic +# model = "sonnet" + +# Optional: releases may take longer due to Docker builds +# timeout_minutes = 60 diff --git a/vault/examples/webhook-call.toml b/vault/examples/webhook-call.toml new file mode 100644 index 0000000..27b3f25 --- /dev/null +++ b/vault/examples/webhook-call.toml @@ -0,0 +1,21 @@ +# vault/examples/webhook-call.toml +# Example: Call an external webhook with authentication +# +# This vault action demonstrates calling an external webhook endpoint +# with proper authentication via injected secrets. + +id = "webhook-call-20260331" +formula = "run-rent-a-human" +context = "Notify Slack channel about deployment completion" + +# Secrets to inject (only these get passed to the container) +secrets = ["DEPLOY_KEY"] + +# Optional: use sonnet model for this action +model = "sonnet" + +# Optional: enable MCP tools +tools = [] + +# Optional: 30 minute timeout +timeout_minutes = 30 diff --git a/vault/validate.sh b/vault/validate.sh new file mode 100755 index 0000000..f01ea63 --- /dev/null +++ b/vault/validate.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# vault/validate.sh — Validate vault action TOML files +# +# Usage: ./vault/validate.sh +# +# Validates a vault action TOML file according to the schema defined in +# vault/SCHEMA.md. 
Checks: +# - Required fields are present +# - Secret names are in the allowlist +# - No unknown fields are present +# - Formula exists in formulas/ + +set -euo pipefail + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source vault environment +source "$SCRIPT_DIR/vault-env.sh" + +# Get the TOML file to validate +TOML_FILE="${1:-}" + +if [ -z "$TOML_FILE" ]; then + echo "Usage: $0 " >&2 + echo "Example: $0 vault/examples/publish.toml" >&2 + exit 1 +fi + +# Resolve relative paths +if [[ "$TOML_FILE" != /* ]]; then + TOML_FILE="$(cd "$(dirname "$TOML_FILE")" && pwd)/$(basename "$TOML_FILE")" +fi + +# Run validation +if validate_vault_action "$TOML_FILE"; then + echo "VALID: $TOML_FILE" + echo " ID: $VAULT_ACTION_ID" + echo " Formula: $VAULT_ACTION_FORMULA" + echo " Context: $VAULT_ACTION_CONTEXT" + echo " Secrets: $VAULT_ACTION_SECRETS" + exit 0 +else + echo "INVALID: $TOML_FILE" >&2 + exit 1 +fi diff --git a/vault/vault-agent.sh b/vault/vault-agent.sh deleted file mode 100755 index 4436982..0000000 --- a/vault/vault-agent.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash -# vault-agent.sh — Invoke claude -p to classify and route pending vault actions -# -# Called by vault-poll.sh when pending actions exist. Reads all pending/*.json, -# builds a prompt with action summaries, and lets the LLM decide routing. -# -# The LLM can call vault-fire.sh (auto-approve) or vault-reject.sh (reject) -# directly. For escalations, it writes a PHASE:escalate file and marks the -# action as "escalated" in pending/ so vault-poll skips it on future runs. 
- -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -source "${SCRIPT_DIR}/vault-env.sh" - -VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault" -OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" -PROMPT_FILE="${VAULT_SCRIPT_DIR}/PROMPT.md" -LOGFILE="${VAULT_SCRIPT_DIR}/vault.log" -CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-3600}" - -log() { - printf '[%s] vault-agent: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -# Collect all pending actions (skip already-escalated) -ACTIONS_BATCH="" -ACTION_COUNT=0 - -for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do - [ -f "$action_file" ] || continue - - ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) - [ "$ACTION_STATUS" = "escalated" ] && continue - - # Validate JSON - if ! jq empty < "$action_file" 2>/dev/null; then - ACTION_ID=$(basename "$action_file" .json) - log "malformed JSON: $action_file — rejecting" - bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "malformed JSON" 2>/dev/null || true - continue - fi - - ACTION_JSON=$(cat "$action_file") - ACTIONS_BATCH="${ACTIONS_BATCH} ---- ACTION --- -$(echo "$ACTION_JSON" | jq '.') ---- END ACTION --- -" - ACTION_COUNT=$((ACTION_COUNT + 1)) -done - -if [ "$ACTION_COUNT" -eq 0 ]; then - log "no actionable pending items" - exit 0 -fi - -log "processing $ACTION_COUNT pending action(s) via claude -p" - -# Build the prompt -SYSTEM_PROMPT=$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a vault agent. Classify and route actions.") - -PROMPT="${SYSTEM_PROMPT} - -## Pending Actions (${ACTION_COUNT} total) -${ACTIONS_BATCH} - -## Environment -- FACTORY_ROOT=${FACTORY_ROOT} -- OPS_REPO_ROOT=${OPS_REPO_ROOT} -- Vault data: ${OPS_VAULT_DIR} -- vault-fire.sh: bash ${VAULT_SCRIPT_DIR}/vault-fire.sh -- vault-reject.sh: bash ${VAULT_SCRIPT_DIR}/vault-reject.sh \"\" - -Process each action now. For auto-approve, fire immediately. For reject, call vault-reject.sh. 
- -For actions that need human approval (escalate), write a PHASE:escalate file -to signal the unified escalation path: - printf 'PHASE:escalate\nReason: vault procurement — %s\n' '' \\ - > /tmp/vault-escalate-.phase -Then STOP and wait — a human will review via the forge." - -CLAUDE_OUTPUT=$(timeout "$CLAUDE_TIMEOUT" claude -p "$PROMPT" \ - --model sonnet \ - --dangerously-skip-permissions \ - --max-turns 20 \ - 2>/dev/null) || true - -log "claude finished ($(echo "$CLAUDE_OUTPUT" | wc -c) bytes)" - -# Log routing decisions -ROUTES=$(echo "$CLAUDE_OUTPUT" | grep "^ROUTE:" || true) -if [ -n "$ROUTES" ]; then - echo "$ROUTES" | while read -r line; do - log " $line" - done -fi diff --git a/vault/vault-env.sh b/vault/vault-env.sh index 79e4176..8e7f7c6 100644 --- a/vault/vault-env.sh +++ b/vault/vault-env.sh @@ -7,3 +7,148 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/lib/env.sh" # Use vault-bot's own Forgejo identity FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" + +# Vault redesign in progress (PR-based approval workflow) +# This file is kept for shared env setup; scripts being replaced by #73 + +# ============================================================================= +# VAULT ACTION VALIDATION +# ============================================================================= + +# Allowed secret names - must match keys in .env.vault.enc +VAULT_ALLOWED_SECRETS="CLAWHUB_TOKEN GITHUB_TOKEN DEPLOY_KEY NPM_TOKEN DOCKER_HUB_TOKEN" + +# Validate a vault action TOML file +# Usage: validate_vault_action +# Returns: 0 if valid, 1 if invalid +# Sets: VAULT_ACTION_ID, VAULT_ACTION_FORMULA, VAULT_ACTION_CONTEXT on success +validate_vault_action() { + local toml_file="$1" + + if [ -z "$toml_file" ]; then + echo "ERROR: No TOML file specified" >&2 + return 1 + fi + + if [ ! 
-f "$toml_file" ]; then + echo "ERROR: File not found: $toml_file" >&2 + return 1 + fi + + log "Validating vault action: $toml_file" + + # Get script directory for relative path resolution + # FACTORY_ROOT is set by lib/env.sh which is sourced above + local formulas_dir="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}/formulas" + + # Extract TOML values using grep/sed (basic TOML parsing) + local toml_content + toml_content=$(cat "$toml_file") + + # Extract string values (id, formula, context) + local id formula context + id=$(echo "$toml_content" | grep -E '^id\s*=' | sed -E 's/^id\s*=\s*"(.*)"/\1/' | tr -d '\r') + formula=$(echo "$toml_content" | grep -E '^formula\s*=' | sed -E 's/^formula\s*=\s*"(.*)"/\1/' | tr -d '\r') + context=$(echo "$toml_content" | grep -E '^context\s*=' | sed -E 's/^context\s*=\s*"(.*)"/\1/' | tr -d '\r') + + # Extract secrets array + local secrets_line secrets_array + secrets_line=$(echo "$toml_content" | grep -E '^secrets\s*=' | tr -d '\r') + secrets_array=$(echo "$secrets_line" | sed -E 's/^secrets\s*=\s*\[(.*)\]/\1/' | tr -d '[]"' | tr ',' ' ' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + + # Check for unknown fields (any top-level key not in allowed list) + local unknown_fields + unknown_fields=$(echo "$toml_content" | grep -E '^[a-zA-Z_][a-zA-Z0-9_]*\s*=' | sed -E 's/^([a-zA-Z_][a-zA-Z0-9_]*)\s*=.*/\1/' | sort -u | while read -r field; do + case "$field" in + id|formula|context|secrets|model|tools|timeout_minutes) ;; + *) echo "$field" ;; + esac + done) + + if [ -n "$unknown_fields" ]; then + echo "ERROR: Unknown fields in TOML: $(echo "$unknown_fields" | tr '\n' ', ' | sed 's/,$//')" >&2 + return 1 + fi + + # Validate required fields + if [ -z "$id" ]; then + echo "ERROR: Missing required field: id" >&2 + return 1 + fi + + if [ -z "$formula" ]; then + echo "ERROR: Missing required field: formula" >&2 + return 1 + fi + + if [ -z "$context" ]; then + echo "ERROR: Missing required field: context" >&2 + return 1 + 
fi + + # Validate formula exists in formulas/ + if [ ! -f "$formulas_dir/${formula}.toml" ]; then + echo "ERROR: Formula not found: $formula" >&2 + return 1 + fi + + # Validate secrets field exists and is not empty + if [ -z "$secrets_line" ]; then + echo "ERROR: Missing required field: secrets" >&2 + return 1 + fi + + # Validate each secret is in the allowlist + for secret in $secrets_array; do + secret=$(echo "$secret" | tr -d '"' | xargs) # trim whitespace and quotes + if [ -n "$secret" ]; then + if ! echo " $VAULT_ALLOWED_SECRETS " | grep -q " $secret "; then + echo "ERROR: Unknown secret (not in allowlist): $secret" >&2 + return 1 + fi + fi + done + + # Validate optional fields if present + # model + if echo "$toml_content" | grep -qE '^model\s*='; then + local model_value + model_value=$(echo "$toml_content" | grep -E '^model\s*=' | sed -E 's/^model\s*=\s*"(.*)"/\1/' | tr -d '\r') + if [ -z "$model_value" ]; then + echo "ERROR: 'model' must be a non-empty string" >&2 + return 1 + fi + fi + + # tools + if echo "$toml_content" | grep -qE '^tools\s*='; then + local tools_line + tools_line=$(echo "$toml_content" | grep -E '^tools\s*=' | tr -d '\r') + if ! 
echo "$tools_line" | grep -q '\['; then + echo "ERROR: 'tools' must be an array" >&2 + return 1 + fi + fi + + # timeout_minutes + if echo "$toml_content" | grep -qE '^timeout_minutes\s*='; then + local timeout_value + timeout_value=$(echo "$toml_content" | grep -E '^timeout_minutes\s*=' | sed -E 's/^timeout_minutes\s*=\s*([0-9]+)/\1/' | tr -d '\r') + if [ -z "$timeout_value" ] || [ "$timeout_value" -le 0 ] 2>/dev/null; then + echo "ERROR: 'timeout_minutes' must be a positive integer" >&2 + return 1 + fi + fi + + # Export validated values (for use by caller script) + export VAULT_ACTION_ID="$id" + export VAULT_ACTION_FORMULA="$formula" + export VAULT_ACTION_CONTEXT="$context" + export VAULT_ACTION_SECRETS="$secrets_array" + + log "VAULT_ACTION_ID=$VAULT_ACTION_ID" + log "VAULT_ACTION_FORMULA=$VAULT_ACTION_FORMULA" + log "VAULT_ACTION_SECRETS=$VAULT_ACTION_SECRETS" + + return 0 +} diff --git a/vault/vault-fire.sh b/vault/vault-fire.sh deleted file mode 100755 index ad57022..0000000 --- a/vault/vault-fire.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env bash -# vault-fire.sh — Execute an approved vault item by ID -# -# Handles two pipelines: -# A. Action gating (*.json): pending/ → approved/ → fired/ -# Execution delegated to ephemeral vault-runner container via disinto vault-run. -# The vault-runner gets vault secrets (.env.vault.enc); this script does NOT. -# B. Procurement (*.md): approved/ → fired/ (writes RESOURCES.md entry) -# -# If item is in pending/, moves to approved/ first. -# If item is already in approved/, fires directly (crash recovery). 
-# -# Usage: bash vault-fire.sh - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -source "${SCRIPT_DIR}/vault-env.sh" - -OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" -LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" -LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" -RESOURCES_FILE="${OPS_REPO_ROOT}/RESOURCES.md" - -log() { - printf '[%s] vault-fire: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -ACTION_ID="${1:?Usage: vault-fire.sh }" - -# ============================================================================= -# Detect pipeline: procurement (.md) or action gating (.json) -# ============================================================================= -IS_PROCUREMENT=false -ACTION_FILE="" - -if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" ]; then - IS_PROCUREMENT=true - ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" -elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" ]; then - IS_PROCUREMENT=true - mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.md" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" - ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.md" - log "$ACTION_ID: pending → approved (procurement)" -elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then - ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" -elif [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then - mv "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" - ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" - TMP=$(mktemp) - jq '.status = "approved"' "$ACTION_FILE" > "$TMP" && mv "$TMP" "$ACTION_FILE" - log "$ACTION_ID: pending → approved" -else - log "ERROR: item $ACTION_ID not found in pending/ or approved/" - exit 1 -fi - -# Acquire lock -mkdir -p "$LOCKS_DIR" -LOCKFILE="${LOCKS_DIR}/${ACTION_ID}.lock" -if [ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || true) - if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then - log "$ACTION_ID: already being fired by PID 
$LOCK_PID" - exit 0 - fi -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE"' EXIT - -# ============================================================================= -# Pipeline A: Procurement — extract RESOURCES.md entry and append -# ============================================================================= -if [ "$IS_PROCUREMENT" = true ]; then - log "$ACTION_ID: firing procurement request" - - # Extract the proposed RESOURCES.md entry from the markdown file. - # Everything after the "## Proposed RESOURCES.md Entry" heading to EOF. - # Uses awk because the entry itself contains ## headings (## ). - ENTRY="" - ENTRY=$(awk '/^## Proposed RESOURCES\.md Entry/{found=1; next} found{print}' "$ACTION_FILE" 2>/dev/null || true) - - # Strip leading/trailing blank lines and markdown code fences - ENTRY=$(echo "$ENTRY" | sed '/^```/d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba;}') - - if [ -z "$ENTRY" ]; then - log "ERROR: $ACTION_ID has no '## Proposed RESOURCES.md Entry' section" - exit 1 - fi - - # Append entry to RESOURCES.md - printf '\n%s\n' "$ENTRY" >> "$RESOURCES_FILE" - log "$ACTION_ID: wrote RESOURCES.md entry" - - # Move to fired/ - mv "$ACTION_FILE" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.md" - rm -f "${LOCKS_DIR}/${ACTION_ID}.notified" - log "$ACTION_ID: approved → fired (procurement)" - exit 0 -fi - -# ============================================================================= -# Pipeline B: Action gating — delegate to ephemeral vault-runner container -# ============================================================================= -ACTION_TYPE=$(jq -r '.type // ""' < "$ACTION_FILE") -ACTION_SOURCE=$(jq -r '.source // ""' < "$ACTION_FILE") - -if [ -z "$ACTION_TYPE" ]; then - log "ERROR: $ACTION_ID has no type field" - exit 1 -fi - -log "$ACTION_ID: firing type=$ACTION_TYPE source=$ACTION_SOURCE via vault-runner" - -FIRE_EXIT=0 - -# Delegate execution to the ephemeral vault-runner container. 
-# The vault-runner gets vault secrets (.env.vault.enc) injected at runtime; -# this host process never sees those secrets. -if [ -f "${FACTORY_ROOT}/.env.vault.enc" ] && [ -f "${FACTORY_ROOT}/docker-compose.yml" ]; then - bash "${FACTORY_ROOT}/bin/disinto" vault-run "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$? -else - # Fallback for bare-metal or pre-migration setups: run action handler directly - log "$ACTION_ID: no .env.vault.enc or docker-compose.yml — running action directly" - bash "${SCRIPT_DIR}/vault-run-action.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1 || FIRE_EXIT=$? -fi - -# ============================================================================= -# Move to fired/ or leave in approved/ on failure -# ============================================================================= -if [ "$FIRE_EXIT" -eq 0 ]; then - # Update with fired timestamp and move to fired/ - TMP=$(mktemp) - jq --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" '.status = "fired" | .fired_at = $ts' "$ACTION_FILE" > "$TMP" \ - && mv "$TMP" "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" - rm -f "$ACTION_FILE" - log "$ACTION_ID: approved → fired" -else - log "ERROR: $ACTION_ID fire failed (exit $FIRE_EXIT) — stays in approved/ for retry" - exit "$FIRE_EXIT" -fi diff --git a/vault/vault-poll.sh b/vault/vault-poll.sh deleted file mode 100755 index a32b31f..0000000 --- a/vault/vault-poll.sh +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/env bash -# vault-poll.sh — Vault: process pending actions + procurement requests -# -# Runs every 30min via cron. Two pipelines: -# A. Action gating (*.json): auto-approve/escalate/reject via vault-agent.sh -# B. Procurement (*.md): notify human, fire approved requests via vault-fire.sh -# -# Phases: -# 1. Retry any approved/ items that weren't fired (crash recovery) -# 2. Auto-reject escalations with no reply for 48h -# 3. Invoke vault-agent.sh for new pending JSON actions -# 4. 
Notify human about new pending procurement requests (.md) -# -# Cron: */30 * * * * /path/to/disinto/vault/vault-poll.sh -# -# Peek: cat /tmp/vault-status -# Log: tail -f /path/to/disinto/vault/vault.log - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -source "${SCRIPT_DIR}/../lib/env.sh" -# Use vault-bot's own Forgejo identity (#747) -FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" - -LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" -STATUSFILE="/tmp/vault-status" -LOCKFILE="/tmp/vault-poll.lock" -VAULT_SCRIPT_DIR="${FACTORY_ROOT}/vault" -OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" -LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" - -TIMEOUT_HOURS=48 - -# Prevent overlapping runs -if [ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) - if kill -0 "$LOCK_PID" 2>/dev/null; then - exit 0 - fi - rm -f "$LOCKFILE" -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT - -log() { - printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -status() { - printf '[%s] vault: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" - log "$*" -} - -# Acquire per-action lock (returns 0 if acquired, 1 if already locked) -lock_action() { - local action_id="$1" - local lockfile="${LOCKS_DIR}/${action_id}.lock" - mkdir -p "$LOCKS_DIR" - if [ -f "$lockfile" ]; then - local lock_pid - lock_pid=$(cat "$lockfile" 2>/dev/null || true) - if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then - return 1 - fi - rm -f "$lockfile" - fi - echo $$ > "$lockfile" - return 0 -} - -unlock_action() { - local action_id="$1" - rm -f "${LOCKS_DIR}/${action_id}.lock" -} - -# ============================================================================= -# PHASE 1: Retry approved items (crash recovery — JSON actions + MD procurement) -# ============================================================================= -status "phase 1: retrying approved items" - -for action_file in "${OPS_VAULT_DIR}/approved/"*.json; do 
- [ -f "$action_file" ] || continue - ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) - [ -z "$ACTION_ID" ] && continue - - if ! lock_action "$ACTION_ID"; then - log "skip $ACTION_ID — locked by another process" - continue - fi - - log "retrying approved action: $ACTION_ID" - if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then - log "fired $ACTION_ID (retry)" - else - log "ERROR: fire failed for $ACTION_ID (retry)" - fi - - unlock_action "$ACTION_ID" -done - -# Retry approved procurement requests (.md) -for req_file in "${OPS_VAULT_DIR}/approved/"*.md; do - [ -f "$req_file" ] || continue - REQ_ID=$(basename "$req_file" .md) - - if ! lock_action "$REQ_ID"; then - log "skip procurement $REQ_ID — locked by another process" - continue - fi - - log "retrying approved procurement: $REQ_ID" - if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$REQ_ID" >> "$LOGFILE" 2>&1; then - log "fired procurement $REQ_ID (retry)" - else - log "ERROR: fire failed for procurement $REQ_ID (retry)" - fi - - unlock_action "$REQ_ID" -done - -# ============================================================================= -# PHASE 2: Timeout escalations (48h no reply → auto-reject) -# ============================================================================= -status "phase 2: checking escalation timeouts" - -NOW_EPOCH=$(date +%s) -TIMEOUT_SECS=$((TIMEOUT_HOURS * 3600)) - -for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do - [ -f "$action_file" ] || continue - - ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) - [ "$ACTION_STATUS" != "escalated" ] && continue - - ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) - ESCALATED_AT=$(jq -r '.escalated_at // ""' < "$action_file" 2>/dev/null) - [ -z "$ESCALATED_AT" ] && continue - - ESCALATED_EPOCH=$(date -d "$ESCALATED_AT" +%s 2>/dev/null || echo 0) - AGE_SECS=$((NOW_EPOCH - ESCALATED_EPOCH)) - - if [ "$AGE_SECS" -gt "$TIMEOUT_SECS" ]; then - AGE_HOURS=$((AGE_SECS / 3600)) - 
log "timeout: $ACTION_ID escalated ${AGE_HOURS}h ago with no reply — auto-rejecting" - bash "${VAULT_SCRIPT_DIR}/vault-reject.sh" "$ACTION_ID" "timeout (${AGE_HOURS}h, no human reply)" >> "$LOGFILE" 2>&1 || true - fi -done - -# ============================================================================= -# PHASE 3: Process new pending actions (JSON — action gating) -# ============================================================================= -status "phase 3: processing pending actions" - -PENDING_COUNT=0 -PENDING_SUMMARY="" - -for action_file in "${OPS_VAULT_DIR}/pending/"*.json; do - [ -f "$action_file" ] || continue - - ACTION_STATUS=$(jq -r '.status // ""' < "$action_file" 2>/dev/null) - # Skip already-escalated actions (waiting for human reply) - [ "$ACTION_STATUS" = "escalated" ] && continue - - ACTION_ID=$(jq -r '.id // ""' < "$action_file" 2>/dev/null) - [ -z "$ACTION_ID" ] && continue - - if ! lock_action "$ACTION_ID"; then - log "skip $ACTION_ID — locked" - continue - fi - - PENDING_COUNT=$((PENDING_COUNT + 1)) - ACTION_TYPE=$(jq -r '.type // "unknown"' < "$action_file" 2>/dev/null) - ACTION_SOURCE=$(jq -r '.source // "unknown"' < "$action_file" 2>/dev/null) - PENDING_SUMMARY="${PENDING_SUMMARY} ${ACTION_ID} [${ACTION_TYPE}] from ${ACTION_SOURCE}\n" - - unlock_action "$ACTION_ID" -done - -if [ "$PENDING_COUNT" -gt 0 ]; then - log "found $PENDING_COUNT pending action(s), invoking vault-agent" - status "invoking vault-agent for $PENDING_COUNT action(s)" - - bash "${VAULT_SCRIPT_DIR}/vault-agent.sh" >> "$LOGFILE" 2>&1 || { - log "ERROR: vault-agent failed" - } -fi - -# ============================================================================= -# PHASE 4: Notify human about new pending procurement requests (.md) -# ============================================================================= -status "phase 4: processing pending procurement requests" - -PROCURE_COUNT=0 - -for req_file in "${OPS_VAULT_DIR}/pending/"*.md; do - [ -f "$req_file" ] || 
continue - REQ_ID=$(basename "$req_file" .md) - - # Check if already notified (marker file) - if [ -f "${LOCKS_DIR}/${REQ_ID}.notified" ]; then - continue - fi - - if ! lock_action "$REQ_ID"; then - log "skip procurement $REQ_ID — locked" - continue - fi - - PROCURE_COUNT=$((PROCURE_COUNT + 1)) - - # Extract title from first heading - REQ_TITLE=$(grep -m1 '^# ' "$req_file" | sed 's/^# //' || echo "$REQ_ID") - - log "new procurement request: $REQ_ID — $REQ_TITLE" - - # Mark as notified so we don't re-send - mkdir -p "${LOCKS_DIR}" - touch "${LOCKS_DIR}/${REQ_ID}.notified" - - unlock_action "$REQ_ID" -done - -# ============================================================================= -# PHASE 5: Detect vault-bot authorized comments on issues -# ============================================================================= -status "phase 5: scanning for vault-bot authorized comments" - -COMMENT_COUNT=0 - -if [ -n "${FORGE_REPO:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then - # Get open issues with action label - ACTION_ISSUES=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/issues?state=open&labels=action&limit=50" 2>/dev/null) || ACTION_ISSUES="[]" - - ISSUE_COUNT=$(printf '%s' "$ACTION_ISSUES" | jq 'length') - for idx in $(seq 0 $((ISSUE_COUNT - 1))); do - ISSUE_NUM=$(printf '%s' "$ACTION_ISSUES" | jq -r ".[$idx].number") - - # Skip if already processed - if [ -f "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" ]; then - continue - fi - - # Get comments on this issue - COMMENTS=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL}/api/v1/repos/${FORGE_REPO}/issues/${ISSUE_NUM}/comments?limit=50" 2>/dev/null) || continue - - # Look for vault-bot comments containing VAULT:APPROVED with a JSON action spec - APPROVED_BODY=$(printf '%s' "$COMMENTS" | jq -r ' - [.[] | select(.user.login == "vault-bot") | select(.body | test("VAULT:APPROVED"))] | last | .body // empty - ' 2>/dev/null) || continue - - [ -z 
"$APPROVED_BODY" ] && continue - - # Extract JSON action spec from fenced code block in the comment - ACTION_JSON=$(printf '%s' "$APPROVED_BODY" | sed -n '/^```json$/,/^```$/p' | sed '1d;$d') - [ -z "$ACTION_JSON" ] && continue - - # Validate JSON - if ! printf '%s' "$ACTION_JSON" | jq empty 2>/dev/null; then - log "malformed action JSON in vault-bot comment on issue #${ISSUE_NUM}" - continue - fi - - ACTION_ID=$(printf '%s' "$ACTION_JSON" | jq -r '.id // empty') - if [ -z "$ACTION_ID" ]; then - ACTION_ID="issue-${ISSUE_NUM}-$(date +%s)" - ACTION_JSON=$(printf '%s' "$ACTION_JSON" | jq --arg id "$ACTION_ID" '.id = $id') - fi - - # Skip if this action already exists in any stage - if [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ] || \ - [ -f "${OPS_VAULT_DIR}/fired/${ACTION_ID}.json" ] || \ - [ -f "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" ]; then - continue - fi - - log "vault-bot authorized action on issue #${ISSUE_NUM}: ${ACTION_ID}" - printf '%s' "$ACTION_JSON" | jq '.status = "approved"' > "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" - COMMENT_COUNT=$((COMMENT_COUNT + 1)) - - # Fire the action - if bash "${VAULT_SCRIPT_DIR}/vault-fire.sh" "$ACTION_ID" >> "$LOGFILE" 2>&1; then - log "fired ${ACTION_ID} from issue #${ISSUE_NUM}" - # Mark issue as processed - touch "${LOCKS_DIR}/issue-${ISSUE_NUM}.vault-fired" - else - log "ERROR: fire failed for ${ACTION_ID} from issue #${ISSUE_NUM}" - fi - done -fi - -if [ "$PENDING_COUNT" -eq 0 ] && [ "$PROCURE_COUNT" -eq 0 ] && [ "$COMMENT_COUNT" -eq 0 ]; then - status "all clear — no pending items" -else - status "poll complete — ${PENDING_COUNT} action(s), ${PROCURE_COUNT} procurement(s), ${COMMENT_COUNT} comment-authorized" -fi diff --git a/vault/vault-reject.sh b/vault/vault-reject.sh deleted file mode 100755 index 54fa127..0000000 --- a/vault/vault-reject.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -# vault-reject.sh — Move a vault action to rejected/ with reason -# -# Usage: bash vault-reject.sh "" - 
-set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -source "${SCRIPT_DIR}/vault-env.sh" - -OPS_VAULT_DIR="${OPS_REPO_ROOT}/vault" -LOGFILE="${DISINTO_LOG_DIR}/vault/vault.log" -LOCKS_DIR="${DISINTO_LOG_DIR}/vault/.locks" - -log() { - printf '[%s] vault-reject: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -ACTION_ID="${1:?Usage: vault-reject.sh \"\"}" -REASON="${2:-unspecified}" - -# Find the action file -ACTION_FILE="" -if [ -f "${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" ]; then - ACTION_FILE="${OPS_VAULT_DIR}/pending/${ACTION_ID}.json" -elif [ -f "${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" ]; then - ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" -else - log "ERROR: action $ACTION_ID not found in pending/ or approved/" - exit 1 -fi - -# Update with rejection metadata and move to rejected/ -TMP=$(mktemp) -jq --arg reason "$REASON" --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ - '.status = "rejected" | .rejected_at = $ts | .reject_reason = $reason' \ - "$ACTION_FILE" > "$TMP" && mv "$TMP" "${OPS_VAULT_DIR}/rejected/${ACTION_ID}.json" -rm -f "$ACTION_FILE" - -# Clean up lock if present -rm -f "${LOCKS_DIR}/${ACTION_ID}.lock" - -log "$ACTION_ID: rejected — $REASON" diff --git a/vault/vault-run-action.sh b/vault/vault-run-action.sh deleted file mode 100755 index 707f3db..0000000 --- a/vault/vault-run-action.sh +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env bash -# vault-run-action.sh — Execute an action inside the ephemeral vault-runner container -# -# This script is the entrypoint for the vault-runner container. It runs with -# vault secrets injected as environment variables (GITHUB_TOKEN, CLAWHUB_TOKEN, -# deploy keys, etc.) and dispatches to the appropriate action handler. -# -# The vault-runner container is ephemeral: it starts, runs the action, and is -# destroyed. Secrets exist only in container memory, never on disk. 
-# -# Usage: vault-run-action.sh - -set -euo pipefail - -VAULT_SCRIPT_DIR="${DISINTO_VAULT_DIR:-/home/agent/disinto/vault}" -OPS_VAULT_DIR="${DISINTO_OPS_VAULT_DIR:-${VAULT_SCRIPT_DIR}}" -LOGFILE="${VAULT_SCRIPT_DIR}/vault.log" -ACTION_ID="${1:?Usage: vault-run-action.sh }" - -log() { - printf '[%s] vault-runner: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" 2>/dev/null || \ - printf '[%s] vault-runner: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 -} - -# Find action file in approved/ -ACTION_FILE="${OPS_VAULT_DIR}/approved/${ACTION_ID}.json" -if [ ! -f "$ACTION_FILE" ]; then - log "ERROR: action file not found: ${ACTION_FILE}" - echo "ERROR: action file not found: ${ACTION_FILE}" >&2 - exit 1 -fi - -ACTION_TYPE=$(jq -r '.type // ""' < "$ACTION_FILE") -ACTION_SOURCE=$(jq -r '.source // ""' < "$ACTION_FILE") -PAYLOAD=$(jq -c '.payload // {}' < "$ACTION_FILE") - -if [ -z "$ACTION_TYPE" ]; then - log "ERROR: ${ACTION_ID} has no type field" - exit 1 -fi - -log "${ACTION_ID}: executing type=${ACTION_TYPE} source=${ACTION_SOURCE}" - -FIRE_EXIT=0 - -case "$ACTION_TYPE" in - webhook-call) - # HTTP call to endpoint with optional method/headers/body - ENDPOINT=$(echo "$PAYLOAD" | jq -r '.endpoint // ""') - METHOD=$(echo "$PAYLOAD" | jq -r '.method // "POST"') - REQ_BODY=$(echo "$PAYLOAD" | jq -r '.body // ""') - - if [ -z "$ENDPOINT" ]; then - log "ERROR: ${ACTION_ID} webhook-call missing endpoint" - exit 1 - fi - - CURL_ARGS=(-sf -X "$METHOD" -o /dev/null -w "%{http_code}") - while IFS= read -r header; do - [ -n "$header" ] && CURL_ARGS+=(-H "$header") - done < <(echo "$PAYLOAD" | jq -r '.headers // {} | to_entries[] | "\(.key): \(.value)"' 2>/dev/null || true) - if [ -n "$REQ_BODY" ] && [ "$REQ_BODY" != "null" ]; then - CURL_ARGS+=(-d "$REQ_BODY") - fi - - HTTP_CODE=$(curl "${CURL_ARGS[@]}" "$ENDPOINT" 2>/dev/null) || HTTP_CODE="000" - if [[ "$HTTP_CODE" =~ ^2 ]]; then - log "${ACTION_ID}: webhook-call -> HTTP ${HTTP_CODE} OK" - else - log "ERROR: 
${ACTION_ID} webhook-call -> HTTP ${HTTP_CODE}" - FIRE_EXIT=1 - fi - ;; - - promote) - # Promote a Woodpecker pipeline to a deployment environment (staging/production). - # Payload: {"repo_id": N, "pipeline": N, "environment": "staging"|"production"} - PROMOTE_REPO_ID=$(echo "$PAYLOAD" | jq -r '.repo_id // ""') - PROMOTE_PIPELINE=$(echo "$PAYLOAD" | jq -r '.pipeline // ""') - PROMOTE_ENV=$(echo "$PAYLOAD" | jq -r '.environment // ""') - - if [ -z "$PROMOTE_REPO_ID" ] || [ -z "$PROMOTE_PIPELINE" ] || [ -z "$PROMOTE_ENV" ]; then - log "ERROR: ${ACTION_ID} promote missing repo_id, pipeline, or environment" - FIRE_EXIT=1 - else - # Validate environment is staging or production - case "$PROMOTE_ENV" in - staging|production) ;; - *) - log "ERROR: ${ACTION_ID} promote invalid environment '${PROMOTE_ENV}' (must be staging or production)" - FIRE_EXIT=1 - ;; - esac - - if [ "$FIRE_EXIT" -eq 0 ]; then - WP_SERVER="${WOODPECKER_SERVER:-http://woodpecker:8000}" - WP_TOKEN="${WOODPECKER_TOKEN:-}" - - if [ -z "$WP_TOKEN" ]; then - log "ERROR: ${ACTION_ID} promote requires WOODPECKER_TOKEN" - FIRE_EXIT=1 - else - PROMOTE_RESP=$(curl -sf -X POST \ - -H "Authorization: Bearer ${WP_TOKEN}" \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "event=deployment&deploy_to=${PROMOTE_ENV}" \ - "${WP_SERVER}/api/repos/${PROMOTE_REPO_ID}/pipelines/${PROMOTE_PIPELINE}" 2>/dev/null) || PROMOTE_RESP="" - - NEW_PIPELINE=$(printf '%s' "$PROMOTE_RESP" | jq -r '.number // empty' 2>/dev/null) - if [ -n "$NEW_PIPELINE" ]; then - log "${ACTION_ID}: promoted pipeline ${PROMOTE_PIPELINE} to ${PROMOTE_ENV} -> new pipeline #${NEW_PIPELINE}" - else - log "ERROR: ${ACTION_ID} promote API failed (repo_id=${PROMOTE_REPO_ID} pipeline=${PROMOTE_PIPELINE} env=${PROMOTE_ENV})" - FIRE_EXIT=1 - fi - fi - fi - fi - ;; - - blog-post|social-post|email-blast|pricing-change|dns-change|stripe-charge) - HANDLER="${VAULT_SCRIPT_DIR}/handlers/${ACTION_TYPE}.sh" - if [ -x "$HANDLER" ]; then - bash "$HANDLER" 
"$ACTION_ID" "$PAYLOAD" 2>&1 || FIRE_EXIT=$? - else - log "ERROR: ${ACTION_ID} no handler for type '${ACTION_TYPE}' (${HANDLER} not found)" - FIRE_EXIT=1 - fi - ;; - - *) - log "ERROR: ${ACTION_ID} unknown action type '${ACTION_TYPE}'" - FIRE_EXIT=1 - ;; -esac - -exit "$FIRE_EXIT"