diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d9781fe --- /dev/null +++ b/.dockerignore @@ -0,0 +1,20 @@ +# Secrets — prevent .env files from being baked into the image +.env +.env.enc +.env.vault +.env.vault.enc + +# Version control — .git is huge and not needed in image +.git + +# Archives — not needed at runtime +*.tar.gz + +# Prometheus data — large, ephemeral data +prometheus-data/ + +# Compose files — only needed at runtime via volume mount +docker-compose.yml + +# Project TOML files — local per-box config; gitignored, but .gitignore does not filter the Docker build context, so exclude them here +projects/*.toml diff --git a/.env.example b/.env.example index 6124671..037abe1 100644 --- a/.env.example +++ b/.env.example @@ -20,6 +20,7 @@ FORGE_URL=http://localhost:3000 # [CONFIG] local Forgejo instance # Each agent has its own Forgejo account and API token (#747). # Per-agent tokens fall back to FORGE_TOKEN if not set. FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents) +FORGE_TOKEN_DEVQWEN= # [SECRET] dev-qwen API token (for agents-llama) FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 85de2ad..4d7c783 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -6,8 +6,6 @@ # 2. Every custom function called by agent scripts is defined in lib/ or the script itself # # Fast (<10s): no network, no tmux, no Claude needed. -# Would have caught: kill_tmux_session (renamed), create_agent_session (missing), -# read_phase (missing from dev-agent.sh scope) set -euo pipefail @@ -21,12 +19,16 @@ FAILED=0 # Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296). get_fns() { local f="$1" - # BRE mode (no -E). Use [(][)] for literal parens — unambiguous across - # GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping - # even in BRE). 
BRE one-or-more via [X][X]* instead of +. - grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \ - | sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \ - | sort -u || true + # Pure-awk implementation: avoids grep/sed cross-platform differences + # (BusyBox grep BRE quirks, sed ; separator issues on Alpine). + awk ' + /^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ { + line = $0 + gsub(/^[[:space:]]+/, "", line) + sub(/[[:space:]]*[(].*/, "", line) + print line + } + ' "$f" 2>/dev/null | sort -u || true } # Extract call-position identifiers that look like custom function calls: @@ -95,13 +97,12 @@ echo "=== 2/2 Function resolution ===" # # Included — these are inline-sourced by agent scripts: # lib/env.sh — sourced by every agent (log, forge_api, etc.) -# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.) # lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session) # lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.) # lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) -# lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets) -# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.) +# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets) +# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, check_memory, etc.) 
# lib/mirrors.sh — sourced by merge sites (mirror_push) # lib/guard.sh — sourced by all cron entry points (check_active) # lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps @@ -116,7 +117,7 @@ echo "=== 2/2 Function resolution ===" # If a new lib file is added and sourced by agents, add it to LIB_FUNS below # and add a check_script call for it in the lib files section further down. LIB_FUNS=$( - for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do + for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do if [ -f "$f" ]; then get_fns "$f"; fi done | sort -u ) @@ -180,13 +181,12 @@ check_script() { # These are already in LIB_FUNS (their definitions are available to agents), # but this verifies calls *within* each lib file are also resolvable. check_script lib/env.sh lib/mirrors.sh -check_script lib/agent-session.sh check_script lib/agent-sdk.sh check_script lib/ci-helpers.sh check_script lib/secret-scan.sh check_script lib/file-action-issue.sh lib/secret-scan.sh check_script lib/tea-helpers.sh lib/secret-scan.sh -check_script lib/formula-session.sh lib/agent-session.sh +check_script lib/formula-session.sh check_script lib/load-project.sh check_script lib/mirrors.sh lib/env.sh check_script lib/guard.sh @@ -199,18 +199,16 @@ check_script lib/ci-debug.sh check_script lib/parse-deps.sh # Agent scripts — list cross-sourced files where function scope flows across files. -# phase-handler.sh defines default callback stubs; sourcing agents may override. 
check_script dev/dev-agent.sh -check_script dev/phase-handler.sh lib/secret-scan.sh check_script dev/dev-poll.sh check_script dev/phase-test.sh check_script gardener/gardener-run.sh check_script review/review-pr.sh lib/agent-sdk.sh check_script review/review-poll.sh -check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh +check_script planner/planner-run.sh lib/formula-session.sh check_script supervisor/supervisor-poll.sh check_script supervisor/update-prompt.sh -check_script supervisor/supervisor-run.sh +check_script supervisor/supervisor-run.sh lib/formula-session.sh check_script supervisor/preflight.sh check_script predictor/predictor-run.sh check_script architect/architect-run.sh diff --git a/.woodpecker/ci.yml b/.woodpecker/ci.yml index 08ae24d..fc2f12a 100644 --- a/.woodpecker/ci.yml +++ b/.woodpecker/ci.yml @@ -8,6 +8,19 @@ when: event: [push, pull_request] +# Override default clone to authenticate against Forgejo using FORGE_TOKEN. +# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous +# git clones fail with exit code 128. FORGE_TOKEN is injected globally via +# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh). +clone: + git: + image: alpine/git + commands: + - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|") + - git clone --depth 1 "$AUTH_URL" . 
+ - git fetch --depth 1 origin "$CI_COMMIT_REF" + - git checkout FETCH_HEAD + steps: - name: shellcheck image: koalaman/shellcheck-alpine:stable diff --git a/.woodpecker/detect-duplicates.py b/.woodpecker/detect-duplicates.py index 1d2c195..35f3aa8 100644 --- a/.woodpecker/detect-duplicates.py +++ b/.woodpecker/detect-duplicates.py @@ -267,41 +267,31 @@ def main() -> int: "2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)", "93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)", "c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)", - # install_project_crons function in entrypoint.sh and entrypoint-llama.sh (intentional duplicate) - "007e1390498374c68ab5d66aa6d277b2": "install_project_crons function in entrypoints (window 007e1390)", - "04143957d4c63e8a16ac28bddaff589b": "install_project_crons function in entrypoints (window 04143957)", - "076a19221cde674b2fce20a17292fa78": "install_project_crons function in entrypoints (window 076a1922)", - "0d498287626e105f16b24948aed53584": "install_project_crons function in entrypoints (window 0d498287)", - "137b746928011acd758c7a9c690810b2": "install_project_crons function in entrypoints (window 137b7469)", - "287d33d98d21e3e07e0869e56ad94527": "install_project_crons function in entrypoints (window 287d33d9)", - "325a3d54a15e59d333ec2a20c062cc8c": "install_project_crons function in entrypoints (window 325a3d54)", - "34e1943d5738f540d67c5c6bd3e60b20": "install_project_crons function in entrypoints (window 34e1943d)", - "3dabd19698f9705b05376c38042ccce8": "install_project_crons function in entrypoints (window 3dabd196)", - "446b420f7f9821a2553bc4995d1fac25": "install_project_crons function in entrypoints (window 446b420f)", - "4826cf4896b792368c7b4d77573d0f8b": "install_project_crons function in entrypoints (window 4826cf48)", - 
"4e564d3bbda0ef33962af6042736dc1e": "install_project_crons function in entrypoints (window 4e564d3b)", - "5a3d92b22e5d5bca8cce17d581ac6803": "install_project_crons function in entrypoints (window 5a3d92b2)", - "63c20c5a31cf5e08f3a901ddf6db98af": "install_project_crons function in entrypoints (window 63c20c5a)", - "77547751325562fac397bbfd3a21c88e": "install_project_crons function in entrypoints (window 77547751)", - "80bdff63e54b4a260043d264b83d8eb0": "install_project_crons function in entrypoints (window 80bdff63)", - "84e55706393f731b293890dd6d830316": "install_project_crons function in entrypoints (window 84e55706)", - "85f8a9d029ee9efecca73fd30449ccf4": "install_project_crons function in entrypoints (window 85f8a9d0)", - "86e28dae676c905c5aa0035128e20e46": "install_project_crons function in entrypoints (window 86e28dae)", - "a222b73bcd6a57adb2315726e81ab6cf": "install_project_crons function in entrypoints (window a222b73b)", - "abd6c7efe66f533c48c883c2a6998886": "install_project_crons function in entrypoints (window abd6c7ef)", - "bcfeb67ce4939181330afea4949a95cf": "install_project_crons function in entrypoints (window bcfeb67c)", - "c1248c98f978c48e4a1e5009a1440917": "install_project_crons function in entrypoints (window c1248c98)", - "c40571185b3306345ecf9ac33ab352a6": "install_project_crons function in entrypoints (window c4057118)", - "c566639b237036a7a385982274d3d271": "install_project_crons function in entrypoints (window c566639b)", - "d9cd2f3d874c32366d577ea0d334cd1a": "install_project_crons function in entrypoints (window d9cd2f3d)", - "df4d3e905b12f2c68b206e45dddf9214": "install_project_crons function in entrypoints (window df4d3e90)", - "e8e65ccf867fc6cbe49695ecdce2518e": "install_project_crons function in entrypoints (window e8e65ccf)", - "eb8b298f06cda4359cc171206e0014bf": "install_project_crons function in entrypoints (window eb8b298f)", - "ecdf0daa2f2845359a6a4aa12d327246": "install_project_crons function in entrypoints (window ecdf0daa)", - 
"eeac93b2fba4de4589d36ca20845ec9f": "install_project_crons function in entrypoints (window eeac93b2)", - "f08a7139db9c96cd3526549c499c0332": "install_project_crons function in entrypoints (window f08a7139)", - "f0917809bdf28ff93fff0749e7e7fea0": "install_project_crons function in entrypoints (window f0917809)", - "f0e4101f9b90c2fa921e088057a96db7": "install_project_crons function in entrypoints (window f0e4101f)", + # Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh + "29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)", + # Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh + # Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh + "059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)", + # Docker compose environment block for agents service (generators.sh + hire-agent.sh) + # Intentional duplicate - both generate the same docker-compose.yml template + "8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh) - old", + "fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh) - old", + "e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old", + # The hash shown in output is 161a80f7 - need to match exactly what the script finds + "161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old", + # New hash after explicit environment fix (#381) + "83fa229b86a7fdcb1d3591ab8e718f9d": "Docker compose explicit environment block (generators.sh + hire-agent.sh) - #381", + # Verification mode helper functions - intentionally duplicated in dispatcher and entrypoint + # These functions check if bug-report parent issues have all sub-issues closed + 
"b783d403276f78b49ad35840845126a1": "Verification helper: sub_issues variable declaration", + "4b19b9a1bdfbc62f003fc237ed270ed9": "Verification helper: python3 -c invocation", + "cc1d0a9f85dfe0cc32e9ef6361cb8c3a": "Verification helper: Python imports and args", + "768926748b811ebd30f215f57db5de40": "Verification helper: json.load from /dev/stdin", + "4c58586a30bcf6b009c02010ed8f6256": "Verification helper: sub_issues list initialization", + "53ea3d6359f51d622467bd77b079cc88": "Verification helper: iterate issues in data", + "21aec56a99d5252b23fb9a38b895e8e8": "Verification helper: check body for Decomposed from pattern", + "60ea98b3604557d539193b2a6624e232": "Verification helper: append sub-issue number", + "9f6ae8e7811575b964279d8820494eb0": "Verification helper: for loop done pattern", } if not sh_files: diff --git a/.woodpecker/smoke-init.yml b/.woodpecker/smoke-init.yml index 3e1f33a..3953053 100644 --- a/.woodpecker/smoke-init.yml +++ b/.woodpecker/smoke-init.yml @@ -4,6 +4,7 @@ when: - "bin/disinto" - "lib/load-project.sh" - "lib/env.sh" + - "lib/generators.sh" - "tests/**" - ".woodpecker/smoke-init.yml" @@ -12,6 +13,7 @@ steps: image: python:3-alpine commands: - apk add --no-cache bash curl jq git coreutils - - python3 tests/mock-forgejo.py & + - python3 tests/mock-forgejo.py & echo $! > /tmp/mock-forgejo.pid - sleep 2 - bash tests/smoke-init.sh + - kill $(cat /tmp/mock-forgejo.pid) 2>/dev/null || true diff --git a/AGENTS.md b/AGENTS.md index 7fcca01..7d028ea 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,13 +1,13 @@ - + # Disinto — Agent Instructions ## What this repo is -Disinto is an autonomous code factory. It manages seven agents (dev, review, -gardener, supervisor, planner, predictor, architect) that pick up issues from -forge, implement them, review PRs, plan from the vision, and keep the system -healthy — all via cron and `claude -p`. The dispatcher executes formula-based -operational tasks. +Disinto is an autonomous code factory. 
It manages ten agents (dev, review, +gardener, supervisor, planner, predictor, architect, reproduce, triage, edge +dispatcher) that pick up issues from forge, implement them, review PRs, plan +from the vision, and keep the system healthy — all via cron (bare-metal) or a polling loop (Docker) and `claude -p`. +The dispatcher executes formula-based operational tasks. Each agent has a `.profile` repository on Forgejo that stores lessons learned from prior sessions, providing continuous improvement across runs. @@ -21,27 +21,45 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set ``` disinto/ (code repo) -├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation +├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation ├── review/ review-poll.sh, review-pr.sh — PR review ├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula +│ best-practices.md — gardener best-practice reference +│ pending-actions.json — queued gardener actions ├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula ├── planner/ planner-run.sh — direct cron executor for run-planner formula ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper) │ preflight.sh — pre-flight data collection for supervisor formula -│ supervisor-poll.sh — legacy bash orchestrator (superseded) ├── architect/ architect-run.sh — strategic decomposition of vision into sprints ├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77) -├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, profile.sh, build-graph.py +│ SCHEMA.md — vault item schema documentation +│ validate.sh — vault item validator +│ examples/ — example vault action TOMLs (promote, publish, release, webhook-call) +├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, 
ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py, +│ branch-protection.sh, secret-scan.sh, tea-helpers.sh, vault.sh, ci-log-reader.py +│ hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure) ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) -└── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md) +├── docker/ Dockerfiles and entrypoints for reproduce, triage, and edge dispatcher agents +├── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md) +├── site/ disinto.ai website content +├── tests/ Test files (mock-forgejo.py, smoke-init.sh) +├── templates/ Issue templates +├── bin/ The `disinto` CLI script +├── disinto-factory/ Setup documentation and skill +├── state/ Runtime state +├── .woodpecker/ Woodpecker CI pipeline configs +├── VISION.md High-level project vision +└── CLAUDE.md Claude Code project instructions disinto-ops/ (ops repo — {project}-ops) ├── vault/ +│ ├── actions/ where vault action TOMLs land (core of vault workflow) │ ├── pending/ vault items awaiting approval │ ├── approved/ approved vault items │ ├── fired/ executed vault items │ └── rejected/ rejected vault items +├── sprints/ sprint planning artifacts ├── knowledge/ shared agent knowledge + best practices ├── evidence/ engagement data, experiment results ├── portfolio.md addressables + observables @@ -49,39 +67,11 @@ disinto-ops/ (ops repo — {project}-ops) └── RESOURCES.md accounts, tokens (refs), infra inventory ``` -> **Note:** Journal directories (`journal/planner/` and `journal/supervisor/`) have been removed from the ops repo. 
Agent journals are now stored in each agent's `.profile` repo on Forgejo. - ## Agent .profile Model -Each agent has a `.profile` repository on Forgejo that stores: -- `formula.toml` — agent-specific formula (optional, falls back to `formulas/.toml`) -- `knowledge/lessons-learned.md` — distilled lessons from journal entries -- `journal/` — session reflection entries (archived after digestion) +Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/formula-session.sh`. -### How it works - -1. **Pre-session:** The agent calls `formula_prepare_profile_context()` which: - - Resolves the agent's Forgejo identity from their token - - Clones/pulls the `.profile` repo to a local cache - - Loads `knowledge/lessons-learned.md` into `LESSONS_CONTEXT` for prompt injection - - Automatically digests journals if >10 undigested entries exist - -2. **Prompt injection:** Lessons are injected into the agent prompt: - ``` - ## Lessons learned (from .profile/knowledge/lessons-learned.md) - - ``` - -3. **Post-session:** The agent calls `profile_write_journal` which: - - Generates a reflection entry about the session - - Writes it to `journal/issue-{N}.md` - - Commits and pushes to the `.profile` repo - - Journals are archived after being digested into lessons-learned.md - -> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that -> orchestrate multi-step agent tasks (e.g., `run-gardener.toml`, `run-planner.toml`). This is -> distinct from "processes" described in `docs/EVIDENCE-ARCHITECTURE.md`, which are measurement -> and mutation pipelines that read external platforms and write structured evidence to git. 
+> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`. ## Tech stack @@ -124,6 +114,9 @@ bash dev/phase-test.sh | Planner | `planner/` | Strategic planning | [planner/AGENTS.md](planner/AGENTS.md) | | Predictor | `predictor/` | Infrastructure pattern detection | [predictor/AGENTS.md](predictor/AGENTS.md) | | Architect | `architect/` | Strategic decomposition | [architect/AGENTS.md](architect/AGENTS.md) | +| Reproduce | `docker/reproduce/` | Bug reproduction using Playwright MCP | `formulas/reproduce.toml` | +| Triage | `docker/reproduce/` | Deep root cause analysis | `formulas/triage.toml` | +| Edge dispatcher | `docker/edge/` | Polls ops repo for vault actions, executes via Claude sessions | `docker/edge/dispatcher.sh` | > **Vault:** Being redesigned as a PR-based approval workflow (issues #73-#77). > See [docs/VAULT.md](docs/VAULT.md) for the vault PR workflow details. @@ -146,30 +139,24 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge → | `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) | | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) | | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) | +| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) | +| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. 
| reproduce-agent (when reproduction succeeds but cause unclear) | +| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans | | `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans | | `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh | | `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) | | `prediction/actioned` | Prediction promoted or dismissed by planner. | Planner (triage-predictions step) | +| `formula` | Issue is a formula-based operational task. Dev-poll skips these; dispatcher handles them. | Dispatcher (when dispatching formula tasks) | ### Dependency conventions -Issues declare dependencies in their body using a `## Dependencies` or -`## Depends on` section listing `#N` references. The dev-poll scheduler uses -`lib/parse-deps.sh` to extract these and only picks issues whose dependencies -are all closed. - -### Single-threaded pipeline - -Each project processes one issue at a time. Dev-poll will not start new work -while an open PR is waiting for CI or review. This keeps context clear and -prevents merge conflicts between concurrent changes. +Issues declare dependencies via `## Dependencies` / `## Depends on` sections listing `#N` refs. `lib/parse-deps.sh` extracts these; dev-poll only picks issues whose deps are all closed. See AD-002 for single-threaded pipeline rules. --- -## Addressables +## Addressables and Observables -Concrete artifacts the factory has produced or is building. The gardener -maintains this table during grooming — see `formulas/run-gardener.toml`. +Concrete artifacts the factory has produced or is building. Observables are addressables with measurement wired — the gardener promotes an addressable to observable once an evidence process is connected. | Artifact | Location | Observable? 
| |----------|----------|-------------| @@ -178,14 +165,6 @@ maintains this table during grooming — see `formulas/run-gardener.toml`. | Skill | ClawHub (in progress) | No | | GitHub org | github.com/Disinto | No | -## Observables - -Addressables with measurement wired — the factory can read structured -feedback from these. The gardener promotes addressables here once an -evidence process is connected. - -None yet. - --- ## Architecture Decisions @@ -194,11 +173,11 @@ Humans write these. Agents read and enforce them. | ID | Decision | Rationale | |---|---|---| -| AD-001 | Nervous system runs from cron, not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | +| AD-001 | Nervous system runs from cron (bare-metal) or a polling loop (Docker), not PR-based actions. | Planner, predictor, gardener, supervisor run directly via `*-run.sh`. They create work, they don't become work. (See PR #474 revert.) | | AD-002 | Single-threaded pipeline per project. | One dev issue at a time. No new work while a PR awaits CI or review. Prevents merge conflicts and keeps context clear. | | AD-003 | The runtime creates and destroys, the formula preserves. | Runtime manages worktrees/sessions/temp. Formulas commit knowledge to git before signaling done. | | AD-004 | Event-driven > polling > fixed delays. | Never `waitForTimeout` or hardcoded sleep. Use phase files, webhooks, or poll loops with backoff. | -| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (both SOPS-encrypted). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. | +| AD-005 | Secrets via env var indirection, never in issue bodies. | Issue bodies become code. 
Agent secrets go in `.env.enc`, vault secrets in `.env.vault.enc` (SOPS-encrypted when available; plaintext `.env`/`.env.vault` fallback supported). Referenced as `$VAR_NAME`. Runner gets only vault secrets; agents get only agent secrets. | | AD-006 | External actions go through vault dispatch, never direct. | Agents build addressables; only the vault exercises them (publishes, deploys, posts). Tokens for external systems (`GITHUB_TOKEN`, `CLAWHUB_TOKEN`, deploy keys) live only in `.env.vault.enc` and are injected into the ephemeral runner container. `lib/env.sh` unsets them so agents never hold them. PRs with direct external actions without vault dispatch get REQUEST_CHANGES. (Vault redesign in progress: PR-based approval on ops repo, see #73-#77) | **Who enforces what:** diff --git a/architect/AGENTS.md b/architect/AGENTS.md index c2e99ba..f9e5065 100644 --- a/architect/AGENTS.md +++ b/architect/AGENTS.md @@ -1,4 +1,4 @@ - + # Architect — Agent Instructions ## What this agent is @@ -11,7 +11,7 @@ converses with humans through PR comments. - **Input**: Vision issues from VISION.md, prerequisite tree from ops repo - **Output**: Sprint proposals as PRs on the ops repo, sub-issue files -- **Mechanism**: Formula-driven execution via `formulas/run-architect.toml` +- **Mechanism**: Bash-driven orchestration in `architect-run.sh`, pitching formula via `formulas/run-architect.toml` - **Identity**: `architect-bot` on Forgejo ## Responsibilities @@ -29,21 +29,64 @@ converses with humans through PR comments. ## Formula -The architect is driven by `formulas/run-architect.toml`. This formula defines +The architect pitching is driven by `formulas/run-architect.toml`. 
This formula defines the steps for: - Research: analyzing vision items and prerequisite tree -- Design: identifying implementation approaches and forks -- Sprint proposal: creating structured sprint PRs +- Pitch: creating structured sprint PRs - Sub-issue filing: creating concrete implementation issues +## Bash-driven orchestration + +Bash in `architect-run.sh` handles state detection and orchestration: + +- **Deterministic state detection**: Bash reads the Forgejo reviews API to detect + ACCEPT/REJECT decisions — no model-dependent API parsing +- **Human guidance injection**: Review body text from ACCEPT reviews is injected + directly into the research prompt as context +- **Response processing**: When ACCEPT/REJECT responses are detected, bash invokes + the agent with appropriate context (session resumed for questions phase) + +### State transitions + +``` +New vision issue → pitch PR (model generates pitch, bash creates PR) + ↓ +ACCEPT review → research + questions (model, session saved to $SID_FILE) + ↓ +Answers received → sub-issue filing (model, session resumed via --resume) + ↓ +REJECT review → close PR + journal (model processes rejection, bash merges PR) +``` + +### Session management + +The agent maintains a global session file at `/tmp/architect-session-{project}.sid`. +When processing responses, bash checks if the PR is in the questions phase and +resumes the session using `--resume session_id` to preserve codebase context. 
+ ## Execution Run via `architect/architect-run.sh`, which: - Acquires a cron lock and checks available memory +- Cleans up per-issue scratch files from previous runs (`/tmp/architect-{project}-scratch-*.md`) - Sources shared libraries (env.sh, formula-session.sh) - Uses FORGE_ARCHITECT_TOKEN for authentication +- Processes existing architect PRs via the bash-driven design phase - Loads the formula and builds context from VISION.md, AGENTS.md, and ops repo -- Executes the formula via `agent_run` +- Bash orchestrates state management: + - Fetches open vision issues, open architect PRs, and merged sprint PRs from Forgejo API + - Filters out vision issues that already have an open PR, an in-progress label, sub-issues, or a merged sprint PR + - Selects up to `pitch_budget` (3 minus the number of open architect PRs) of the remaining vision issues + - For each selected issue, invokes stateless `claude -p` with issue body + context + - Creates PRs directly from pitch content (no scratch files) +- Agent is invoked only for response processing (ACCEPT/REJECT handling) + +**Multi-sprint pitching**: The architect pitches up to 3 sprints per run. Bash handles all state management: +- Fetches Forgejo API data (vision issues, open PRs, merged PRs) +- Filters and deduplicates (no model-level dedup or journal-based memory) +- For each selected vision issue, bash invokes stateless `claude -p` to generate pitch markdown +- Bash creates the PR with pitch content and posts the ACCEPT/REJECT footer comment +- Branch names use the issue number (`architect/sprint-vision-{issue_number}`) to avoid collisions ## Cron @@ -63,3 +106,4 @@ empty file not created, just document it). 
- #100: Architect formula — research + design fork identification - #101: Architect formula — sprint PR creation with questions - #102: Architect formula — answer parsing + sub-issue filing +- #491: Refactor — bash-driven design phase with stateful session resumption diff --git a/architect/architect-run.sh b/architect/architect-run.sh index b3d2513..38702b2 100755 --- a/architect/architect-run.sh +++ b/architect/architect-run.sh @@ -7,9 +7,20 @@ # # Flow: # 1. Guards: cron lock, memory check -# 2. Load formula (formulas/run-architect.toml) -# 3. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph -# 4. agent_run(worktree, prompt) → Claude decomposes vision into sprints +# 2. Precondition checks: skip if no work (no vision issues, no responses) +# 3. Load formula (formulas/run-architect.toml) +# 4. Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph +# 5. Stateless pitch generation: for each selected issue: +# - Fetch issue body from Forgejo API (bash) +# - Invoke claude -p with issue body + context (stateless, no API calls) +# - Create PR with pitch content (bash) +# - Post footer comment (bash) +# 6. 
Response processing: handle ACCEPT/REJECT on existing PRs +# +# Precondition checks (bash before model): +# - Skip if no vision issues AND no open architect PRs +# - Skip if 3+ architect PRs open AND no ACCEPT/REJECT responses to process +# - Only invoke model when there's actual work: new pitches or response processing # # Usage: # architect-run.sh [projects/disinto.toml] # project config (default: disinto) @@ -36,27 +47,52 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="$SCRIPT_DIR/architect.log" +LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid" +# Per-PR session files for stateful resumption across runs +SID_DIR="/tmp/architect-sessions-${PROJECT_NAME}" +mkdir -p "$SID_DIR" SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md" +SCRATCH_FILE_PREFIX="/tmp/architect-${PROJECT_NAME}-scratch" WORKTREE="/tmp/${PROJECT_NAME}-architect-run" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh +LOG_AGENT="architect" + +# Override log() to append to architect-specific log file +# shellcheck disable=SC2034 +log() { + local agent="${LOG_AGENT:-architect}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" +} # ── Guards ──────────────────────────────────────────────────────────────── check_active architect acquire_cron_lock "/tmp/architect-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Architect run start ---" +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + +# ── Resolve agent identity for .profile repo ──────────────────────────── +if [ -z "${AGENT_IDENTITY:-}" ] && 
[ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then + AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +fi + # ── Load formula + context ─────────────────────────────────────────────── -load_formula "$FACTORY_ROOT/formulas/run-architect.toml" +load_formula_or_profile "architect" "$FACTORY_ROOT/formulas/run-architect.toml" || exit 1 build_context_block VISION.md AGENTS.md ops:prerequisites.md +# ── Prepare .profile context (lessons injection) ───────────────────────── +formula_prepare_profile_context + # ── Build structural analysis graph ────────────────────────────────────── build_graph_section @@ -84,6 +120,7 @@ and file sub-issues after design forks are resolved. ${CONTEXT_BLOCK} ${GRAPH_SECTION} ${SCRATCH_CONTEXT} +$(formula_lessons_block) ## Formula ${FORMULA_CONTENT} @@ -97,11 +134,611 @@ PROMPT=$(build_architect_prompt) # ── Create worktree ────────────────────────────────────────────────────── formula_worktree_setup "$WORKTREE" -# ── Run agent ───────────────────────────────────────────────────────────── -export CLAUDE_MODEL="sonnet" +# ── Detect if PR is in questions-awaiting-answers phase ────────────────── +# A PR is in the questions phase if it has a `## Design forks` section and +# question comments. We check this to decide whether to resume the session +# from the research/questions run (preserves codebase context for answer parsing). +detect_questions_phase() { + local pr_number="" + local pr_body="" -agent_run --worktree "$WORKTREE" "$PROMPT" -log "agent_run complete" + # Get open architect PRs on ops repo + local ops_repo="${OPS_REPO_ROOT:-/home/agent/data/ops}" + if [ ! 
-d "${ops_repo}/.git" ]; then + return 1 + fi + # Use Forgejo API to find open architect PRs + local response + response=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open" 2>/dev/null) || return 1 + + # Check each open PR for architect markers + pr_number=$(printf '%s' "$response" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null | head -1) || return 1 + + if [ -z "$pr_number" ]; then + return 1 + fi + + # Fetch PR body + pr_body=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_number}" 2>/dev/null | jq -r '.body // empty') || return 1 + + # Check for `## Design forks` section (added by #101 after ACCEPT) + if ! printf '%s' "$pr_body" | grep -q "## Design forks"; then + return 1 + fi + + # Check for question comments (Q1:, Q2:, etc.) + # Use jq to extract body text before grepping (handles JSON escaping properly) + local comments + comments=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" 2>/dev/null) || return 1 + + if ! printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qE 'Q[0-9]+:'; then + return 1 + fi + + # PR is in questions phase + log "Detected PR #${pr_number} in questions-awaiting-answers phase" + return 0 +} + +# ── Sub-issue existence check ──────────────────────────────────────────── +# Check if a vision issue already has sub-issues filed from it. +# Returns 0 if sub-issues exist and are open, 1 otherwise. 
+# Args: vision_issue_number
+# Note: the issue number is matched with a trailing non-digit boundary, so
+# vision #41 is not mistaken for the parent of "Decomposed from #419".
+# A failed API call returns 1, i.e. it is reported as "no sub-issues".
+has_open_subissues() {
+  local vision_issue="$1"
+  local subissue_count=0
+
+  # Fetch all open issues with bodies in one API call (avoids N+1 calls)
+  local issues_json
+  issues_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
+    "${FORGE_API}/issues?state=open&limit=100" 2>/dev/null) || return 1
+
+  # Count open issues (other than the vision issue itself) whose body contains
+  # 'Decomposed from #N' for exactly this N. The number is spliced into a regex;
+  # tonumber below already requires it to be a plain number.
+  subissue_count=$(printf '%s' "$issues_json" | jq -r --arg vid "$vision_issue" '
+    [.[]
+     | select(.number != ($vid | tonumber))
+     | select(.body // "" | test("Decomposed from #" + $vid + "([^0-9]|$)"))
+    ] | length
+  ' 2>/dev/null) || subissue_count=0
+
+  # jq prints nothing for empty input; treat that as zero matches
+  if [ "${subissue_count:-0}" -gt 0 ]; then
+    log "Vision issue #${vision_issue} has ${subissue_count} open sub-issue(s) — skipping"
+    return 0 # Has open sub-issues
+  fi
+
+  log "Vision issue #${vision_issue} has no open sub-issues"
+  return 1 # No open sub-issues
+}
+
+# ── Merged sprint PR check ───────────────────────────────────────────────
+# Check if a vision issue already has a merged sprint PR on the ops repo.
+# Returns 0 if a merged sprint PR exists, 1 otherwise.
+# Args: vision_issue_number
+# Note: a failed list call returns 1 (treated as "no merged sprint PR"); a
+# failed per-PR lookup just skips that PR.
+has_merged_sprint_pr() {
+  local vision_issue="$1"
+  local prs_json pr_numbers pr_num pr_details is_merged pr_body
+
+  # Get closed PRs from ops repo
+  prs_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=closed&limit=100" 2>/dev/null) || return 1
+
+  # Keep only PRs whose title carries the architect marker
+  pr_numbers=$(printf '%s' "$prs_json" | jq -r '.[] | select(.title | contains("architect:")) | .number' 2>/dev/null) || return 1
+
+  while IFS= read -r pr_num; do
+    [ -z "$pr_num" ] && continue
+
+    # Get PR details including merged status
+    pr_details=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
+      "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null) || continue
+
+    # Check if PR is actually merged (not just closed)
+    is_merged=$(printf '%s' "$pr_details" | jq -r '.merged // false') || continue
+
+    if [ "$is_merged" != "true" ]; then
+      continue
+    fi
+
+    # Get PR body and check for vision issue reference
+    pr_body=$(printf '%s' "$pr_details" | jq -r '.body // ""') || continue
+
+    # Match "#N" only when it is not followed by another digit, so a reference
+    # to #4190 does not count as a reference to #419. This also covers the
+    # "refs #N" / "references #N" forms, which contain "#N" themselves.
+    if printf '%s' "$pr_body" | grep -qE "#${vision_issue}([^0-9]|\$)"; then
+      log "Found merged sprint PR #${pr_num} referencing vision issue #${vision_issue} — skipping"
+      return 0 # Has merged sprint PR
+    fi
+  done <<< "$pr_numbers"
+
+  log "Vision issue #${vision_issue} has no merged sprint PR"
+  return 1 # No merged sprint PR
+}
+
+# ── Helper: Fetch all open vision issues from Forgejo API ─────────────────
+# Returns: JSON array of vision issue objects ('[]' if the request fails)
+fetch_vision_issues() {
+  curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
+    "${FORGE_API}/issues?labels=vision&state=open&limit=100" 2>/dev/null || echo '[]'
+}
+
+# ── Helper: Fetch 
open architect PRs from ops repo Forgejo API ─────────── +# Returns: JSON array of architect PR objects +fetch_open_architect_prs() { + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null || echo '[]' +} + +# ── Helper: Get vision issue body by number ────────────────────────────── +# Args: issue_number +# Returns: issue body text +get_vision_issue_body() { + local issue_num="$1" + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue_num}" 2>/dev/null | jq -r '.body // ""' +} + +# ── Helper: Get vision issue title by number ───────────────────────────── +# Args: issue_number +# Returns: issue title +get_vision_issue_title() { + local issue_num="$1" + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue_num}" 2>/dev/null | jq -r '.title // ""' +} + +# ── Helper: Create a sprint pitch via stateless claude -p call ─────────── +# The model NEVER calls Forgejo API. It only reads context and generates pitch. +# Args: vision_issue_number vision_issue_title vision_issue_body +# Returns: pitch markdown to stdout +# +# This is a stateless invocation: the model has no memory between calls. +# All state management (which issues to pitch, dedup logic, etc.) happens in bash. +generate_pitch() { + local issue_num="$1" + local issue_title="$2" + local issue_body="$3" + + # Build context block with vision issue details + local pitch_context + pitch_context=" +## Vision Issue #${issue_num} +### Title +${issue_title} + +### Description +${issue_body} + +## Project Context +${CONTEXT_BLOCK} +${GRAPH_SECTION} +$(formula_lessons_block) +## Formula +${FORMULA_CONTENT} + +${SCRATCH_INSTRUCTION} +${PROMPT_FOOTER} +" + + # Prompt: model generates pitch markdown only, no API calls + local pitch_prompt="You are the architect agent for ${FORGE_REPO}. Write a sprint pitch for the vision issue above. + +Instructions: +1. 
Output ONLY the pitch markdown (no explanations, no preamble, no postscript) +2. Use this exact format: + +# Sprint: + +## Vision issues +- #${issue_num} — ${issue_title} + +## What this enables + + +## What exists today + + +## Complexity + + + +## Risks + + +## Cost — new infra to maintain + + + +## Recommendation + + +IMPORTANT: Do NOT include design forks or questions. This is a go/no-go pitch. + +--- + +${pitch_context} +" + + # Execute stateless claude -p call + local pitch_output + pitch_output=$(agent_run -p "$pitch_prompt" --output-format json --dangerously-skip-permissions --max-turns 200 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true + + # Extract pitch content from JSON response + local pitch + pitch=$(printf '%s' "$pitch_output" | jq -r '.content // empty' 2>/dev/null) || pitch="" + + if [ -z "$pitch" ]; then + log "WARNING: empty pitch generated for vision issue #${issue_num}" + return 1 + fi + + # Output pitch to stdout for caller to use + printf '%s' "$pitch" +} + +# ── Helper: Create PR on ops repo via Forgejo API ──────────────────────── +# Args: sprint_title sprint_body branch_name +# Returns: PR number on success, empty on failure +create_sprint_pr() { + local sprint_title="$1" + local sprint_body="$2" + local branch_name="$3" + + # Create branch on ops repo + if ! 
curl -sf -X POST \
+    -H "Authorization: token ${FORGE_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/branches" \
+    -d "$(jq -n --arg new "$branch_name" --arg old "${PRIMARY_BRANCH:-main}" \
+      '{new_branch_name: $new, old_branch_name: $old}')" >/dev/null 2>&1; then
+    log "WARNING: failed to create branch ${branch_name}"
+    return 1
+  fi
+
+  # Extract sprint name from title for filename
+  local sprint_name
+  sprint_name=$(printf '%s' "$sprint_title" | sed 's/^architect: *//; s/ *$//')
+
+  # The title comes from a user-written issue, so reduce it to a conservative
+  # character set before it is used as a URL path segment and file name:
+  # lower-case, anything outside [a-z0-9._-] becomes '-', runs of '-' collapse,
+  # and leading/trailing '-' are dropped.
+  local sprint_slug
+  sprint_slug=$(printf '%s' "$sprint_name" | tr '[:upper:]' '[:lower:]' \
+    | LC_ALL=C tr -c 'a-z0-9._-' '-' | sed 's/--*/-/g; s/^-//; s/-$//')
+  # Fall back to the last path component of the branch if nothing usable is left
+  [ -n "$sprint_slug" ] || sprint_slug="${branch_name##*/}"
+
+  # Prepare sprint spec content
+  local sprint_spec="# Sprint: ${sprint_name}
+
+${sprint_body}
+"
+  # Base64 encode the content
+  local sprint_spec_b64
+  sprint_spec_b64=$(printf '%s' "$sprint_spec" | base64 -w 0)
+
+  # Build the file payload with jq, same as the PR payload below, so quotes or
+  # backslashes in any field cannot break the JSON
+  local file_payload
+  file_payload=$(jq -n \
+    --arg message "sprint: add ${sprint_slug}.md" \
+    --arg content "$sprint_spec_b64" \
+    --arg branch "$branch_name" \
+    '{message: $message, content: $content, branch: $branch}')
+
+  # Write sprint spec file to branch
+  if ! curl -sf -X PUT \
+    -H "Authorization: token ${FORGE_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/contents/sprints/${sprint_slug}.md" \
+    -d "$file_payload" >/dev/null 2>&1; then
+    log "WARNING: failed to write sprint spec file"
+    return 1
+  fi
+
+  # Create PR - use jq to build JSON payload safely (prevents injection from markdown)
+  local pr_payload
+  pr_payload=$(jq -n \
+    --arg title "$sprint_title" \
+    --arg body "$sprint_body" \
+    --arg head "$branch_name" \
+    --arg base "${PRIMARY_BRANCH:-main}" \
+    '{title: $title, body: $body, head: $head, base: $base}')
+
+  local pr_response
+  pr_response=$(curl -sf -X POST \
+    -H "Authorization: token ${FORGE_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls" \
+    -d "$pr_payload" 2>/dev/null) || return 1
+
+  # Extract PR number
+  local pr_number
+  pr_number=$(printf '%s' "$pr_response" | jq -r '.number // empty')
+
+  # Do not report success when the response carried no PR number
+  if [ -z "$pr_number" ]; then
+    log "WARNING: PR creation response contained no PR number"
+    return 1
+  fi
+
+  log "Created sprint PR #${pr_number}: 
${sprint_title}"
+  printf '%s' "$pr_number"
+}
+
+# ── Helper: Post footer comment on PR ────────────────────────────────────
+# Args: pr_number
+# Returns: 0 if the comment was posted, 1 otherwise (a WARNING is logged)
+post_pr_footer() {
+  local pr_number="$1"
+  local footer="Reply \`ACCEPT\` to proceed with design questions, or \`REJECT: \` to decline."
+
+  if curl -sf -X POST \
+    -H "Authorization: token ${FORGE_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_number}/comments" \
+    -d "{\"body\": \"${footer}\"}" >/dev/null 2>&1; then
+    log "Posted footer comment on PR #${pr_number}"
+    return 0
+  else
+    log "WARNING: failed to post footer comment on PR #${pr_number}"
+    return 1
+  fi
+}
+
+# ── Helper: Add in-progress label to vision issue ────────────────────────
+# Args: vision_issue_number
+# Returns: 0 if the label was added, 1 otherwise (a WARNING is logged)
+# NOTE(review): the label lookup uses ${FORGE_API}/labels and every other
+# project-repo call in this script uses ${FORGE_API}/issues/..., so the POST
+# below uses the same repo-scoped form instead of
+# ${FORGE_API}/repos/${FORGE_REPO}/issues/... — confirm against the place
+# where FORGE_API is defined.
+add_inprogress_label() {
+  local issue_num="$1"
+
+  # Get label ID for 'in-progress'
+  local labels_json
+  labels_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
+    "${FORGE_API}/labels" 2>/dev/null) || return 1
+
+  local inprogress_label_id
+  inprogress_label_id=$(printf '%s' "$labels_json" | jq -r --arg label "in-progress" '.[] | select(.name == $label) | .id' 2>/dev/null) || true
+
+  if [ -z "$inprogress_label_id" ]; then
+    log "WARNING: in-progress label not found"
+    return 1
+  fi
+
+  # Add label to issue
+  if curl -sf -X POST \
+    -H "Authorization: token ${FORGE_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${FORGE_API}/issues/${issue_num}/labels" \
+    -d "{\"labels\": [${inprogress_label_id}]}" >/dev/null 2>&1; then
+    log "Added in-progress label to vision issue #${issue_num}"
+    return 0
+  else
+    log "WARNING: failed to add in-progress label to vision issue #${issue_num}"
+    return 1
+  fi
+}
+
+# ── Precondition checks in bash before invoking the model ─────────────────
+
+# Check 1: Skip if no vision issues exist and no open architect PRs to handle
+vision_count=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+  "$FORGE_API/issues?labels=vision&state=open&limit=1" 2>/dev/null | jq length) || vision_count=0
+if [ "${vision_count:-0}" -eq 0 ]; then
+  # Check for open architect PRs that need handling (ACCEPT/REJECT responses)
+  open_arch_prs=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=10" 2>/dev/null | jq '[.[] | select(.title | startswith("architect:"))] | length') || open_arch_prs=0
+  if [ "${open_arch_prs:-0}" -eq 0 ]; then
+    log "no vision issues and no open architect PRs — skipping"
+    exit 0
+  fi
+fi
+
+# Check 2: Scan for ACCEPT/REJECT responses on open architect PRs (unconditional)
+# This ensures responses are processed regardless of open_arch_prs count.
+# A response is an APPROVED review, or a comment line that starts with ACCEPT
+# or REJECT: (optionally after a "prefix: "). The match is anchored to the
+# start of the line so the bot's own footer, which quotes both keywords
+# mid-sentence, is not counted as a response.
+has_responses_to_process=false
+# Fetch the open PR list once; both the scan and the count below use it
+_arch_open_prs_json=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+  "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null) || _arch_open_prs_json='[]'
+pr_numbers=$(printf '%s' "$_arch_open_prs_json" | jq -r '.[] | select(.title | startswith("architect:")) | .number' 2>/dev/null) || pr_numbers=""
+for pr_num in $pr_numbers; do
+  comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || comments=""
+  if printf '%s' "$comments" | jq -r '.[].body // empty' 2>/dev/null | grep -qiE '^[[:space:]]*([^:]+: *)?(ACCEPT|REJECT:)'; then
+    has_responses_to_process=true
+    break
+  fi
+  # The response-processing step further down also treats an APPROVED review
+  # as an ACCEPT, so it has to count here as well
+  reviews=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews=""
+  if printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
+    has_responses_to_process=true
+    break
+  fi
+done
+
+# Check 2 (continued): Skip if already at max open pitches (3), unless there are responses to process
+open_arch_prs=$(printf '%s' "$_arch_open_prs_json" | jq '[.[] | select(.title | startswith("architect:"))] | length' 2>/dev/null) || open_arch_prs=0
+# jq prints nothing for empty input; keep the count numeric for later arithmetic
+open_arch_prs=${open_arch_prs:-0}
+if [ "${open_arch_prs:-0}" -ge 3 ]; then
+  if [ "$has_responses_to_process" = false ]; then
+    log "already 3 open architect PRs with no responses to process — skipping"
+    exit 0
+  fi
+  log "3 open architect PRs found but responses detected — processing"
+fi
+
+# ── Bash-driven state management: Select vision issues for pitching ───────
+# This logic is also documented in formulas/run-architect.toml preflight step + +# Fetch all data from Forgejo API upfront (bash handles state, not model) +vision_issues_json=$(fetch_vision_issues) +open_arch_prs_json=$(fetch_open_architect_prs) + +# Build list of vision issues that already have open architect PRs +declare -A _arch_vision_issues_with_open_prs +while IFS= read -r pr_num; do + [ -z "$pr_num" ] && continue + pr_body=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \ + "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}" 2>/dev/null | jq -r '.body // ""') || continue + # Extract vision issue numbers referenced in PR body (e.g., "refs #419" or "#419") + while IFS= read -r ref_issue; do + [ -z "$ref_issue" ] && continue + _arch_vision_issues_with_open_prs["$ref_issue"]=1 + done <<< "$(printf '%s' "$pr_body" | grep -oE '#[0-9]+' | tr -d '#' | sort -u)" +done <<< "$(printf '%s' "$open_arch_prs_json" | jq -r '.[] | select(.title | startswith("architect:")) | .number')" + +# Get all open vision issues +vision_issues_json=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \ + "${FORGE_API}/issues?labels=vision&state=open&limit=100" 2>/dev/null) || vision_issues_json='[]' + +# Get issues with in-progress label +in_progress_issues=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \ + "${FORGE_API}/issues?labels=in-progress&state=open&limit=100" 2>/dev/null | jq -r '.[].number' 2>/dev/null) || in_progress_issues="" + +# Select vision issues for pitching +ARCHITECT_TARGET_ISSUES=() +vision_issue_count=0 +pitch_budget=$((3 - open_arch_prs)) + +# Get all vision issue numbers +vision_issue_nums=$(printf '%s' "$vision_issues_json" | jq -r '.[].number' 2>/dev/null) || vision_issue_nums="" + +while IFS= read -r vision_issue; do + [ -z "$vision_issue" ] && continue + vision_issue_count=$((vision_issue_count + 1)) + + # Skip if pitch budget exhausted + if [ "${pitch_budget}" -le 0 ] || [ ${#ARCHITECT_TARGET_ISSUES[@]} -ge "$pitch_budget" ]; then + log "Pitch budget 
exhausted (${#ARCHITECT_TARGET_ISSUES[@]}/${pitch_budget})" + break + fi + + # Skip if vision issue already has open architect PR + if [ "${_arch_vision_issues_with_open_prs[$vision_issue]:-}" = "1" ]; then + log "Vision issue #${vision_issue} already has open architect PR — skipping" + continue + fi + + # Skip if vision issue has in-progress label + if printf '%s\n' "$in_progress_issues" | grep -q "^${vision_issue}$"; then + log "Vision issue #${vision_issue} has in-progress label — skipping" + continue + fi + + # Skip if vision issue has open sub-issues (already being worked on) + if has_open_subissues "$vision_issue"; then + log "Vision issue #${vision_issue} has open sub-issues — skipping" + continue + fi + + # Skip if vision issue has merged sprint PR (decomposition already done) + if has_merged_sprint_pr "$vision_issue"; then + log "Vision issue #${vision_issue} has merged sprint PR — skipping" + continue + fi + + # Add to target issues + ARCHITECT_TARGET_ISSUES+=("$vision_issue") + log "Selected vision issue #${vision_issue} for pitching" +done <<< "$vision_issue_nums" + +# If no issues selected, decide whether to exit or process responses +if [ ${#ARCHITECT_TARGET_ISSUES[@]} -eq 0 ]; then + if [ "${has_responses_to_process:-false}" = "true" ]; then + log "No new pitches needed — responses to process" + # Fall through to response processing block below + else + log "No vision issues available for pitching (all have open PRs, sub-issues, or merged sprint PRs) — signaling PHASE:done" + # Signal PHASE:done by writing to phase file if it exists + if [ -f "/tmp/architect-${PROJECT_NAME}.phase" ]; then + echo "PHASE:done" > "/tmp/architect-${PROJECT_NAME}.phase" + fi + exit 0 + fi +fi + +log "Selected ${#ARCHITECT_TARGET_ISSUES[@]} vision issue(s) for pitching: ${ARCHITECT_TARGET_ISSUES[*]}" + +# ── Stateless pitch generation and PR creation (bash-driven, no model API calls) ── +# For each target issue: +# 1. Fetch issue body from Forgejo API (bash) +# 2. 
Invoke claude -p with issue body + context (stateless, no API calls)
+# 3. Create PR with pitch content (bash)
+# 4. Post footer comment (bash)
+#
+# Every helper call below is guarded: the helpers return non-zero on failures
+# they have already logged, and one failed issue must not end the whole run if
+# errexit is enabled (the existing `|| true` on generate_pitch suggests it is).
+
+pitch_count=0
+for vision_issue in "${ARCHITECT_TARGET_ISSUES[@]}"; do
+  log "Processing vision issue #${vision_issue}"
+
+  # Fetch vision issue details from Forgejo API (bash, not model)
+  issue_title=$(get_vision_issue_title "$vision_issue") || issue_title=""
+  issue_body=$(get_vision_issue_body "$vision_issue") || issue_body=""
+
+  if [ -z "$issue_title" ] || [ -z "$issue_body" ]; then
+    log "WARNING: failed to fetch vision issue #${vision_issue} details"
+    continue
+  fi
+
+  # Generate pitch via stateless claude -p call (model has no API access)
+  log "Generating pitch for vision issue #${vision_issue}"
+  pitch=$(generate_pitch "$vision_issue" "$issue_title" "$issue_body") || true
+
+  if [ -z "$pitch" ]; then
+    log "WARNING: failed to generate pitch for vision issue #${vision_issue}"
+    continue
+  fi
+
+  # Create sprint PR (bash, not model)
+  # Use issue number in branch name to avoid collisions across runs
+  branch_name="architect/sprint-vision-${vision_issue}"
+  pr_number=$(create_sprint_pr "architect: ${issue_title}" "$pitch" "$branch_name") || pr_number=""
+
+  if [ -z "$pr_number" ]; then
+    log "WARNING: failed to create PR for vision issue #${vision_issue}"
+    continue
+  fi
+
+  # Post footer comment (best effort; the helper logs its own warning)
+  post_pr_footer "$pr_number" || true
+
+  # Add in-progress label to vision issue (best effort, same as above)
+  add_inprogress_label "$vision_issue" || true
+
+  pitch_count=$((pitch_count + 1))
+  log "Completed pitch for vision issue #${vision_issue} — PR #${pr_number}"
+done
+
+log "Generated ${pitch_count} sprint pitch(es)"
+
+# ── Run agent for response processing if needed ───────────────────────────
+# Always process ACCEPT/REJECT responses when present, regardless of new pitches
+if [ "${has_responses_to_process:-false}" = "true" ]; then
+  log "Processing ACCEPT/REJECT responses on existing PRs"
+
+  # Check if any PRs have responses that need agent handling
+  needs_agent=false
+  pr_numbers=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+    "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls?state=open&limit=100" 2>/dev/null | jq -r '.[] | select(.title | startswith("architect:")) | .number') || pr_numbers=""
+
+  for pr_num in $pr_numbers; do
+    # Check for ACCEPT/REJECT in comments
+    comments=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+      "${FORGE_API}/repos/${FORGE_OPS_REPO}/issues/${pr_num}/comments" 2>/dev/null) || continue
+
+    # Check for review decisions (higher precedence)
+    reviews=$(curl -sf -H "Authorization: token $FORGE_TOKEN" \
+      "${FORGE_API}/repos/${FORGE_OPS_REPO}/pulls/${pr_num}/reviews" 2>/dev/null) || reviews=""
+
+    # Check for ACCEPT (APPROVED review or ACCEPT comment).
+    # Only comment bodies are grepped, so the keyword is accepted at the start
+    # of a line with or without a leading "prefix: ". The footer posted on each
+    # PR quotes the keywords mid-sentence and therefore does not match.
+    if printf '%s' "$reviews" | jq -e '.[] | select(.state == "APPROVED")' >/dev/null 2>&1; then
+      log "PR #${pr_num} has APPROVED review — needs agent handling"
+      needs_agent=true
+    elif printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qiE '^[[:space:]]*([^:]+: *)?ACCEPT'; then
+      log "PR #${pr_num} has ACCEPT comment — needs agent handling"
+      needs_agent=true
+    elif printf '%s' "$comments" | jq -r '.[].body // empty' | grep -qiE '^[[:space:]]*([^:]+: *)?REJECT:'; then
+      log "PR #${pr_num} has REJECT comment — needs agent handling"
+      needs_agent=true
+    fi
+  done
+
+  # Run agent only if there are responses to process
+  if [ "$needs_agent" = "true" ]; then
+    # Determine whether to resume session
+    RESUME_ARGS=()
+    if detect_questions_phase && [ -f "$SID_FILE" ]; then
+      RESUME_SESSION=$(cat "$SID_FILE")
+      RESUME_ARGS=(--resume "$RESUME_SESSION")
+      log "Resuming session from questions phase run: ${RESUME_SESSION:0:12}..."
+    elif ! detect_questions_phase; then
+      log "PR not in questions phase — starting fresh session"
+    elif [ ! 
-f "$SID_FILE" ]; then + log "No session ID found for questions phase — starting fresh session" + fi + + agent_run "${RESUME_ARGS[@]}" --worktree "$WORKTREE" "$PROMPT" + log "agent_run complete" + fi +fi + +# ── Clean up scratch files (legacy single file + per-issue files) ────────── rm -f "$SCRATCH_FILE" +rm -f "${SCRATCH_FILE_PREFIX}"-*.md + +# Write journal entry post-session +profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true + log "--- Architect run done ---" diff --git a/bin/disinto b/bin/disinto index c3b454f..4c663a1 100755 --- a/bin/disinto +++ b/bin/disinto @@ -25,6 +25,13 @@ set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)" source "${FACTORY_ROOT}/lib/env.sh" +source "${FACTORY_ROOT}/lib/ops-setup.sh" # setup_ops_repo, migrate_ops_repo +source "${FACTORY_ROOT}/lib/hire-agent.sh" +source "${FACTORY_ROOT}/lib/forge-setup.sh" +source "${FACTORY_ROOT}/lib/generators.sh" +source "${FACTORY_ROOT}/lib/forge-push.sh" +source "${FACTORY_ROOT}/lib/ci-setup.sh" +source "${FACTORY_ROOT}/lib/release.sh" # ── Helpers ────────────────────────────────────────────────────────────────── @@ -160,397 +167,38 @@ write_secrets_encrypted() { return 0 } -FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" +export FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" # Generate docker-compose.yml in the factory root. +# (Implementation in lib/generators.sh) generate_compose() { - local forge_port="${1:-3000}" - local compose_file="${FACTORY_ROOT}/docker-compose.yml" - - cat > "$compose_file" <<'COMPOSEEOF' -# docker-compose.yml — generated by disinto init -# Brings up Forgejo, Woodpecker, and the agent runtime. 
- -services: - forgejo: - image: codeberg.org/forgejo/forgejo:11.0 - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - forgejo-data:/data - environment: - FORGEJO__database__DB_TYPE: sqlite3 - FORGEJO__server__ROOT_URL: http://forgejo:3000/ - FORGEJO__server__HTTP_PORT: "3000" - FORGEJO__security__INSTALL_LOCK: "true" - FORGEJO__service__DISABLE_REGISTRATION: "true" - FORGEJO__webhook__ALLOWED_HOST_LIST: "private" - networks: - - disinto-net - - woodpecker: - image: woodpeckerci/woodpecker-server:v3 - restart: unless-stopped - security_opt: - - apparmor=unconfined - ports: - - "8000:8000" - - "9000:9000" - volumes: - - woodpecker-data:/var/lib/woodpecker - environment: - WOODPECKER_FORGEJO: "true" - WOODPECKER_FORGEJO_URL: http://forgejo:3000 - WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} - WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} - WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000} - WOODPECKER_OPEN: "true" - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_DATABASE_DRIVER: sqlite3 - WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite - depends_on: - - forgejo - networks: - - disinto-net - - woodpecker-agent: - image: woodpeckerci/woodpecker-agent:v3 - restart: unless-stopped - network_mode: host - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock - environment: - WOODPECKER_SERVER: localhost:9000 - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_GRPC_SECURE: "false" - WOODPECKER_HEALTHCHECK_ADDR: ":3333" - WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net - WOODPECKER_MAX_WORKFLOWS: 1 - depends_on: - - woodpecker - - agents: - build: - context: . 
- dockerfile: docker/agents/Dockerfile - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - - project-repos:/home/agent/repos - - ${HOME}/.claude:/home/agent/.claude - - ${HOME}/.claude.json:/home/agent/.claude.json:ro - - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro - - ${HOME}/.ssh:/home/agent/.ssh:ro - - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro - - woodpecker-data:/woodpecker-data:ro - environment: - FORGE_URL: http://forgejo:3000 - WOODPECKER_SERVER: http://woodpecker:8000 - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - WOODPECKER_DATA_DIR: /woodpecker-data - env_file: - - .env - # IMPORTANT: agents get .env only (forge tokens, CI tokens, config). - # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in - # .env.vault.enc and are NEVER injected here — only the runner - # container receives them at fire time (AD-006, #745). - depends_on: - - forgejo - - woodpecker - networks: - - disinto-net - - runner: - build: - context: . 
- dockerfile: docker/agents/Dockerfile - profiles: ["vault"] - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - environment: - FORGE_URL: http://forgejo:3000 - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - # Vault redesign in progress (PR-based approval, see #73-#77) - # This container is being replaced — entrypoint will be updated in follow-up - networks: - - disinto-net - - # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging - # Serves on ports 80/443, routes based on path - edge: - build: ./docker/edge - ports: - - "80:80" - - "443:443" - environment: - - DISINTO_VERSION=${DISINTO_VERSION:-main} - - FORGE_URL=http://forgejo:3000 - - FORGE_REPO=johba/disinto - - FORGE_OPS_REPO=johba/disinto-ops - - FORGE_TOKEN=${FORGE_TOKEN:-} - - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin,johba} - - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-} - - OPS_REPO_ROOT=/opt/disinto-ops - - PROJECT_REPO_ROOT=/opt/disinto - - PRIMARY_BRANCH=main - volumes: - - ./docker/Caddyfile:/etc/caddy/Caddyfile - - caddy_data:/data - - /var/run/docker.sock:/var/run/docker.sock - depends_on: - - forgejo - - woodpecker - - staging - networks: - - disinto-net - - # Staging container — static file server for staging artifacts - # Edge proxy routes to this container for default requests - staging: - image: caddy:alpine - command: ["caddy", "file-server", "--root", "/srv/site"] - volumes: - - ./docker:/srv/site:ro - networks: - - disinto-net - - # Staging deployment slot — activated by Woodpecker staging pipeline (#755). - # Profile-gated: only starts when explicitly targeted by deploy commands. - # Customize image/ports/volumes for your project after init. 
- staging-deploy: - image: alpine:3 - profiles: ["staging"] - security_opt: - - apparmor=unconfined - environment: - DEPLOY_ENV: staging - networks: - - disinto-net - command: ["echo", "staging slot — replace with project image"] - -volumes: - forgejo-data: - woodpecker-data: - agent-data: - project-repos: - caddy_data: - -networks: - disinto-net: - driver: bridge -COMPOSEEOF - - # Patch the Claude CLI binary path — resolve from host PATH at init time. - local claude_bin - claude_bin="$(command -v claude 2>/dev/null || true)" - if [ -n "$claude_bin" ]; then - # Resolve symlinks to get the real binary path - claude_bin="$(readlink -f "$claude_bin")" - sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" - else - echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 - sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" - fi - - # Patch the forgejo port mapping into the file if non-default - if [ "$forge_port" != "3000" ]; then - # Add port mapping to forgejo service so it's reachable from host during init - sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" - else - sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"3000:3000\"" "$compose_file" - fi - - echo "Created: ${compose_file}" + _generate_compose_impl "$@" } # Generate docker/agents/ files if they don't already exist. +# (Implementation in lib/generators.sh) generate_agent_docker() { - local docker_dir="${FACTORY_ROOT}/docker/agents" - mkdir -p "$docker_dir" - - if [ ! -f "${docker_dir}/Dockerfile" ]; then - echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 - fi - if [ ! -f "${docker_dir}/entrypoint.sh" ]; then - echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 - fi + _generate_agent_docker_impl "$@" } # Generate docker/Caddyfile template for edge proxy. 
+# (Implementation in lib/generators.sh) generate_caddyfile() { - local docker_dir="${FACTORY_ROOT}/docker" - local caddyfile="${docker_dir}/Caddyfile" - - if [ -f "$caddyfile" ]; then - echo "Caddyfile: ${caddyfile} (already exists, skipping)" - return - fi - - cat > "$caddyfile" <<'CADDYFILEEOF' -# Caddyfile — edge proxy configuration -# IP-only binding at bootstrap; domain + TLS added later via vault resource request - -:80 { - # Reverse proxy to Forgejo - handle /forgejo/* { - reverse_proxy forgejo:3000 - } - - # Reverse proxy to Woodpecker CI - handle /ci/* { - reverse_proxy woodpecker:8000 - } - - # Default: proxy to staging container - handle { - reverse_proxy staging:80 - } -} -CADDYFILEEOF - - echo "Created: ${caddyfile}" + _generate_caddyfile_impl "$@" } # Generate docker/index.html default page. +# (Implementation in lib/generators.sh) generate_staging_index() { - local docker_dir="${FACTORY_ROOT}/docker" - local index_file="${docker_dir}/index.html" - - if [ -f "$index_file" ]; then - echo "Staging: ${index_file} (already exists, skipping)" - return - fi - - cat > "$index_file" <<'INDEXEOF' - - - - - - Nothing shipped yet - - - -
-

Nothing shipped yet

-

CI pipelines will update this page with your staging artifacts.

-
- - -INDEXEOF - - echo "Created: ${index_file}" + _generate_staging_index_impl "$@" } # Generate template .woodpecker/ deployment pipeline configs in a project repo. # Creates staging.yml and production.yml alongside the project's existing CI config. # These pipelines trigger on Woodpecker's deployment event with environment filters. +# (Implementation in lib/generators.sh) generate_deploy_pipelines() { - local repo_root="$1" project_name="$2" - local wp_dir="${repo_root}/.woodpecker" - - mkdir -p "$wp_dir" - - # Skip if deploy pipelines already exist - if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then - echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" - return - fi - - if [ ! -f "${wp_dir}/staging.yml" ]; then - cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' -# .woodpecker/staging.yml — Staging deployment pipeline -# Triggered by runner via Woodpecker promote API. -# Human approves promotion in vault → runner calls promote → this runs. - -when: - event: deployment - environment: staging - -steps: - - name: deploy-staging - image: docker:27 - commands: - - echo "Deploying to staging environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" - # Pull the image built by CI and deploy to staging - # Customize these commands for your project: - # - docker compose -f docker-compose.yml --profile staging up -d - - echo "Staging deployment complete" - - - name: verify-staging - image: alpine:3 - commands: - - echo "Verifying staging deployment..." - # Add health checks, smoke tests, or integration tests here: - # - curl -sf http://staging:8080/health || exit 1 - - echo "Staging verification complete" -STAGINGEOF - echo "Created: ${wp_dir}/staging.yml" - fi - - if [ ! -f "${wp_dir}/production.yml" ]; then - cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' -# .woodpecker/production.yml — Production deployment pipeline -# Triggered by runner via Woodpecker promote API. 
-# Human approves promotion in vault → runner calls promote → this runs. - -when: - event: deployment - environment: production - -steps: - - name: deploy-production - image: docker:27 - commands: - - echo "Deploying to production environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" - # Pull the verified image and deploy to production - # Customize these commands for your project: - # - docker compose -f docker-compose.yml up -d - - echo "Production deployment complete" - - - name: verify-production - image: alpine:3 - commands: - - echo "Verifying production deployment..." - # Add production health checks here: - # - curl -sf http://production:8080/health || exit 1 - - echo "Production verification complete" -PRODUCTIONEOF - echo "Created: ${wp_dir}/production.yml" - fi + _generate_deploy_pipelines_impl "$@" } # Check whether compose mode is active (docker-compose.yml exists). @@ -558,646 +206,11 @@ is_compose_mode() { [ -f "${FACTORY_ROOT}/docker-compose.yml" ] } -# Provision or connect to a local Forgejo instance. -# Creates admin + bot users, generates API tokens, stores in .env. -# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. -setup_forge() { - local forge_url="$1" - local repo_slug="$2" - local use_bare="${DISINTO_BARE:-false}" - - echo "" - echo "── Forge setup ────────────────────────────────────────" - - # Helper: run a command inside the Forgejo container - _forgejo_exec() { - if [ "$use_bare" = true ]; then - docker exec -u git disinto-forgejo "$@" - else - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" - fi - } - - # Check if Forgejo is already running - if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then - echo "Forgejo: ${forge_url} (already running)" - else - echo "Forgejo not reachable at ${forge_url}" - echo "Starting Forgejo via Docker..." - - if ! 
command -v docker &>/dev/null; then - echo "Error: docker not found — needed to provision Forgejo" >&2 - echo " Install Docker or start Forgejo manually at ${forge_url}" >&2 - exit 1 - fi - - # Extract port from forge_url - local forge_port - forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|') - forge_port="${forge_port:-3000}" - - if [ "$use_bare" = true ]; then - # Bare-metal mode: standalone docker run - mkdir -p "${FORGEJO_DATA_DIR}" - - if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then - docker start disinto-forgejo >/dev/null 2>&1 || true - else - docker run -d \ - --name disinto-forgejo \ - --restart unless-stopped \ - -p "${forge_port}:3000" \ - -p 2222:22 \ - -v "${FORGEJO_DATA_DIR}:/data" \ - -e "FORGEJO__database__DB_TYPE=sqlite3" \ - -e "FORGEJO__server__ROOT_URL=${forge_url}/" \ - -e "FORGEJO__server__HTTP_PORT=3000" \ - -e "FORGEJO__service__DISABLE_REGISTRATION=true" \ - codeberg.org/forgejo/forgejo:11.0 - fi - else - # Compose mode: start Forgejo via docker compose - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo - fi - - # Wait for Forgejo to become healthy - echo -n "Waiting for Forgejo to start" - local retries=0 - while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 60 ]; then - echo "" - echo "Error: Forgejo did not become ready within 60s" >&2 - exit 1 - fi - echo -n "." - sleep 1 - done - echo " ready" - fi - - # Wait for Forgejo database to accept writes (API may be ready before DB is) - echo -n "Waiting for Forgejo database" - local db_ready=false - for _i in $(seq 1 30); do - if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then - db_ready=true - break - fi - echo -n "." 
- sleep 1 - done - echo "" - if [ "$db_ready" != true ]; then - echo "Error: Forgejo database not ready after 30s" >&2 - exit 1 - fi - - # Create admin user if it doesn't exist - local admin_user="disinto-admin" - local admin_pass - local env_file="${FACTORY_ROOT}/.env" - - # Re-read persisted admin password if available (#158) - if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) - fi - # Generate a fresh password only when none was persisted - if [ -z "${admin_pass:-}" ]; then - admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - fi - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Creating admin user: ${admin_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --email "admin@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create admin user '${admin_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - - # Verify admin user was actually created - if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Error: admin user '${admin_user}' not found after creation" >&2 - exit 1 - fi - - # Persist admin password to .env for idempotent re-runs (#158) - if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file" - else - printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file" - fi - else - echo "Admin user: ${admin_user} (already exists)" - # Reset password to the persisted value so basic-auth works (#158) - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - fi - # Preserve password for Woodpecker OAuth2 token generation (#779) - _FORGE_ADMIN_PASS="$admin_pass" - - # Create human user (johba) as site admin if it doesn't exist - local human_user="johba" - local human_pass - human_pass="human-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - echo "Creating human user: ${human_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${human_user}" \ - --password "${human_pass}" \ - --email "johba@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create human user '${human_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${human_user}" \ - --password "${human_pass}" \ - --must-change-password=false - - # Verify human user was actually created - if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - echo "Error: human user '${human_user}' not found after creation" >&2 - exit 1 - fi - echo " Human user '${human_user}' created as site admin" - else - echo "Human user: ${human_user} (already exists)" - fi - - # Get or create admin token - local admin_token - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - - if [ -z "$admin_token" ]; then - # Token might already exist — try listing - admin_token=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || admin_token="" - fi - - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - exit 1 - fi - - # Get or create human user token - local human_token - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - human_token=$(curl -sf -X POST \ - -u "${human_user}:${human_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${human_user}/tokens" \ - -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || human_token="" - - if [ -z "$human_token" ]; then - # Token might already exist — try listing - human_token=$(curl -sf \ - -u "${human_user}:${human_pass}" \ - "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || human_token="" - fi - - if [ -n "$human_token" ]; then - # Store human token in .env - if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file" - else - printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file" - fi - export HUMAN_TOKEN="$human_token" - echo " Human 
token saved (HUMAN_TOKEN)" - fi - fi - - # Create bot users and tokens - # Each agent gets its own Forgejo account for identity and audit trail (#747). - # Map: bot-username -> env-var-name for the token - local -A bot_token_vars=( - [dev-bot]="FORGE_TOKEN" - [review-bot]="FORGE_REVIEW_TOKEN" - [planner-bot]="FORGE_PLANNER_TOKEN" - [gardener-bot]="FORGE_GARDENER_TOKEN" - [vault-bot]="FORGE_VAULT_TOKEN" - [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" - [predictor-bot]="FORGE_PREDICTOR_TOKEN" - [architect-bot]="FORGE_ARCHITECT_TOKEN" - ) - - local bot_user bot_pass token token_var - - for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do - bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - token_var="${bot_token_vars[$bot_user]}" - - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - echo "Creating bot user: ${bot_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --email "${bot_user}@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create bot user '${bot_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --must-change-password=false - - # Verify bot user was actually created - if ! 
curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - echo "Error: bot user '${bot_user}' not found after creation" >&2 - exit 1 - fi - fi - - # Generate token via API (basic auth as the bot user — Forgejo requires - # basic auth on POST /users/{username}/tokens, token auth is rejected) - token=$(curl -sf -X POST \ - -u "${bot_user}:${bot_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" \ - -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || token="" - - if [ -z "$token" ]; then - # Token name collision — create with timestamp suffix - token=$(curl -sf -X POST \ - -u "${bot_user}:${bot_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" \ - -d "{\"name\":\"disinto-${bot_user}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || token="" - fi - - if [ -z "$token" ]; then - echo "Error: failed to create API token for '${bot_user}'" >&2 - exit 1 - fi - - # Store token in .env under the per-agent variable name - if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then - sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" - else - printf '%s=%s\n' "$token_var" "$token" >> "$env_file" - fi - export "${token_var}=${token}" - echo " ${bot_user} token saved (${token_var})" - - # Backwards-compat aliases for dev-bot and review-bot - if [ "$bot_user" = "dev-bot" ]; then - export CODEBERG_TOKEN="$token" - elif [ "$bot_user" = "review-bot" ]; then - export REVIEW_BOT_TOKEN="$token" - fi - done - - # Store FORGE_URL in .env if not already present - if ! 
grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then - printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" - fi - - # Create the repo on Forgejo if it doesn't exist - local org_name="${repo_slug%%/*}" - local repo_name="${repo_slug##*/}" - - # Check if repo already exists - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then - - # Try creating org first (ignore if exists) - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs" \ - -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true - - # Create repo under org - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - # Fallback: create under the human user namespace using admin endpoint - if [ -n "${admin_token:-}" ]; then - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2 - exit 1 - fi - elif [ -n "${HUMAN_TOKEN:-}" ]; then - if ! 
curl -sf -X POST \ - -H "Authorization: token ${HUMAN_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2 - exit 1 - fi - else - echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2 - exit 1 - fi - fi - - # Add all bot users as collaborators with appropriate permissions - # dev-bot: write (PR creation via lib/vault.sh) - # review-bot: read (PR review) - # planner-bot: write (prerequisites.md, memory) - # gardener-bot: write (backlog grooming) - # vault-bot: write (vault items) - # supervisor-bot: read (health monitoring) - # predictor-bot: read (pattern detection) - # architect-bot: write (sprint PRs) - local bot_user bot_perm - declare -A bot_permissions=( - [dev-bot]="write" - [review-bot]="read" - [planner-bot]="write" - [gardener-bot]="write" - [vault-bot]="write" - [supervisor-bot]="read" - [predictor-bot]="read" - [architect-bot]="write" - ) - for bot_user in "${!bot_permissions[@]}"; do - bot_perm="${bot_permissions[$bot_user]}" - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ - -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true - done - - # Add disinto-admin as admin collaborator - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \ - -d '{"permission":"admin"}' >/dev/null 2>&1 || true - - echo "Repo: ${repo_slug} created on Forgejo" - else - echo "Repo: ${repo_slug} (already exists on Forgejo)" - fi - - echo "Forge: ${forge_url} (ready)" -} - # Create and seed the {project}-ops repo on Forgejo with initial 
directory structure. # The ops repo holds operational data: vault items, journals, evidence, prerequisites. -setup_ops_repo() { - local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" - local org_name="${ops_slug%%/*}" - local ops_name="${ops_slug##*/}" +# ops repo setup is now in lib/ops-setup.sh - echo "" - echo "── Ops repo setup ─────────────────────────────────────" - - # Check if ops repo already exists on Forgejo - if curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${ops_slug}" >/dev/null 2>&1; then - echo "Ops repo: ${ops_slug} (already exists on Forgejo)" - else - # Create ops repo under org (or human user if org creation failed) - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then - # Fallback: create under the human user namespace - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/johba/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data\"}" >/dev/null 2>&1 || true - fi - - # Add all bot users as collaborators with appropriate permissions - # vault branch protection (#77) requires: - # - Admin-only merge to main (enforced by admin_enforced: true) - # - Bots can push branches and create PRs, but cannot merge - local bot_user bot_perm - declare -A bot_permissions=( - [dev-bot]="write" - [review-bot]="read" - [planner-bot]="write" - [gardener-bot]="write" - [vault-bot]="write" - [supervisor-bot]="read" - [predictor-bot]="read" - [architect-bot]="write" - ) - for bot_user in "${!bot_permissions[@]}"; do 
- bot_perm="${bot_permissions[$bot_user]}" - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${ops_slug}/collaborators/${bot_user}" \ - -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true - done - - # Add disinto-admin as admin collaborator - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${ops_slug}/collaborators/disinto-admin" \ - -d '{"permission":"admin"}' >/dev/null 2>&1 || true - - echo "Ops repo: ${ops_slug} created on Forgejo" - fi - - # Clone ops repo locally if not present - if [ ! -d "${ops_root}/.git" ]; then - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") - local clone_url="${auth_url}/${ops_slug}.git" - echo "Cloning: ops repo -> ${ops_root}" - git clone --quiet "$clone_url" "$ops_root" 2>/dev/null || { - echo "Initializing: ops repo at ${ops_root}" - mkdir -p "$ops_root" - git -C "$ops_root" init --initial-branch="${primary_branch}" -q - } - else - echo "Ops repo: ${ops_root} (already exists locally)" - fi - - # Seed directory structure - local seeded=false - mkdir -p "${ops_root}/vault/pending" - mkdir -p "${ops_root}/vault/approved" - mkdir -p "${ops_root}/vault/fired" - mkdir -p "${ops_root}/vault/rejected" - mkdir -p "${ops_root}/knowledge" - mkdir -p "${ops_root}/evidence/engagement" - - if [ ! -f "${ops_root}/README.md" ]; then - cat > "${ops_root}/README.md" < **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo. 
- -## Branch protection - -- \`main\`: 2 reviewers required for vault items -- Journal/evidence commits may use lighter rules -OPSEOF - seeded=true - fi - - # Create stub files if they don't exist - [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; } - [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } - [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } - - # Commit and push seed content - if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then - # Auto-configure repo-local git identity if missing (#778) - if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then - git -C "$ops_root" config user.name "disinto-admin" - fi - if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then - git -C "$ops_root" config user.email "disinto-admin@localhost" - fi - - git -C "$ops_root" add -A - if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then - git -C "$ops_root" commit -m "chore: seed ops repo structure" -q - # Push if remote exists - if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then - git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null || true - fi - fi - echo "Seeded: ops repo with initial structure" - fi -} - -# Push local clone to the Forgejo remote. 
-push_to_forge() { - local repo_root="$1" forge_url="$2" repo_slug="$3" - - # Build authenticated remote URL: http://dev-bot:@host:port/org/repo.git - if [ -z "${FORGE_TOKEN:-}" ]; then - echo "Error: FORGE_TOKEN not set — cannot push to Forgejo" >&2 - return 1 - fi - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") - local remote_url="${auth_url}/${repo_slug}.git" - # Display URL without token - local display_url="${forge_url}/${repo_slug}.git" - - # Always set the remote URL to ensure credentials are current - if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then - git -C "$repo_root" remote set-url forgejo "$remote_url" - else - git -C "$repo_root" remote add forgejo "$remote_url" - fi - echo "Remote: forgejo -> ${display_url}" - - # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) - if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then - echo "Push: skipped (local repo has no commits)" - return 0 - fi - - # Push all branches and tags - echo "Pushing: branches to forgejo" - if ! git -C "$repo_root" push forgejo --all 2>&1; then - echo "Error: failed to push branches to Forgejo" >&2 - return 1 - fi - echo "Pushing: tags to forgejo" - if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then - echo "Error: failed to push tags to Forgejo" >&2 - return 1 - fi - - # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) - local is_empty="true" - local verify_attempt - for verify_attempt in $(seq 1 5); do - local repo_info - repo_info=$(curl -sf --max-time 10 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" - if [ -z "$repo_info" ]; then - is_empty="skipped" - break # API unreachable, skip verification - fi - is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') - if [ "$is_empty" != "true" ]; then - echo "Verify: repo is not empty (push confirmed)" - break - fi - if [ "$verify_attempt" -lt 5 ]; then - sleep 2 - fi - done - if [ "$is_empty" = "true" ]; then - echo "Warning: Forgejo repo still reports empty after push" >&2 - return 1 - fi -} +# push_to_forge() is sourced from lib/forge-push.sh # Preflight check — verify all factory requirements before proceeding. preflight_check() { @@ -1346,6 +359,18 @@ check_prs = true check_dev_agent = true check_pipeline_stall = false +# Local-model agents (optional) — configure to use llama-server or similar +# for local LLM inference. Each agent gets its own container with isolated +# credentials and configuration. 
+# +# [agents.llama] +# base_url = "http://10.10.10.1:8081" +# model = "unsloth/Qwen3.5-35B-A3B" +# api_key = "sk-no-key-required" +# roles = ["dev"] +# forge_user = "dev-qwen" +# compact_pct = 60 + # [mirrors] # github = "git@github.com:user/repo.git" # codeberg = "git@codeberg.org:user/repo.git" @@ -1366,6 +391,15 @@ create_labels() { ["underspecified"]="#fbca04" ["vision"]="#0e8a16" ["action"]="#1d76db" + ["prediction/unreviewed"]="#a2eeef" + ["prediction/dismissed"]="#d73a4a" + ["prediction/actioned"]="#28a745" + ["bug-report"]="#e11d48" + ["needs-triage"]="#f9d0c4" + ["reproduced"]="#0e8a16" + ["cannot-reproduce"]="#cccccc" + ["in-triage"]="#1d76db" + ["rejected"]="#cccccc" ) echo "Creating labels on ${repo}..." @@ -1378,9 +412,11 @@ create_labels() { | grep -o '"name":"[^"]*"' | cut -d'"' -f4) || existing="" local name color - for name in backlog in-progress blocked tech-debt underspecified vision action; do + local created=0 skipped=0 failed=0 + for name in backlog in-progress blocked tech-debt underspecified vision action bug-report prediction/unreviewed prediction/dismissed prediction/actioned needs-triage reproduced cannot-reproduce in-triage rejected; do if echo "$existing" | grep -qx "$name"; then echo " . ${name} (already exists)" + skipped=$((skipped + 1)) continue fi color="${labels[$name]}" @@ -1389,11 +425,15 @@ create_labels() { -H "Content-Type: application/json" \ "${api}/labels" \ -d "{\"name\":\"${name}\",\"color\":\"${color}\"}" >/dev/null 2>&1; then - echo " + ${name}" + echo " + ${name} (created)" + created=$((created + 1)) else echo " ! ${name} (failed to create)" + failed=$((failed + 1)) fi done + + echo "Labels: ${created} created, ${skipped} skipped, ${failed} failed" } # Generate a minimal VISION.md template in the target project. @@ -1433,404 +473,57 @@ EOF echo " Commit this to your repo when ready" } -# Generate and optionally install cron entries for the project agents. 
+# Copy issue templates from templates/ to target project repo. +copy_issue_templates() { + local repo_root="$1" + local template_dir="${FACTORY_ROOT}/templates" + local target_dir="${repo_root}/.forgejo/ISSUE_TEMPLATE" + + # Skip if templates directory doesn't exist + if [ ! -d "$template_dir" ]; then + return + fi + + # Create target directory + mkdir -p "$target_dir" + + # Copy each template file if it doesn't already exist + for template in "$template_dir"/issue/*; do + [ -f "$template" ] || continue + local filename + filename=$(basename "$template") + local target_path="${target_dir}/${filename}" + if [ ! -f "$target_path" ]; then + cp "$template" "$target_path" + echo "Copied: ${target_path}" + else + echo "Skipped: ${target_path} (already exists)" + fi + done +} + +# Install cron entries for project agents (implementation in lib/ci-setup.sh) install_cron() { - local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - - # In compose mode, skip host cron — the agents container runs cron internally - if [ "$bare" = false ]; then - echo "" - echo "Cron: skipped (agents container handles scheduling in compose mode)" - return - fi - - # Bare mode: crontab is required on the host - if ! 
command -v crontab &>/dev/null; then - echo "Error: crontab not found (required for bare-metal mode)" >&2 - echo " Install: apt install cron / brew install cron" >&2 - exit 1 - fi - - # Use absolute path for the TOML in cron entries - local abs_toml - abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" - - local cron_block - cron_block="# disinto: ${name} -2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 -4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 -0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" - - echo "" - echo "Cron entries to install:" - echo "$cron_block" - echo "" - - if [ "$auto_yes" = false ] && [ -t 0 ]; then - read -rp "Install these cron entries? [y/N] " confirm - if [[ ! "$confirm" =~ ^[Yy] ]]; then - echo "Skipped cron install. Add manually with: crontab -e" - return - fi - fi - - # Append to existing crontab - { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab - - echo "Cron entries installed" + _load_ci_context + _install_cron_impl "$@" } -# Set up Woodpecker CI to use Forgejo as its forge backend. -# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
+# Create Woodpecker OAuth2 app on Forgejo (implementation in lib/ci-setup.sh) create_woodpecker_oauth() { - local forge_url="$1" repo_slug="$2" - - echo "" - echo "── Woodpecker OAuth2 setup ────────────────────────────" - - # Create OAuth2 application on Forgejo for Woodpecker - local oauth2_name="woodpecker-ci" - local redirect_uri="http://localhost:8000/authorize" - local existing_app client_id client_secret - - # Check if OAuth2 app already exists - existing_app=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ - | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true - - if [ -n "$existing_app" ]; then - echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" - client_id="$existing_app" - else - local oauth2_resp - oauth2_resp=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/applications/oauth2" \ - -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ - 2>/dev/null) || oauth2_resp="" - - if [ -z "$oauth2_resp" ]; then - echo "Warning: failed to create OAuth2 app on Forgejo" >&2 - return - fi - - client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') - client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') - - if [ -z "$client_id" ]; then - echo "Warning: OAuth2 app creation returned no client_id" >&2 - return - fi - - echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" - fi - - # Store Woodpecker forge config in .env - # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references - # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri - local env_file="${FACTORY_ROOT}/.env" - local wp_vars=( - "WOODPECKER_FORGEJO=true" - "WOODPECKER_FORGEJO_URL=${forge_url}" - "WOODPECKER_HOST=http://localhost:8000" - ) 
- if [ -n "${client_id:-}" ]; then - wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") - fi - if [ -n "${client_secret:-}" ]; then - wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") - fi - - for var_line in "${wp_vars[@]}"; do - local var_name="${var_line%%=*}" - if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then - sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" - else - printf '%s\n' "$var_line" >> "$env_file" - fi - done - echo "Config: Woodpecker forge vars written to .env" + _load_ci_context + _create_woodpecker_oauth_impl "$@" } -# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. -# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). -# Called after compose stack is up, before activate_woodpecker_repo. +# Generate WOODPECKER_TOKEN via Forgejo OAuth2 flow (implementation in lib/ci-setup.sh) generate_woodpecker_token() { - local forge_url="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - local env_file="${FACTORY_ROOT}/.env" - local admin_user="disinto-admin" - local admin_pass="${_FORGE_ADMIN_PASS:-}" - - # Skip if already set - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - echo "Config: WOODPECKER_TOKEN already set in .env" - return 0 - fi - - echo "" - echo "── Woodpecker token generation ────────────────────────" - - if [ -z "$admin_pass" ]; then - echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 - echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 - return 1 - fi - - # Wait for Woodpecker to become ready - echo -n "Waiting for Woodpecker" - local retries=0 - while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 30 ]; then - echo "" - echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 - return 1 - fi - echo -n "." 
- sleep 2 - done - echo " ready" - - # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token - local cookie_jar auth_body_file - cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) - auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) - - # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) - local csrf - csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ - | grep -o 'name="_csrf"[^>]*' | head -1 \ - | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || csrf="" - - if [ -z "$csrf" ]; then - echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ - -o /dev/null \ - "${forge_url}/user/login" \ - --data-urlencode "_csrf=${csrf}" \ - --data-urlencode "user_name=${admin_user}" \ - --data-urlencode "password=${admin_pass}" \ - 2>/dev/null || true - - # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) - local wp_redir - wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ - "${wp_server}/authorize" 2>/dev/null) || wp_redir="" - - if [ -z "$wp_redir" ]; then - echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - # Rewrite internal Docker network URLs to host-accessible URLs. - # Handle both plain and URL-encoded forms of the internal hostnames. 
- local forge_url_enc wp_server_enc - forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') - wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') - wp_redir=$(printf '%s' "$wp_redir" \ - | sed "s|http://forgejo:3000|${forge_url}|g" \ - | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ - | sed "s|http://woodpecker:8000|${wp_server}|g" \ - | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") - - # Step 3: Hit Forgejo OAuth authorize endpoint with session - # First time: shows consent page. Already approved: redirects with code. - local auth_headers redirect_loc auth_code - auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o "$auth_body_file" \ - "$wp_redir" 2>/dev/null) || auth_headers="" - - redirect_loc=$(printf '%s' "$auth_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - # Auto-approved: extract code from redirect - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - else - # Consent page: extract CSRF and all form fields, POST grant approval - local consent_csrf form_client_id form_state form_redirect_uri - consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ - | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || consent_csrf="" - form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" - form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" - form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" - - if [ -n "$consent_csrf" ]; then - local grant_headers - grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o /dev/null -X POST \ - "${forge_url}/login/oauth/grant" \ 
- --data-urlencode "_csrf=${consent_csrf}" \ - --data-urlencode "client_id=${form_client_id}" \ - --data-urlencode "state=${form_state}" \ - --data-urlencode "scope=" \ - --data-urlencode "nonce=" \ - --data-urlencode "redirect_uri=${form_redirect_uri}" \ - --data-urlencode "granted=true" \ - 2>/dev/null) || grant_headers="" - - redirect_loc=$(printf '%s' "$grant_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - fi - fi - fi - - rm -f "$auth_body_file" - - if [ -z "${auth_code:-}" ]; then - echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 - rm -f "$cookie_jar" - return 1 - fi - - # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) - local state - state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') - - local wp_headers wp_token - wp_headers=$(curl -sf -c "$cookie_jar" \ - -D - -o /dev/null \ - "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ - 2>/dev/null) || wp_headers="" - - # Extract token from redirect URL (Woodpecker returns ?access_token=...) - redirect_loc=$(printf '%s' "$wp_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - wp_token="" - if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then - wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') - fi - - # Fallback: check for user_sess cookie - if [ -z "$wp_token" ]; then - wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" - fi - - rm -f "$cookie_jar" - - if [ -z "$wp_token" ]; then - echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 - return 1 - fi - - # Step 5: Create persistent personal access token via Woodpecker API - # WP v3 requires CSRF header for POST operations with session tokens. 
- local wp_csrf - wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ - "${wp_server}/web-config.js" 2>/dev/null \ - | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" - - local pat_resp final_token - pat_resp=$(curl -sf -X POST \ - -b "user_sess=${wp_token}" \ - ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ - "${wp_server}/api/user/token" \ - 2>/dev/null) || pat_resp="" - - final_token="" - if [ -n "$pat_resp" ]; then - final_token=$(printf '%s' "$pat_resp" \ - | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ - 2>/dev/null) || final_token="" - fi - - # Use persistent token if available, otherwise use session token - final_token="${final_token:-$wp_token}" - - # Save to .env - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" - else - printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" - fi - export WOODPECKER_TOKEN="$final_token" - echo "Config: WOODPECKER_TOKEN generated and saved to .env" + _load_ci_context + _generate_woodpecker_token_impl "$@" } +# Activate repo in Woodpecker CI (implementation in lib/ci-setup.sh) activate_woodpecker_repo() { - local forge_repo="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - - # Wait for Woodpecker to become ready after stack start - local retries=0 - while [ $retries -lt 10 ]; do - if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then - break - fi - retries=$((retries + 1)) - sleep 2 - done - - if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then - echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 - return - fi - - echo "" - echo "── Woodpecker repo activation ─────────────────────────" - - local wp_token="${WOODPECKER_TOKEN:-}" - if [ -z "$wp_token" ]; then - echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - return - fi - - local wp_repo_id - wp_repo_id=$(curl -sf \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" - else - # Get Forgejo repo numeric ID for WP activation - local forge_repo_id - forge_repo_id=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" - - local activate_resp - activate_resp=$(curl -sf -X POST \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ - 2>/dev/null) || activate_resp="" - - wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" - - # Set pipeline timeout to 5 minutes (default is 60) - curl -sf -X PATCH -H "Authorization: Bearer ${wp_token}" -H "Content-Type: application/json" "${wp_server}/api/repos/${wp_repo_id}" -d '{"timeout": 5}' >/dev/null 2>&1 && echo "Config: pipeline timeout set to 5 minutes" || true - else - echo "Warning: could not activate repo in Woodpecker" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - fi - fi - - # Store repo ID 
for later TOML generation - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - _WP_REPO_ID="$wp_repo_id" - fi + _load_ci_context + _activate_woodpecker_repo_impl "$@" } # ── init command ───────────────────────────────────────────────────────────── @@ -1975,13 +668,19 @@ p.write_text(text) echo "Branch: ${branch}" # Set up {project}-ops repo (#757) - local ops_slug="${forge_repo}-ops" + # Always use disinto-admin as the ops repo owner — forge_repo owner may be + # the calling user (e.g. johba) but the ops repo belongs to disinto-admin. + local ops_slug="disinto-admin/${project_name}-ops" local ops_root="/home/${USER}/${project_name}-ops" setup_ops_repo "$forge_url" "$ops_slug" "$ops_root" "$branch" + # Migrate ops repo to canonical structure (seed missing directories/files) + # This brings pre-#407 deployments up to date with the canonical structure + migrate_ops_repo "$ops_root" "$branch" + # Set up vault branch protection on ops repo (#77) # This ensures admin-only merge to main, blocking bots from merging vault PRs - # Use HUMAN_TOKEN (johba) or FORGE_TOKEN (dev-bot) for admin operations + # Use HUMAN_TOKEN (disinto-admin) or FORGE_TOKEN (dev-bot) for admin operations export FORGE_OPS_REPO="$ops_slug" # Source env.sh to ensure FORGE_TOKEN is available source "${FACTORY_ROOT}/lib/env.sh" @@ -2005,6 +704,24 @@ p.write_text(text) echo "Created: ${toml_path}" fi + # Update ops_repo in TOML with the resolved actual ops slug. + # Uses in-place substitution to prevent duplicate keys on repeated init runs. + # If the key is missing (manually created TOML), it is inserted after the repo line. 
+ if [ -n "${_ACTUAL_OPS_SLUG:-}" ] && [ -f "$toml_path" ]; then + python3 -c " +import sys, re, pathlib +p = pathlib.Path(sys.argv[1]) +text = p.read_text() +new_val = 'ops_repo = \"' + sys.argv[2] + '\"' +if re.search(r'^ops_repo\s*=', text, re.MULTILINE): + text = re.sub(r'^ops_repo\s*=\s*.*\$', new_val, text, flags=re.MULTILINE) +else: + text = re.sub(r'^(repo\s*=\s*\"[^\"]*\")', r'\1\n' + new_val, text, flags=re.MULTILINE) +p.write_text(text) +" "$toml_path" "${_ACTUAL_OPS_SLUG}" + echo "Updated: ops_repo in ${toml_path}" + fi + # Create OAuth2 app on Forgejo for Woodpecker (before compose up) _WP_REPO_ID="" create_woodpecker_oauth "$forge_url" "$forge_repo" @@ -2041,6 +758,9 @@ p.write_text(text) # Generate template deployment pipeline configs in project repo generate_deploy_pipelines "$repo_root" "$project_name" + # Copy issue templates to target project + copy_issue_templates "$repo_root" + # Install cron jobs install_cron "$project_name" "$toml_path" "$auto_yes" "$bare" @@ -2049,17 +769,36 @@ p.write_text(text) if [ -n "${MIRROR_NAMES:-}" ]; then echo "Mirrors: setting up remotes" local mname murl + local mirrors_ok=true for mname in $MIRROR_NAMES; do murl=$(eval "echo \"\$MIRROR_$(echo "$mname" | tr '[:lower:]' '[:upper:]')\"") || true [ -z "$murl" ] && continue - git -C "$repo_root" remote add "$mname" "$murl" 2>/dev/null \ - || git -C "$repo_root" remote set-url "$mname" "$murl" 2>/dev/null || true - echo " + ${mname} -> ${murl}" + if git -C "$repo_root" remote get-url "$mname" >/dev/null 2>&1; then + if git -C "$repo_root" remote set-url "$mname" "$murl"; then + echo " + ${mname} -> ${murl} (updated)" + else + echo " ! ${mname} -> ${murl} (failed to update URL)" + mirrors_ok=false + fi + else + if git -C "$repo_root" remote add "$mname" "$murl"; then + echo " + ${mname} -> ${murl} (added)" + else + echo " ! 
${mname} -> ${murl} (failed to add remote)" + mirrors_ok=false + fi + fi done # Initial sync: push current primary branch to mirrors - source "${FACTORY_ROOT}/lib/mirrors.sh" - export PROJECT_REPO_ROOT="$repo_root" - mirror_push + if [ "$mirrors_ok" = true ]; then + source "${FACTORY_ROOT}/lib/mirrors.sh" + export PROJECT_REPO_ROOT="$repo_root" + if mirror_push; then + echo "Mirrors: initial sync complete" + else + echo "Warning: mirror push failed" >&2 + fi + fi fi # Encrypt secrets if SOPS + age are available @@ -2098,9 +837,16 @@ p.write_text(text) # Activate default agents (zero-cost when idle — they only invoke Claude # when there is actual work, so an empty project burns no LLM tokens) mkdir -p "${FACTORY_ROOT}/state" - touch "${FACTORY_ROOT}/state/.dev-active" - touch "${FACTORY_ROOT}/state/.reviewer-active" - touch "${FACTORY_ROOT}/state/.gardener-active" + + # State files are idempotent — create if missing, skip if present + for state_file in ".dev-active" ".reviewer-active" ".gardener-active"; do + if [ -f "${FACTORY_ROOT}/state/${state_file}" ]; then + echo "State: ${state_file} (already active)" + else + touch "${FACTORY_ROOT}/state/${state_file}" + echo "State: ${state_file} (created)" + fi + done echo "" echo "Done. Project ${project_name} is ready." @@ -2565,594 +1311,10 @@ disinto_shell() { # Creates a Forgejo user and .profile repo for an agent. 
# Usage: disinto hire-an-agent [--formula ] -disinto_hire_an_agent() { - local agent_name="${1:-}" - local role="${2:-}" - local formula_path="" - local local_model="" - local poll_interval="" - - if [ -z "$agent_name" ] || [ -z "$role" ]; then - echo "Error: agent-name and role required" >&2 - echo "Usage: disinto hire-an-agent [--formula ] [--local-model ] [--poll-interval ]" >&2 - exit 1 - fi - shift 2 - - # Parse flags - while [ $# -gt 0 ]; do - case "$1" in - --formula) - formula_path="$2" - shift 2 - ;; - --local-model) - local_model="$2" - shift 2 - ;; - --poll-interval) - poll_interval="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" >&2 - exit 1 - ;; - esac - done - - # Default formula path - if [ -z "$formula_path" ]; then - formula_path="${FACTORY_ROOT}/formulas/${role}.toml" - fi - - # Validate formula exists - if [ ! -f "$formula_path" ]; then - echo "Error: formula not found at ${formula_path}" >&2 - exit 1 - fi - - echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────" - echo "Formula: ${formula_path}" - if [ -n "$local_model" ]; then - echo "Local model: ${local_model}" - echo "Poll interval: ${poll_interval:-300}s" - fi - - # Ensure FORGE_TOKEN is set - if [ -z "${FORGE_TOKEN:-}" ]; then - echo "Error: FORGE_TOKEN not set" >&2 - exit 1 - fi - - # Get Forge URL - local forge_url="${FORGE_URL:-http://localhost:3000}" - echo "Forge: ${forge_url}" - - # Step 1: Create user via API (skip if exists) - echo "" - echo "Step 1: Creating user '${agent_name}' (if not exists)..." 
- - local user_exists=false - local user_pass="" - local admin_pass="" - - # Read admin password from .env for standalone runs (#184) - local env_file="${FACTORY_ROOT}/.env" - if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) - fi - - # Get admin token early (needed for both user creation and password reset) - local admin_user="disinto-admin" - admin_pass="${admin_pass:-admin}" - local admin_token="" - local admin_token_name="temp-token-$(date +%s)" - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - if [ -z "$admin_token" ]; then - # Token might already exist — try listing - admin_token=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || admin_token="" - fi - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - echo " Cannot proceed without admin privileges" >&2 - exit 1 - fi - - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then - user_exists=true - echo " User '${agent_name}' already exists" - # Reset user password so we can get a token (#184) - user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - # Reset password using basic auth (admin token fallback would poison subsequent calls) - if curl -sf -X PATCH \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${agent_name}" \ - -d "{\"password\":\"${user_pass}\"}" >/dev/null 2>&1; then - echo " Reset password for existing user '${agent_name}'" - else - echo " Warning: could not reset password for existing 
user" >&2 - fi - else - # Create user using basic auth (admin token fallback would poison subsequent calls) - # Create the user - user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - if curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users" \ - -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then - echo " Created user '${agent_name}'" - else - echo " Warning: failed to create user via admin API" >&2 - # Try alternative: user might already exist - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then - user_exists=true - echo " User '${agent_name}' exists (confirmed)" - else - echo " Error: failed to create user '${agent_name}'" >&2 - exit 1 - fi - fi - fi - - # Step 2: Create .profile repo on Forgejo - echo "" - echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." 
- - local repo_exists=false - if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then - repo_exists=true - echo " Repo '${agent_name}/.profile' already exists" - else - # Get user token for creating repo - # Always try to get token using user_pass (set in Step 1 for new users, reset for existing) - local user_token="" - user_token=$(curl -sf -X POST \ - -u "${agent_name}:${user_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${agent_name}/tokens" \ - -d "{\"name\":\".profile-repo-token\",\"scopes\":[\"repository\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || user_token="" - - if [ -z "$user_token" ]; then - # Try listing existing tokens - user_token=$(curl -sf \ - -u "${agent_name}:${user_pass}" \ - "${forge_url}/api/v1/users/${agent_name}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || user_token="" - fi - - # Create the repo using the user's namespace (user/repos with user_token creates in that user's namespace) - # or use admin API to create in specific user's namespace - local repo_created=false - local create_output - - if [ -n "$user_token" ]; then - # Try creating as the agent user (user token creates in that user's namespace) - create_output=$(curl -sf -X POST \ - -H "Authorization: token ${user_token}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/repos" \ - -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true - - if echo "$create_output" | grep -q '"id":\|[0-9]'; then - repo_created=true - echo " Created repo '${agent_name}/.profile'" - fi - fi - - # If user token failed or wasn't available, use admin API to create in agent's namespace - if [ "$repo_created" = false ]; then - echo " Using admin API to create repo in ${agent_name}'s namespace" - create_output=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - 
"${forge_url}/api/v1/admin/users/${agent_name}/repos" \ - -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true - - if echo "$create_output" | grep -q '"id":\|[0-9]'; then - repo_created=true - echo " Created repo '${agent_name}/.profile' (via admin API)" - fi - fi - - if [ "$repo_created" = false ]; then - echo " Error: failed to create repo '${agent_name}/.profile'" >&2 - echo " Response: ${create_output}" >&2 - exit 1 - fi - fi - - # Step 3: Clone repo and create initial commit - echo "" - echo "Step 3: Cloning repo and creating initial commit..." - - local clone_dir="/tmp/.profile-clone-${agent_name}" - rm -rf "$clone_dir" - mkdir -p "$clone_dir" - - # Build authenticated clone URL using basic auth (user_pass is always set in Step 1) - if [ -z "${user_pass:-}" ]; then - echo " Error: no user password available for cloning" >&2 - exit 1 - fi - - local clone_url="${forge_url}/${agent_name}/.profile.git" - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|") - auth_url="${auth_url}/.profile.git" - - # Display unauthenticated URL (auth token only in actual git clone command) - echo " Cloning: ${forge_url}/${agent_name}/.profile.git" - - # Try authenticated clone first (required for private repos) - if ! git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then - echo " Error: failed to clone repo with authentication" >&2 - echo " Note: Ensure the user has a valid API token with repository access" >&2 - rm -rf "$clone_dir" - exit 1 - fi - - # Configure git - git -C "$clone_dir" config user.name "disinto-admin" - git -C "$clone_dir" config user.email "disinto-admin@localhost" - - # Create directory structure - echo " Creating directory structure..." 
- mkdir -p "${clone_dir}/journal" - mkdir -p "${clone_dir}/knowledge" - touch "${clone_dir}/journal/.gitkeep" - touch "${clone_dir}/knowledge/.gitkeep" - - # Copy formula - echo " Copying formula..." - cp "$formula_path" "${clone_dir}/formula.toml" - - # Create README - if [ ! -f "${clone_dir}/README.md" ]; then - cat > "${clone_dir}/README.md" </dev/null; then - git -C "$clone_dir" commit -m "chore: initial .profile setup" -q - git -C "$clone_dir" push origin main 2>&1 >/dev/null || \ - git -C "$clone_dir" push origin master 2>&1 >/dev/null || true - echo " Committed: initial .profile setup" - else - echo " No changes to commit" - fi - - rm -rf "$clone_dir" - - # Step 4: Set up branch protection - echo "" - echo "Step 4: Setting up branch protection..." - - # Source branch-protection.sh helper - local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh" - if [ -f "$bp_script" ]; then - # Source required environment - if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then - source "${FACTORY_ROOT}/lib/env.sh" - fi - - # Set up branch protection for .profile repo - if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then - echo " Branch protection configured for main branch" - echo " - Requires 1 approval before merge" - echo " - Admin-only merge enforcement" - echo " - Journal branch created for direct agent pushes" - else - echo " Warning: could not configure branch protection (Forgejo API may not be available)" - echo " Note: Branch protection can be set up manually later" - fi - else - echo " Warning: branch-protection.sh not found at ${bp_script}" - fi - - # Step 5: Create state marker - echo "" - echo "Step 5: Creating state marker..." - - local state_dir="${FACTORY_ROOT}/state" - mkdir -p "$state_dir" - local state_file="${state_dir}/.${role}-active" - - if [ ! 
-f "$state_file" ]; then - touch "$state_file" - echo " Created: ${state_file}" - else - echo " State marker already exists: ${state_file}" - fi - - # Step 6: Set up local model agent (if --local-model specified) - if [ -n "$local_model" ]; then - echo "" - echo "Step 6: Configuring local model agent..." - - local compose_file="${FACTORY_ROOT}/docker-compose.yml" - local override_file="${FACTORY_ROOT}/docker-compose.override.yml" - local override_dir - override_dir=$(dirname "$override_file") - mkdir -p "$override_dir" - - # Validate model endpoint is reachable - echo " Validating model endpoint: ${local_model}" - if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then - # Try /v1/chat/completions as fallback endpoint check - if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then - echo " Warning: model endpoint may not be reachable at ${local_model}" - echo " Continuing with configuration..." - fi - else - echo " Model endpoint is reachable" - fi - - # Generate service name from agent name (lowercase) - local service_name="agents-${agent_name}" - service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]') - - # Set default poll interval - local interval="${poll_interval:-300}" - - # Generate the override compose file - # Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time - # \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion - cat > "$override_file" < -# Example: disinto release v1.2.0 - -disinto_release() { - local version="${1:-}" - local formula_path="${FACTORY_ROOT}/formulas/release.toml" - - if [ -z "$version" ]; then - echo "Error: version required" >&2 - echo "Usage: disinto release " >&2 - echo "Example: disinto release v1.2.0" >&2 - exit 1 - fi - - # Validate version format (must start with 'v' followed by semver) - if ! 
echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then - echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2 - exit 1 - fi - - # Load project config to get FORGE_OPS_REPO - if [ -z "${PROJECT_NAME:-}" ]; then - # PROJECT_NAME is unset - detect project TOML from projects/ directory - local found_toml - found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1) - if [ -n "$found_toml" ]; then - source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml" - fi - else - local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml" - if [ -f "$project_toml" ]; then - source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml" - fi - fi - - # Check formula exists - if [ ! -f "$formula_path" ]; then - echo "Error: release formula not found at ${formula_path}" >&2 - exit 1 - fi - - # Get the ops repo root - local ops_root="${FACTORY_ROOT}/../disinto-ops" - if [ ! -d "${ops_root}/.git" ]; then - echo "Error: ops repo not found at ${ops_root}" >&2 - echo " Run 'disinto init' to set up the ops repo first" >&2 - exit 1 - fi - - # Generate a unique ID for the vault item - local id="release-${version//./}" - local vault_toml="${ops_root}/vault/actions/${id}.toml" - - # Create vault TOML with the specific version - cat > "$vault_toml" </dev/null || true - - # Push branch - git push -u origin "$branch_name" 2>/dev/null || { - echo "Error: failed to push branch" >&2 - exit 1 - } - - # Create PR - local pr_response - pr_response=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \ - -d "{\"title\":\"${pr_title}\",\"head\":\"${branch_name}\",\"base\":\"main\",\"body\":\"$(echo "$pr_body" | sed ':a;N;$!ba;s/\n/\\n/g')\"}" 2>/dev/null) || { - echo "Error: failed to create PR" >&2 - echo "Response: ${pr_response}" >&2 - exit 1 - } - - local pr_number - pr_number=$(echo "$pr_response" | jq -r 
'.number') - - local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}" - - # Enable auto-merge on the PR — Forgejo will auto-merge after approval - _vault_log "Enabling auto-merge for PR #${pr_number}" - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \ - -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { - echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2 - } - - echo "" - echo "Release PR created: ${pr_url}" - echo "" - echo "Next steps:" - echo " 1. Review the PR" - echo " 2. Approve the PR (auto-merge will trigger after approval)" - echo " 3. The vault runner will execute the release formula" - echo "" - echo "After merge, the release will:" - echo " 1. Tag Forgejo main with ${version}" - echo " 2. Push tag to mirrors (Codeberg, GitHub)" - echo " 3. Build and tag the agents Docker image" - echo " 4. Restart agent containers" -} +# disinto_release() is sourced from lib/release.sh # ── ci-logs command ────────────────────────────────────────────────────────── # Reads CI logs from the Woodpecker SQLite database. diff --git a/dev/AGENTS.md b/dev/AGENTS.md index 2b787f1..7920824 100644 --- a/dev/AGENTS.md +++ b/dev/AGENTS.md @@ -1,4 +1,4 @@ - + # Dev Agent **Role**: Implement issues autonomously — write code, push branches, address @@ -14,9 +14,8 @@ in-progress issues are also picked up. The direct-merge scan runs before the loc check so approved PRs get merged even while a dev-agent session is active. **Key files**: -- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. 
Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed` (replaced `prediction/backlog` — that label no longer exists) +- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. `BOT_USER` is resolved once at startup via the Forge `/user` API and cached for all assignee checks. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming — skips if assigned to a different bot user. **Stale branch abandonment**: closes PRs and deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start). **Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`. If the issue has a `vision` label, sets `BLOCKED_BY_INPROGRESS=true` and skips further stale checks (vision issues are managed by the architect). If the issue is assigned to `$BOT_USER` (this agent), checks for pending review feedback first — if an open PR has `REQUEST_CHANGES`, spawns the dev-agent to address it before setting `BLOCKED_BY_INPROGRESS=true`; otherwise just sets blocked. If assigned to another agent, logs and falls through (does not block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds `blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work, filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents' PRs do not block this agent's pipeline (#358, #369). **Pre-lock merge scan own-PRs only**: the direct-merge scan only merges PRs whose linked issue is assigned to this agent — skips PRs owned by other bot users (#374). 
- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval -- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge. - `dev/phase-test.sh` — Integration test for the phase protocol **Environment variables consumed** (via `lib/env.sh` + project TOML): @@ -33,7 +32,7 @@ check so approved PRs get merged even while a dev-agent session is active. **Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. -**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file +**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux session → phase file drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session starts clean; the reinject prompt tells Claude not to re-escalate for the same reason. 
diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index 7d852df..ba69d5b 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -42,6 +42,11 @@ log() { printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" } +# Resolve current agent identity once at startup — cache for all assignee checks +BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API%%/repos*}/user" | jq -r '.login') || BOT_USER="" +log "running as agent: ${BOT_USER}" + # ============================================================================= # CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3) # ============================================================================= @@ -94,6 +99,101 @@ is_blocked() { | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1 } +# ============================================================================= +# STALENESS DETECTION FOR IN-PROGRESS ISSUES +# ============================================================================= + +# Check if in-progress label was added recently (within grace period). +# Prevents race where a poller marks an issue as stale before the claiming +# agent's assign + label sequence has fully propagated. See issue #471. +# Args: issue_number [grace_seconds] +# Returns: 0 if recently added (within grace period), 1 if not +in_progress_recently_added() { + local issue="$1" grace="${2:-60}" + local now label_ts delta + + now=$(date +%s) + + # Query issue timeline for the most recent in-progress label event. + # Forgejo serializes CommentType as an integer, not a string — + # CommentTypeLabel is 7 in the Gitea/Forgejo enum. 
+ label_ts=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${issue}/timeline" | \ + jq -r '[.[] | select(.type == 7) | select(.label.name == "in-progress")] | last | .created_at // empty') || true + + if [ -z "$label_ts" ]; then + return 1 # no label event found — not recently added + fi + + # Convert ISO timestamp to epoch and compare + local label_epoch + label_epoch=$(date -d "$label_ts" +%s 2>/dev/null || echo 0) + delta=$(( now - label_epoch )) + + if [ "$delta" -lt "$grace" ]; then + return 0 # within grace period + fi + return 1 +} + +# Check if there's an open PR for a specific issue +# Args: issue_number +# Returns: 0 if open PR exists, 1 if not +open_pr_exists() { + local issue="$1" + local branch="fix/issue-${issue}" + local pr_num + + pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$branch" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + + [ -n "$pr_num" ] +} + +# Relabel a stale in-progress issue to blocked with diagnostic comment +# Args: issue_number reason +# Uses shared helpers from lib/issue-lifecycle.sh +relabel_stale_issue() { + local issue="$1" reason="$2" + + log "relabeling stale in-progress issue #${issue} to blocked: ${reason}" + + # Remove in-progress label + local ip_id + ip_id=$(_ilc_in_progress_id) + if [ -n "$ip_id" ]; then + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${issue}/labels/${ip_id}" >/dev/null 2>&1 || true + fi + + # Add blocked label + local bk_id + bk_id=$(_ilc_blocked_id) + if [ -n "$bk_id" ]; then + curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${issue}/labels" \ + -d "{\"labels\":[${bk_id}]}" >/dev/null 2>&1 || true + fi + + # Post diagnostic comment using shared helper + local comment_body + comment_body=$( + printf '%s\n\n' '### Stale in-progress issue detected' + printf '%s\n' '| Field | 
Value |' + printf '%s\n' '|---|---|' + printf '| Detection reason | `%s` |\n' "$reason" + printf '| Timestamp | `%s` |\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" + printf '%s\n\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.' + printf '%s\n' '**Action required:** A maintainer should triage this issue.' + ) + _ilc_post_comment "$issue" "$comment_body" + + _ilc_log "stale issue #${issue} relabeled to blocked: ${reason}" +} + # ============================================================================= # HELPER: handle CI-exhaustion check/block (DRY for 3 call sites) # Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not. @@ -278,6 +378,16 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then + # Check if issue is assigned to this agent — only merge own PRs + if [ "$PL_ISSUE" -gt 0 ]; then + PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${PL_ISSUE}") || true + PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true + if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then + log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)" + continue + fi + fi if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then PL_MERGED_ANY=true fi @@ -301,6 +411,9 @@ if [ -f "$LOCKFILE" ]; then rm -f "$LOCKFILE" fi +# --- Fetch origin refs before any stale branch checks --- +git fetch origin --prune 2>/dev/null || true + # --- Memory guard --- memory_guard 2000 @@ -309,129 +422,225 @@ memory_guard 2000 # ============================================================================= log "checking for in-progress issues" -# Get current bot identity for assignee checks -BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ -
"${API%%/repos*}/user" | jq -r '.login') || BOT_USER="" - ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/issues?state=open&labels=in-progress&limit=10&type=issues") ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length') +BLOCKED_BY_INPROGRESS=false if [ "$ORPHAN_COUNT" -gt 0 ]; then ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number') - # Formula guard: formula-labeled issues should not be worked on by dev-agent. - # Remove in-progress label and skip to prevent infinite respawn cycle (#115). - ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true - SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true - if [ -n "$SKIP_LABEL" ]; then - log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" - IP_ID=$(_ilc_in_progress_id) - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true - exit 0 + # Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale + OPEN_PR=false + if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -e --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch)' >/dev/null 2>&1; then + OPEN_PR=true fi - # Check if there's already an open PR for this issue - HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true + # Skip vision-labeled issues — they are managed by architect agent, not dev-poll + issue_labels=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}" | jq -r '[.labels[].name] | join(",")') + if echo "$issue_labels" | grep -q "vision"; then + log "issue #${ISSUE_NUM} has 'vision' label — skipping stale detection (managed by 
architect)" + BLOCKED_BY_INPROGRESS=true + fi - if [ -n "$HAS_PR" ]; then - # Check if branch is stale (behind primary branch) - BRANCH="fix/issue-${ISSUE_NUM}" - AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999") - if [ "$AHEAD" -gt 0 ]; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" - # Close the PR via API - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${HAS_PR}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true - # Delete the branch via git push - git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true - # Reset to fresh start on primary branch - git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true - git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true - # Exit to restart poll cycle (issue will be picked up fresh) - exit 0 - fi + # Check if issue has an assignee — only block on issues assigned to this agent + assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""') + if [ -n "$assignee" ]; then + if [ "$assignee" = "$BOT_USER" ]; then + # Check if my PR has review feedback to address before exiting + HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true - PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true - CI_STATE=$(ci_commit_status "$PR_SHA") || true + if [ -n "$HAS_PR" ]; then + # Check for REQUEST_CHANGES review feedback + REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews") || true + HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ + jq -r 
'[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true - # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed - if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then - CI_STATE="success" - log "PR #${HAS_PR} has no code files — treating CI as passed" - fi - - # Check formal reviews (single fetch to avoid race window) - REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}/reviews") || true - HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true - HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true - - if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then - if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then - exit 0 - fi - # Direct merge failed (conflicts?) — fall back to dev-agent - log "falling back to dev-agent for PR #${HAS_PR} merge" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" - exit 0 - - # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is - # pending/unknown. Definitive CI failure is handled by the elif below. - elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (review fix)" - exit 0 - - elif ci_failed "$CI_STATE"; then - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then - # Fall through to backlog scan instead of exit - : - else - # Increment at actual launch time (not on guard-hit paths) - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then - exit 0 # exhausted between check and launch + if [ "${HAS_CHANGES:-0}" -gt 0 ]; then + log "issue #${ISSUE_NUM} has review feedback — spawning agent" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" + BLOCKED_BY_INPROGRESS=true + else + log "issue #${ISSUE_NUM} assigned to me — my thread is busy" + BLOCKED_BY_INPROGRESS=true fi - log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" - exit 0 + else + log "issue #${ISSUE_NUM} assigned to me — my thread is busy" + BLOCKED_BY_INPROGRESS=true fi - else - log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" - exit 0 + else + log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking" + # Issue assigned to another agent — don't block, fall through to backlog fi - else - # Check assignee before adopting orphaned issue - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}") || true - ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true + fi - if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then - log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)" - # Remove in-progress label since this agent isn't working on it - IP_ID=$(_ilc_in_progress_id) - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 ||
true - exit 0 + # Only proceed with in-progress checks if not blocked by another agent + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + # Check for dev-agent lock file (agent may be running in another container) + LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt" + if [ -f "$LOCK_FILE" ]; then + log "issue #${ISSUE_NUM} has agent lock file — trusting active work" + BLOCKED_BY_INPROGRESS=true fi - log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)" + if [ -z "${assignee:-}" ] && [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then + # Grace period: skip if in-progress label was added <60s ago (issue #471) + if in_progress_recently_added "$ISSUE_NUM" 60; then + log "issue #${ISSUE_NUM} in-progress label added <60s ago — skipping stale detection (grace period)" + BLOCKED_BY_INPROGRESS=true + else + log "issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked" + relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock" + BLOCKED_BY_INPROGRESS=true + fi + fi + + # Formula guard: formula-labeled issues should not be worked on by dev-agent. + # Remove in-progress label and skip to prevent infinite respawn cycle (#115).
+ if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true + SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true + if [ -n "$SKIP_LABEL" ]; then + log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" + IP_ID=$(_ilc_in_progress_id) + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true + BLOCKED_BY_INPROGRESS=true + fi + fi + + # Check if there's already an open PR for this issue + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + + if [ -n "$HAS_PR" ]; then + # Check if branch is stale (behind primary branch) + BRANCH="fix/issue-${ISSUE_NUM}" + AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") + if [ "$AHEAD" -gt 0 ]; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" + # Close the PR via API + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${HAS_PR}" \ + -d '{"state":"closed"}' >/dev/null 2>&1 || true + # Delete the branch via git push + git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true + # Reset to fresh start on primary branch + git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true + git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true + BLOCKED_BY_INPROGRESS=true + fi + + # Only process PR if not abandoned (stale branch check above) + if [ "$BLOCKED_BY_INPROGRESS" = false ]; then + PR_SHA=$(curl -sf -H 
"Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true + CI_STATE=$(ci_commit_status "$PR_SHA") || true + + # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed + if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then + CI_STATE="success" + log "PR #${HAS_PR} has no code files — treating CI as passed" + fi + + # Check formal reviews (single fetch to avoid race window) + REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews") || true + HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true + HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + + if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then + if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then + BLOCKED_BY_INPROGRESS=true + else + # Direct merge failed (conflicts?) — fall back to dev-agent + log "falling back to dev-agent for PR #${HAS_PR} merge" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" + BLOCKED_BY_INPROGRESS=true + fi + + # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is + # pending/unknown. Definitive CI failure is handled by the elif below. 
+ elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then + # Check if issue is assigned to this agent — skip if assigned to another bot + ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}") || true + assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true + if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} REQUEST_CHANGES but assigned to ${assignee} — skipping" + # Don't block — fall through to backlog + BLOCKED_BY_INPROGRESS=false + else + log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" + BLOCKED_BY_INPROGRESS=true + fi + + elif ci_failed "$CI_STATE"; then + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then + # Fall through to backlog scan instead of exit + : + else + # Increment at actual launch time (not on guard-hit paths) + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then + BLOCKED_BY_INPROGRESS=true # exhausted between check and launch + else + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (CI fix)" + BLOCKED_BY_INPROGRESS=true + fi + fi + + else + log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" + BLOCKED_BY_INPROGRESS=true + fi + fi + else + # Check assignee before adopting orphaned issue + ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}") || true + ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true + + if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then + log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)" + # Remove in-progress label since this agent isn't working on it + IP_ID=$(_ilc_in_progress_id) + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true + # Don't block — fall through to backlog + else + log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (recovery)" + BLOCKED_BY_INPROGRESS=true + fi + fi + fi + fi + + # If blocked by in-progress work, exit now + if [ "$BLOCKED_BY_INPROGRESS" = true ]; then exit 0 fi fi @@ -501,6 +710,14 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do # Stuck: REQUEST_CHANGES or CI failure -> spawn agent if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then + # Check if issue is assigned to this agent — skip if assigned to another bot + ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${STUCK_ISSUE}") || true + assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true + if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then + log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) REQUEST_CHANGES but assigned to ${assignee} — skipping" + continue # skip this PR, check next stuck PR or fall through to backlog + fi log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first" nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 & log "started dev-agent PID $! 
for stuck PR #${PR_NUM}" @@ -593,7 +810,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do if [ -n "$EXISTING_PR" ]; then # Check if branch is stale (behind primary branch) BRANCH="fix/issue-${ISSUE_NUM}" - AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999") + AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") if [ "$AHEAD" -gt 0 ]; then log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" # Close the PR via API @@ -668,9 +885,32 @@ done # Single-threaded per project: if any issue has an open PR waiting for review/CI, # don't start new work — let the pipeline drain first +# But only block on PRs assigned to this agent (per-agent logic from #358) if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then - log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}" - exit 0 + # Filter to only this agent's waiting PRs + MY_WAITING_PRS="" + for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do + pr_num="${pr_num#\#}" # Remove leading # + # Check if this PR's issue is assigned to this agent + pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${pr_num}" 2>/dev/null) || true + pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true + issue_num=$(printf '%s\n' "$pr_branch" | sed -n 's|.*fix/issue-\([0-9][0-9]*\).*|\1|p') + if [ -z "$issue_num" ]; then + continue + fi + issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true + if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then + MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}" + fi + done + + if [ -n "$MY_WAITING_PRS" ]; then + log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}" + exit 0 + fi + log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with
#${READY_ISSUE}" fi if [ -z "$READY_ISSUE" ]; then diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh deleted file mode 100644 index 8f3b3b4..0000000 --- a/dev/phase-handler.sh +++ /dev/null @@ -1,820 +0,0 @@ -#!/usr/bin/env bash -# dev/phase-handler.sh — Phase callback functions for dev-agent.sh -# -# Source this file from agent orchestrators after lib/agent-session.sh is loaded. -# Defines: post_refusal_comment(), _on_phase_change(), build_phase_protocol_prompt() -# -# Required globals (set by calling agent before or after sourcing): -# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT -# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE -# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE -# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER -# -# Globals with defaults (agents can override after sourcing): -# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS, -# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND, -# CLAIMED, PHASE_POLL_INTERVAL -# -# Calls back to agent-defined helpers: -# cleanup_worktree(), cleanup_labels(), status(), log() -# -# shellcheck shell=bash -# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling -# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh - -# Load secret scanner for redacting tmux output before posting to issues -# shellcheck source=../lib/secret-scan.sh -source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh" - -# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.) -# shellcheck source=../lib/ci-helpers.sh -source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh" - -# Load mirror push helper -# shellcheck source=../lib/mirrors.sh -source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh" - -# --- Default callback stubs (agents can override after sourcing) --- -# cleanup_worktree and cleanup_labels are called during phase transitions. 
-# Provide no-op defaults so phase-handler.sh is self-contained; sourcing -# agents override these with real implementations. -if ! declare -f cleanup_worktree >/dev/null 2>&1; then - cleanup_worktree() { :; } -fi -if ! declare -f cleanup_labels >/dev/null 2>&1; then - cleanup_labels() { :; } -fi - -# --- Default globals (agents can override after sourcing) --- -: "${CI_POLL_TIMEOUT:=1800}" -: "${REVIEW_POLL_TIMEOUT:=10800}" -: "${MAX_CI_FIXES:=3}" -: "${MAX_REVIEW_ROUNDS:=5}" -: "${CI_RETRY_COUNT:=0}" -: "${CI_FIX_COUNT:=0}" -: "${REVIEW_ROUND:=0}" -: "${PR_NUMBER:=}" -: "${CLAIMED:=false}" -: "${PHASE_POLL_INTERVAL:=30}" - -# --- Post diagnostic comment + label issue as blocked --- -# Captures tmux pane output, posts a structured comment on the issue, removes -# in-progress label, and adds the "blocked" label. -# -# Args: reason [session_name] -# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API -post_blocked_diagnostic() { - local reason="$1" - local session="${2:-${SESSION_NAME:-}}" - - # Capture last 50 lines from tmux pane (before kill) - local tmux_output="" - if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then - tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true) - fi - - # Redact any secrets from tmux output before posting to issue - if [ -n "$tmux_output" ]; then - tmux_output=$(redact_secrets "$tmux_output") - fi - - # Build diagnostic comment body - local comment - comment="### Session failure diagnostic - -| Field | Value | -|---|---| -| Exit reason | \`${reason}\` | -| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |" - [ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ] && \ - comment="${comment} -| PR | #${PR_NUMBER} |" - - if [ -n "$tmux_output" ]; then - comment="${comment} - -
Last 50 lines from tmux pane - -\`\`\` -${tmux_output} -\`\`\` -
" - fi - - # Post comment to issue - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/comments" \ - -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true - - # Remove in-progress, add blocked - cleanup_labels - local blocked_id - blocked_id=$(ensure_blocked_label_id) - if [ -n "$blocked_id" ]; then - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/labels" \ - -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true - fi - CLAIMED=false - _BLOCKED_POSTED=true -} - -# --- Build phase protocol prompt (shared across agents) --- -# Generates the phase-signaling instructions for Claude prompts. -# Args: phase_file summary_file branch [remote] -# Output: The protocol text (stdout) -build_phase_protocol_prompt() { - local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}" - cat <<_PHASE_PROTOCOL_EOF_ -## Phase-Signaling Protocol (REQUIRED) - -You are running in a persistent tmux session managed by an orchestrator. -Communicate progress by writing to the phase file. The orchestrator watches -this file and injects events (CI results, review feedback) back into this session. - -### Key files -\`\`\` -PHASE_FILE="${_pf}" -SUMMARY_FILE="${_sf}" -\`\`\` - -### Phase transitions — write these exactly: - -**After committing and pushing your branch:** -\`\`\`bash -# Rebase on target branch before push to avoid merge conflicts -git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} -git push ${_remote} ${_br} -# Write a short summary of what you implemented: -printf '%s' "" > "\${SUMMARY_FILE}" -# Signal the orchestrator to create the PR and watch for CI: -echo "PHASE:awaiting_ci" > "${_pf}" -\`\`\` -Then STOP and wait. The orchestrator will inject CI results. 
- -**When you receive a "CI passed" injection:** -\`\`\`bash -echo "PHASE:awaiting_review" > "${_pf}" -\`\`\` -Then STOP and wait. The orchestrator will inject review feedback. - -**When you receive a "CI failed:" injection:** -Fix the CI issue, then rebase on target branch and push: -\`\`\`bash -git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} -git push --force-with-lease ${_remote} ${_br} -echo "PHASE:awaiting_ci" > "${_pf}" -\`\`\` -Then STOP and wait. - -**When you receive a "Review: REQUEST_CHANGES" injection:** -Address ALL review feedback, then rebase on target branch and push: -\`\`\`bash -git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} -git push --force-with-lease ${_remote} ${_br} -echo "PHASE:awaiting_ci" > "${_pf}" -\`\`\` -(CI runs again after each push — always write awaiting_ci, not awaiting_review) - -**When you need human help (CI exhausted, merge blocked, stuck on a decision):** -\`\`\`bash -printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}" -\`\`\` -Then STOP and wait. A human will review and respond via the forge. - -**On unrecoverable failure:** -\`\`\`bash -printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}" -\`\`\` -_PHASE_PROTOCOL_EOF_ -} - -# --- Merge helper --- -# do_merge — attempt to merge PR via forge API. -# Args: pr_num -# Returns: -# 0 = merged successfully -# 1 = other failure (conflict, network error, etc.) 
-# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written -do_merge() { - local pr_num="$1" - local merge_response merge_http_code merge_body - merge_response=$(curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H 'Content-Type: application/json' \ - "${API}/pulls/${pr_num}/merge" \ - -d '{"Do":"merge","delete_branch_after_merge":true}') || true - merge_http_code=$(echo "$merge_response" | tail -1) - merge_body=$(echo "$merge_response" | sed '$d') - - if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then - log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})" - return 0 - fi - - # HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll). - # Before escalating, check whether the PR was already merged by another agent. - if [ "$merge_http_code" = "405" ]; then - local pr_state - pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false" - if [ "$pr_state" = "true" ]; then - log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success" - return 0 - fi - log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" - printf 'PHASE:escalate\nReason: %s\n' \ - "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \ - > "$PHASE_FILE" - return 2 - fi - - log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}" - return 1 -} - -# --- Refusal comment helper --- -post_refusal_comment() { - local emoji="$1" title="$2" body="$3" - local last_has_title - last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE}/comments?limit=5" | \ - jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true - if [ "$last_has_title" = "true" ]; then - log "skipping duplicate refusal comment: 
${title}" - return 0 - fi - local comment - comment="${emoji} **Dev-agent: ${title}** - -${body} - ---- -*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" - printf '%s' "$comment" > "/tmp/refusal-comment.txt" - jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json" - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${ISSUE}/comments" \ - --data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \ - log "WARNING: failed to post refusal comment" - rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json" -} - -# ============================================================================= -# PHASE DISPATCH CALLBACK -# ============================================================================= - -# _on_phase_change — Phase dispatch callback for monitor_phase_loop -# Receives the current phase as $1. -# Returns 0 to continue the loop, 1 to break (terminal phase reached). -_on_phase_change() { - local phase="$1" - - # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── - if [ "$phase" = "PHASE:awaiting_ci" ]; then - # Release session lock — Claude is idle during CI polling (#724) - session_lock_release - - # Create PR if not yet created - if [ -z "${PR_NUMBER:-}" ]; then - status "creating PR for issue #${ISSUE}" - IMPL_SUMMARY="" - if [ -f "$IMPL_SUMMARY_FILE" ]; then - # Don't treat refusal JSON as a PR summary - if ! 
jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then - IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") - fi - fi - - printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" - jq -n \ - --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ - --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ - --arg head "$BRANCH" \ - --arg base "${PRIMARY_BRANCH}" \ - '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" - - PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls" \ - --data-binary @"/tmp/pr-request-${ISSUE}.json") - - PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) - PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') - rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" - - if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then - PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') - log "created PR #${PR_NUMBER}" - elif [ "$PR_HTTP_CODE" = "409" ]; then - # PR already exists (race condition) — find it - FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "$BRANCH" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - if [ -n "$FOUND_PR" ]; then - PR_NUMBER="$FOUND_PR" - log "PR already exists: #${PR_NUMBER}" - else - log "ERROR: PR creation got 409 but no existing PR found" - agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." - return 0 - fi - else - log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" - agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again." 
- return 0 - fi - fi - - # No CI configured? Treat as success immediately - if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then - log "no CI configured — treating as passed" - agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project). -Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." - return 0 - fi - - # Poll CI until done or timeout - status "waiting for CI on PR #${PR_NUMBER}" - CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ - curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') - - CI_DONE=false - CI_STATE="unknown" - CI_POLL_ELAPSED=0 - while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do - sleep 30 - CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) - - # Check session still alive during CI wait (exit_marker + tmux fallback) - if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then - log "session died during CI wait" - break - fi - - # Re-fetch HEAD — Claude may have pushed new commits since loop started - CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA") - - CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA") - if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then - CI_DONE=true - [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 - break - fi - done - - if ! $CI_DONE; then - log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" - agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed." - return 0 - fi - - log "CI: ${CI_STATE}" - - if [ "$CI_STATE" = "success" ]; then - agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}. 
-Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: - echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" - else - # Fetch CI error details - PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA") - - FAILED_STEP="" - FAILED_EXIT="" - IS_INFRA=false - if [ -n "$PIPELINE_NUM" ]; then - FAILED_INFO=$(curl -sf \ - -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ - "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ - jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) - FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) - FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) - fi - - log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" - - if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then - IS_INFRA=true - fi - - if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then - CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) - log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" - (cd "$WORKTREE" && git commit --allow-empty \ - -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) - # Rebase on target branch before push to avoid merge conflicts - if ! (cd "$WORKTREE" && \ - git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \ - git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then - log "rebase conflict detected — aborting, agent must resolve" - (cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true - agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically. - -Please resolve merge conflicts manually: -1. Check conflict status: git status -2. Resolve conflicts in the conflicted files -3. Stage resolved files: git add -4. 
Continue rebase: git rebase --continue - -If you cannot resolve conflicts, abort: git rebase --abort -Then write PHASE:escalate with a reason." - return 0 - fi - # Rebase succeeded — push the result - (cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3) - # Touch phase file so we recheck CI on the new SHA - # Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime - touch "$PHASE_FILE" - CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) - return 0 - fi - - CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) - _ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" - if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then - log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" - printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE" - # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate - return 0 - fi - - CI_ERROR_LOG="" - if [ -n "$PIPELINE_NUM" ]; then - CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") - fi - - # Save CI result for crash recovery - printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ - "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ - > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true - - agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). - -Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) - -CI debug tool: - bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} - bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} - -Error snippet: -${CI_ERROR_LOG:-No logs available. 
Use ci-debug.sh to query the pipeline.} - -Instructions: -1. Run ci-debug.sh failures to get the full error output. -2. Read the failing test file(s) — understand what the tests EXPECT. -3. Fix the root cause — do NOT weaken tests. -4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH} - git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH} -5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" -6. Stop and wait." - fi - - # ── PHASE: awaiting_review ────────────────────────────────────────────────── - elif [ "$phase" = "PHASE:awaiting_review" ]; then - # Release session lock — Claude is idle during review wait (#724) - session_lock_release - status "waiting for review on PR #${PR_NUMBER:-?}" - CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle - - if [ -z "${PR_NUMBER:-}" ]; then - log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" - FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "$BRANCH" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - if [ -n "$FOUND_PR" ]; then - PR_NUMBER="$FOUND_PR" - log "found PR #${PR_NUMBER}" - else - agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci." - return 0 - fi - fi - - REVIEW_POLL_ELAPSED=0 - REVIEW_FOUND=false - while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do - sleep 300 # 5 min between review checks - REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) - - # Check session still alive (exit_marker + tmux fallback) - if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then - log "session died during review wait" - REVIEW_FOUND=false - break - fi - - # Check if phase was updated while we wait (e.g., Claude reacted to something) - NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) - if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then - log "phase file updated during review wait — re-entering main loop" - # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer - # loop detects the change on its next tick and dispatches the new phase. - REVIEW_FOUND=true # Prevent timeout injection - # Clean up review-poll sentinel if it exists (session already advanced) - rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" - break - fi - - REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true - REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \ - jq -r --arg sha "$REVIEW_SHA" \ - '[.[] | select(.body | contains(" + If no watermark exists, treat the file as fully stale (review everything). 4. Check for changes since the watermark: git log --oneline ..HEAD -- If zero changes, the file is current — skip it. -5. For stale files: - - Read the AGENTS.md and the source files in that directory - - Update the documentation to reflect code changes since the watermark - - Set the watermark to the HEAD SHA from the preflight step - - Conventions: architecture and WHY not implementation details +5. For each stale file, run a STRUCTURAL DIFF — this is the core of the step: -## Part B: Size limit enforcement (progressive disclosure split) + a. FILE INVENTORY: list files at watermark vs HEAD for this directory: + git ls-tree -r --name-only -- + git ls-tree -r --name-only HEAD -- + Diff the two lists. Categorize: + - NEW files: in HEAD but not in watermark + - DELETED files: in watermark but not in HEAD + - Check AGENTS.md layout section: does it list each current file? 
+ Files present in the directory but absent from the layout = GAPS. + Files listed in the layout but missing from the directory = LIES. -After all updates are done, count lines in the root AGENTS.md: + b. REFERENCE VALIDATION: extract every file path, function name, and + shell variable referenced in the AGENTS.md. For each: + - File paths: verify the file exists (ls or git ls-tree HEAD) + - Function names: grep for the definition in the codebase + - Script names: verify they exist where claimed + Any reference that fails validation is a LIE — flag it for correction. + + c. SEMANTIC CHANGES: for files that existed at both watermark and HEAD, + check if they changed meaningfully: + git diff ..HEAD -- /*.sh /*.py /*.toml + Look for: new exported functions, removed functions, renamed files, + changed CLI flags, new environment variables, new configuration. + Ignore: internal refactors, comment changes, formatting. + +6. For each stale file, apply corrections: + - Add NEW files to the layout section + - Remove DELETED files from the layout section + - Fix every LIE found in reference validation + - Add notes about significant SEMANTIC CHANGES + - Set the watermark to HEAD_SHA + - Conventions: document architecture and WHY, not implementation details + +## Part B: Size limit enforcement + +After all updates, count lines in the root AGENTS.md: wc -l < "$PROJECT_REPO_ROOT/AGENTS.md" -If the root AGENTS.md exceeds 200 lines, perform a progressive disclosure -split. The principle: agent reads the map, drills into detail only when -needed. You wouldn't dump a 500-page wiki on a new hire's first morning. +If it exceeds 200 lines, split verbose sections into per-directory files +using progressive disclosure: -6. Identify per-directory sections to extract. Each agent section under - "## Agents" (e.g. "### Dev (`dev/`)", "### Review (`review/`)") and - each helper section (e.g. "### Shared helpers (`lib/`)") is a candidate. 
- Also extract verbose subsections like "## Issue lifecycle and label - conventions" and "## Phase-Signaling Protocol" into docs/ or the - relevant directory. +7. Identify sections that can be extracted to per-directory files. + Keep the root AGENTS.md as a table of contents — brief overview, + directory layout, summary tables with links to detail files. -7. For each section to extract, create a `{dir}/AGENTS.md` file with: +8. For each extracted section, create a `{dir}/AGENTS.md` with: - Line 1: watermark - - The full section content (role, trigger, key files, env vars, lifecycle) - - Keep the same markdown structure and detail level + - The full section content, preserving structure and detail - Example for dev/: - ``` - - # Dev Agent +9. Replace extracted sections in root with concise summaries + links. - **Role**: Implement issues autonomously ... - **Trigger**: dev-poll.sh runs every 10 min ... - **Key files**: ... - **Environment variables consumed**: ... - **Lifecycle**: ... - ``` - -8. Replace extracted sections in the root AGENTS.md with a concise - directory map table. The root file keeps ONLY: - - Watermark (line 1) - - ## What this repo is (brief overview) - - ## Directory layout (existing tree) - - ## Tech stack - - ## Coding conventions - - ## How to lint and test - - ## Agents — replaced with a summary table pointing to per-dir files: - - ## Agents - - | Agent | Directory | Role | Guide | - |-------|-----------|------|-------| - | Dev | dev/ | Issue implementation | [dev/AGENTS.md](dev/AGENTS.md) | - | Review | review/ | PR review | [review/AGENTS.md](review/AGENTS.md) | - | Gardener | gardener/ | Backlog grooming | [gardener/AGENTS.md](gardener/AGENTS.md) | - | ... | ... | ... | ... | - - - ## Shared helpers — replaced with a brief pointer: - "See [lib/AGENTS.md](lib/AGENTS.md) for the full helper reference." - Keep the summary table if it fits, or move it to lib/AGENTS.md. 
- - - ## Issue lifecycle and label conventions — keep a brief summary - (labels table + dependency convention) or move verbose parts to - docs/PHASE-PROTOCOL.md - - - ## Architecture Decisions — keep in root (humans write, agents enforce) - - - ## Phase-Signaling Protocol — keep a brief summary with pointer: - "See [docs/PHASE-PROTOCOL.md](docs/PHASE-PROTOCOL.md) for the full spec." - -9. Verify the root AGENTS.md is now under 200 lines: - LINE_COUNT=$(wc -l < "$PROJECT_REPO_ROOT/AGENTS.md") - if [ "$LINE_COUNT" -gt 200 ]; then - echo "WARNING: root AGENTS.md still $LINE_COUNT lines after split" - fi - If still over 200, trim further — move more detail into per-directory - files. The root should read like a table of contents, not an encyclopedia. - -10. Each new per-directory AGENTS.md must have a watermark on line 1. - The gardener maintains freshness for ALL AGENTS.md files — root and - per-directory — using the same watermark mechanism from Part A. +10. Verify root is under 200 lines. If still over, extract more. ## Staging -11. Stage ALL AGENTS.md files you created or changed — do NOT commit yet. - All git writes happen in the commit-and-pr step at the end: +11. Stage all AGENTS.md files created or changed: find . -name "AGENTS.md" -not -path "./.git/*" -exec git add {} + -12. If no AGENTS.md files need updating AND root is under 200 lines, - skip this step entirely. +12. If no files need updating AND root is under 200 lines, skip entirely. CRITICAL: If this step fails for any reason, log the failure and move on. Do NOT let an AGENTS.md failure prevent the commit-and-pr step. 
diff --git a/formulas/run-predictor.toml b/formulas/run-predictor.toml index eb14412..97d22b6 100644 --- a/formulas/run-predictor.toml +++ b/formulas/run-predictor.toml @@ -119,27 +119,24 @@ For each weakness you identify, choose one: **Suggested action:** **EXPLOIT** — high confidence, have a theory you can test: - File a prediction/unreviewed issue AND an action issue that dispatches - a formula to generate evidence. + File a prediction/unreviewed issue AND a vault PR that dispatches + a formula to generate evidence (AD-006: external actions go through vault). - The prediction explains the theory. The action generates the proof. - When the planner runs next, evidence is already there. + The prediction explains the theory. The vault PR triggers the proof + after human approval. When the planner runs next, evidence is already there. - Action issue body format (label: action): - Dispatched by predictor to test theory in #. + Vault dispatch (requires lib/vault.sh): + source "$PROJECT_REPO_ROOT/lib/vault.sh" - ## Task - Run with focus on . - - ## Expected evidence - Results in evidence//-.json - - ## Acceptance criteria - - [ ] Formula ran to completion - - [ ] Evidence file written with structured results - - ## Affected files - - evidence// + TOML_CONTENT="id = \"predict--\" +context = \"Test prediction #: — focus: \" +formula = \"\" +secrets = [] +# Unblocks: # +# Expected evidence: evidence//-.json +" + PR_NUM=$(vault_request "predict--" "$TOML_CONTENT") + echo "Vault PR #${PR_NUM} filed to test prediction #" Available formulas (check $PROJECT_REPO_ROOT/formulas/*.toml for current list): cat "$PROJECT_REPO_ROOT/formulas/"*.toml | grep '^name' | head -10 @@ -156,10 +153,10 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO". tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \ --title "" --body "<body>" --labels "prediction/unreviewed" -2. 
File action dispatches (if exploiting): - tea issues create --login "$TEA_LOGIN" --repo "$FORGE_REPO" \ - --title "action: test prediction #NNN — <formula> <focus>" \ - --body "<body>" --labels "action" +2. Dispatch formula via vault (if exploiting): + source "$PROJECT_REPO_ROOT/lib/vault.sh" + PR_NUM=$(vault_request "predict-NNN-<formula>" "$TOML_CONTENT") + # See EXPLOIT section above for TOML_CONTENT format 3. Close superseded predictions: tea issues close <number> --login "$TEA_LOGIN" --repo "$FORGE_REPO" @@ -173,11 +170,11 @@ tea is pre-configured with login "$TEA_LOGIN" and repo "$FORGE_REPO". ## Rules -- Max 5 actions total (predictions + action dispatches combined) -- Each exploit counts as 2 (prediction + action dispatch) +- Max 5 actions total (predictions + vault dispatches combined) +- Each exploit counts as 2 (prediction + vault dispatch) - So: 5 explores, or 2 exploits + 1 explore, or 1 exploit + 3 explores - Never re-file a dismissed prediction without new evidence -- Action issues must reference existing formulas — don't invent formulas +- Vault dispatches must reference existing formulas — don't invent formulas - Be specific: name the file, the metric, the threshold, the formula - If no weaknesses found, file nothing — that's a strong signal the project is healthy diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index 20b1015..ceaf340 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -1,7 +1,7 @@ # formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation) # # Executed by supervisor/supervisor-run.sh via cron (every 20 minutes). -# supervisor-run.sh creates a tmux session with Claude (sonnet) and injects +# supervisor-run.sh runs claude -p via agent-sdk.sh and injects # this formula with pre-collected metrics as context. 
# # Steps: preflight → health-assessment → decide-actions → report → journal @@ -137,14 +137,15 @@ For each finding from the health assessment, decide and execute an action. **P3 Stale PRs (CI done >20min, no push since):** Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code. - Instead, nudge the dev-agent via tmux injection if a session is alive: - # Find the dev session for this issue - SESSION=$(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "dev-.*-${ISSUE_NUM}" | head -1) - if [ -n "$SESSION" ]; then - # Inject a nudge into the dev-agent session - tmux send-keys -t "$SESSION" "# [supervisor] PR stale >20min — CI finished, please push or update" Enter - fi - If no active tmux session exists, note it in the journal for the next dev-poll cycle. + Instead, file a vault item for the dev-agent to pick up: + Write $OPS_REPO_ROOT/vault/pending/stale-pr-${ISSUE_NUM}.md: + # Stale PR: ${PR_TITLE} + ## What + CI finished >20min ago but no git push has been made to the PR branch. + ## Why + P3 — Factory degraded: PRs should be pushed within 20min of CI completion. + ## Unblocks + - Factory health: dev-agent will push the branch and continue the workflow Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs. ### Cannot auto-fix → file vault item @@ -251,7 +252,6 @@ knowledge file in the ops repo: Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md, review-agent.md, git.md. -After writing the journal, write the phase signal: - echo 'PHASE:done' > "$PHASE_FILE" +After writing the journal, the agent session completes automatically. """ needs = ["report"] diff --git a/formulas/triage.toml b/formulas/triage.toml new file mode 100644 index 0000000..a2ec909 --- /dev/null +++ b/formulas/triage.toml @@ -0,0 +1,267 @@ +# formulas/triage.toml — Triage-agent formula (generic template) +# +# This is the base template for triage investigations. +# Project-specific formulas (e.g. 
formulas/triage-harb.toml) extend this by +# overriding the fields in the [project] section and providing stack-specific +# step descriptions. +# +# Triggered by: bug-report + in-triage label combination. +# Set by the reproduce-agent when: +# - Bug was confirmed (reproduced) +# - Quick log analysis did not reveal an obvious root cause +# - Reproduce-agent documented all steps taken and logs examined +# +# Steps: +# 1. read-findings — parse issue comments for prior reproduce-agent evidence +# 2. trace-data-flow — follow symptom through UI → API → backend → data store +# 3. instrumentation — throwaway branch, add logging, restart, observe +# 4. decompose — file backlog issues for each root cause +# 5. link-back — update original issue, swap in-triage → in-progress +# 6. cleanup — delete throwaway debug branch +# +# Best practices: +# - Start from reproduce-agent findings; do not repeat their work +# - Budget: 70% tracing data flow, 30% instrumented re-runs +# - Multiple causes: check if layered (Depends-on) or independent (Related) +# - Always delete the throwaway debug branch before finishing +# - If inconclusive after full turn budget: leave in-triage, post what was +# tried, do NOT relabel — supervisor handles stale triage sessions +# +# Project-specific formulas extend this template by defining: +# - stack_script: how to start/stop the project stack +# - [project].data_flow: layer names (e.g. "chain → indexer → GraphQL → UI") +# - [project].api_endpoints: which APIs/services to inspect +# - [project].stack_lock: stack lock configuration +# - Per-step description overrides with project-specific commands +# +# No hard timeout — runs until Claude hits its turn limit. +# Stack lock held for full run (triage is rare; blocking CI is acceptable). + +name = "triage" +description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues." +version = 2 + +# Set stack_script to the restart command for local stacks. 
+# Leave empty ("") to connect to an existing staging environment. +stack_script = "" + +tools = ["playwright"] + +# --------------------------------------------------------------------------- +# Project-specific extension fields. +# Override these in formulas/triage-<project>.toml. +# --------------------------------------------------------------------------- +[project] +# Human-readable layer names for the data-flow trace (generic default). +# Example project override: "chain → indexer → GraphQL → UI" +data_flow = "UI → API → backend → data store" + +# Comma-separated list of API endpoints or services to inspect. +# Example: "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545" +api_endpoints = "" + +# Stack lock configuration (leave empty for default behavior). +# Example: "full" to hold a full stack lock during triage. +stack_lock = "" + +# --------------------------------------------------------------------------- +# Steps +# --------------------------------------------------------------------------- + +[[steps]] +id = "read-findings" +title = "Read reproduce-agent findings" +description = """ +Before doing anything else, parse all prior evidence from the issue comments. + +1. Fetch the issue body and all comments: + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body' + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body' + +2. Identify the reproduce-agent comment (look for sections like + "Reproduction steps", "Logs examined", "What was tried"). + +3. Extract and note: + - The exact symptom (error message, unexpected value, visual regression) + - Steps that reliably trigger the bug + - Log lines or API responses already captured + - Any hypotheses the reproduce-agent already ruled out + +Do NOT repeat work the reproduce-agent already did. Your job starts where +theirs ended. 
If no reproduce-agent comment is found, note it and proceed +with fresh investigation using the issue body only. +""" + +[[steps]] +id = "trace-data-flow" +title = "Trace data flow from symptom to source" +description = """ +Systematically follow the symptom backwards through each layer of the stack. +Spend ~70% of your total turn budget here before moving to instrumentation. + +Generic layer traversal (adapt to the project's actual stack): + UI → API → backend → data store + +For each layer boundary: + 1. What does the upstream layer send? + 2. What does the downstream layer expect? + 3. Is there a mismatch? If yes — is this the root cause or a symptom? + +Tracing checklist: + a. Start at the layer closest to the visible symptom. + b. Read the relevant source files — do not guess data shapes. + c. Cross-reference API contracts: compare what the code sends vs what it + should send according to schemas, type definitions, or documentation. + d. Check recent git history on suspicious files: + git log --oneline -20 -- <file> + e. Search for related issues or TODOs in the code: + grep -rE "TODO|FIXME|HACK" -- <relevant directory> + +Capture for each layer: + - The data shape flowing in and out (field names, types, nullability) + - Whether the layer's behavior matches its documented contract + - Any discrepancy found + +If a clear root cause becomes obvious during tracing, note it and continue +checking whether additional causes exist downstream. +""" +needs = ["read-findings"] + +[[steps]] +id = "instrumentation" +title = "Add debug instrumentation on a throwaway branch" +description = """ +Use ~30% of your total turn budget here. Only instrument after tracing has +identified the most likely failure points — do not instrument blindly. + +1. Create a throwaway debug branch (NEVER commit this to main): + cd "$PROJECT_REPO_ROOT" + git checkout -b debug/triage-${ISSUE_NUMBER} + +2. 
Add targeted logging at the layer boundaries identified during tracing: + - Console.log / structured log statements around the suspicious code path + - Log the actual values flowing through: inputs, outputs, intermediate state + - Add verbose mode flags if the stack supports them + - Keep instrumentation minimal — only what confirms or refutes the hypothesis + +3. Restart the stack using the configured script (if set): + ${stack_script:-"# No stack_script configured — restart manually or connect to staging"} + +4. Re-run the reproduction steps from the reproduce-agent findings. + +5. Observe and capture new output: + - Paste relevant log lines into your working notes + - Note whether the observed values match or contradict the hypothesis + +6. If the first instrumentation pass is inconclusive, iterate: + - Narrow the scope to the next most suspicious boundary + - Re-instrument, restart, re-run + - Maximum 2-3 instrumentation rounds before declaring inconclusive + +Do NOT push the debug branch. It will be deleted in the cleanup step. +""" +needs = ["trace-data-flow"] + +[[steps]] +id = "decompose" +title = "Decompose root causes into backlog issues" +description = """ +After tracing and instrumentation, articulate each distinct root cause. + +For each root cause found: + +1. Determine the relationship to other causes: + - Layered (one causes another) → use Depends-on in the issue body + - Independent (separate code paths fail independently) → use Related + +2. 
Create a backlog issue for each root cause: + curl -sf -X POST "${FORGE_API}/issues" \\ + -H "Authorization: token ${FORGE_TOKEN}" \\ + -H "Content-Type: application/json" \\ + -d '{ + "title": "fix: <specific description of root cause N>", + "body": "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>", + "labels": [{"name": "backlog"}] + }' + +3. Note the newly created issue numbers. + +If only one root cause is found, still create a single backlog issue with +the specific code location and fix suggestion. + +If the investigation is inconclusive (no clear root cause found), skip this +step and proceed directly to link-back with the inconclusive outcome. +""" +needs = ["instrumentation"] + +[[steps]] +id = "link-back" +title = "Update original issue and relabel" +description = """ +Post a summary comment on the original issue and update its labels. + +### If root causes were found (conclusive): + +Post a comment: + "## Triage findings + + Found N root cause(s): + - #X — <one-line description> (cause 1 of N) + - #Y — <one-line description> (cause 2 of N, depends on #X) + + Data flow traced: <layer where the bug originates> + Instrumentation: <key log output that confirmed the cause> + + Next step: backlog issues above will be implemented in dependency order." + +Then swap labels: + - Remove: in-triage + - Add: in-progress + +### If investigation was inconclusive (turn budget exhausted): + +Post a comment: + "## Triage — inconclusive + + Traced: <layers checked> + Tried: <instrumentation attempts and what they showed> + Hypothesis: <best guess at cause, if any> + + No definitive root cause identified. Leaving in-triage for supervisor + to handle as a stale triage session." + +Do NOT relabel. Leave in-triage. 
The supervisor monitors stale triage +sessions and will escalate or reassign. + +**CRITICAL: Write outcome file** — Always write the outcome to the outcome file: + - If root causes found (conclusive): echo "reproduced" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt + - If inconclusive: echo "needs-triage" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt +""" +needs = ["decompose"] + +[[steps]] +id = "cleanup" +title = "Delete throwaway debug branch" +description = """ +Always delete the debug branch, even if the investigation was inconclusive. + +1. Switch back to the main branch: + cd "$PROJECT_REPO_ROOT" + git checkout "$PRIMARY_BRANCH" + +2. Delete the local debug branch: + git branch -D debug/triage-${ISSUE_NUMBER} + +3. Confirm no remote was pushed (if accidentally pushed, delete it too): + git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true + +4. Verify the worktree is clean: + git status + git worktree list + +A clean repo is a prerequisite for the next dev-agent run. Never leave +debug branches behind — they accumulate and pollute the branch list. +""" +needs = ["link-back"] diff --git a/gardener/AGENTS.md b/gardener/AGENTS.md index cd473ba..942c0b4 100644 --- a/gardener/AGENTS.md +++ b/gardener/AGENTS.md @@ -1,4 +1,4 @@ -<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a --> +<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> # Gardener Agent **Role**: Backlog grooming — detect duplicate issues, missing acceptance @@ -9,7 +9,10 @@ Claude to fix what it can; files vault items for what it cannot. **Trigger**: `gardener-run.sh` runs 4x/day via cron. Sources `lib/guard.sh` and calls `check_active gardener` first — skips if `$FACTORY_ROOT/state/.gardener-active` -is absent. Then creates a tmux session with `claude --model sonnet`, injects +is absent. 
**Early-exit optimization**: if the `$FACTORY_ROOT` HEAD is unchanged since +the last run (compared against the SHA saved in `gardener-last-sha.txt`) and no open `backlog` or `tech-debt` issues exist (checked via Forgejo API), the model +is not invoked — the run exits immediately (no tmux session, no tokens consumed). +Otherwise, creates a tmux session with `claude --model sonnet`, injects `formulas/run-gardener.toml` as context, monitors the phase file, and cleans up on completion or timeout (2h max session). No action issues — the gardener runs directly from cron like the planner, predictor, and supervisor. diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index 62e9eb1..3da81a6 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -45,7 +45,7 @@ source "$FACTORY_ROOT/lib/agent-sdk.sh" # shellcheck source=../lib/pr-lifecycle.sh source "$FACTORY_ROOT/lib/pr-lifecycle.sh" -LOG_FILE="$SCRIPT_DIR/gardener.log" +LOG_FILE="${DISINTO_LOG_DIR}/gardener/gardener.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -54,22 +54,43 @@ SCRATCH_FILE="/tmp/gardener-${PROJECT_NAME}-scratch.md" RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" WORKTREE="/tmp/${PROJECT_NAME}-gardener-run" +LAST_SHA_FILE="${DISINTO_DATA_DIR}/gardener-last-sha.txt" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="gardener" # ── Guards ──────────────────────────────────────────────────────────────── check_active gardener acquire_cron_lock "/tmp/gardener-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Gardener run start ---" -# ── Resolve agent identity for .profile repo ──────────────────────────── -if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_GARDENER_TOKEN:-}" ]; then - AGENT_IDENTITY=$(curl -sf -H "Authorization: token
${FORGE_GARDENER_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +# ── Precondition checks: skip if nothing to do ──────────────────────────── +# Check for new commits since last run +CURRENT_SHA=$(git -C "$FACTORY_ROOT" rev-parse HEAD 2>/dev/null || echo "") +LAST_SHA=$(cat "$LAST_SHA_FILE" 2>/dev/null || echo "") + +# Check for open issues needing grooming +backlog_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues?labels=backlog&state=open&limit=1" 2>/dev/null | jq length) || backlog_count=0 +tech_debt_count=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues?labels=tech-debt&state=open&limit=1" 2>/dev/null | jq length) || tech_debt_count=0 + +if [ "$CURRENT_SHA" = "$LAST_SHA" ] && [ "${backlog_count:-0}" -eq 0 ] && [ "${tech_debt_count:-0}" -eq 0 ]; then + log "no new commits and no issues to groom — skipping" + exit 0 fi +log "current sha: ${CURRENT_SHA:0:8}..., backlog issues: ${backlog_count}, tech-debt issues: ${tech_debt_count}" + +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + +# ── Resolve agent identity for .profile repo ──────────────────────────── +resolve_agent_identity || true + # ── Load formula + context ─────────────────────────────────────────────── load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1 build_context_block AGENTS.md @@ -127,16 +148,7 @@ ${SCRATCH_INSTRUCTION} ${PROMPT_FOOTER}" # ── Create worktree ────────────────────────────────────────────────────── -cd "$PROJECT_REPO_ROOT" -git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true -worktree_cleanup "$WORKTREE" -git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null - -cleanup() { - worktree_cleanup "$WORKTREE" - rm -f "$GARDENER_PR_FILE" -} -trap cleanup EXIT +formula_worktree_setup "$WORKTREE" # ── Post-merge manifest execution 
──────────────────────────────────────── # Reads gardener/pending-actions.json and executes each action via API. @@ -165,19 +177,21 @@ _gardener_execute_manifest() { case "$action" in add_label) - local label label_id + local label label_id http_code resp label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/labels" \ - -d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then + -d "{\"labels\":[${label_id}]}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then log "manifest: add_label '${label}' to #${issue}" else - log "manifest: FAILED add_label '${label}' to #${issue}" + log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}" fi else log "manifest: FAILED add_label — label '${label}' not found" @@ -185,17 +199,19 @@ _gardener_execute_manifest() { ;; remove_label) - local label label_id + local label label_id http_code resp label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then + resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: remove_label '${label}' 
from #${issue}" else - log "manifest: FAILED remove_label '${label}' from #${issue}" + log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}" fi else log "manifest: FAILED remove_label — label '${label}' not found" @@ -203,34 +219,38 @@ _gardener_execute_manifest() { ;; close) - local reason + local reason http_code resp reason=$(jq -r ".[$i].reason // empty" "$manifest_file") - if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d '{"state":"closed"}' >/dev/null 2>&1; then + -d '{"state":"closed"}' 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: closed #${issue} (${reason})" else - log "manifest: FAILED close #${issue}" + log "manifest: FAILED close #${issue}: HTTP ${http_code}" fi ;; comment) - local body escaped_body + local body escaped_body http_code resp body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/comments" \ - -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then + -d "{\"body\":${escaped_body}}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then log "manifest: commented on #${issue}" else - log "manifest: FAILED comment on #${issue}" + log "manifest: FAILED comment on #${issue}: HTTP ${http_code}" fi ;; create_issue) - local title body labels escaped_title escaped_body label_ids + local title body labels escaped_title escaped_body label_ids http_code resp title=$(jq -r ".[$i].title" "$manifest_file") body=$(jq -r ".[$i].body" 
"$manifest_file") labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file") @@ -250,40 +270,46 @@ _gardener_execute_manifest() { done <<< "$labels" [ -n "$ids_json" ] && label_ids="[${ids_json}]" fi - if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues" \ - -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then + -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then log "manifest: created issue '${title}'" else - log "manifest: FAILED create_issue '${title}'" + log "manifest: FAILED create_issue '${title}': HTTP ${http_code}" fi ;; edit_body) - local body escaped_body + local body escaped_body http_code resp body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then + -d "{\"body\":${escaped_body}}" 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: edited body of #${issue}" else - log "manifest: FAILED edit_body #${issue}" + log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}" fi ;; close_pr) - local pr + local pr http_code resp pr=$(jq -r ".[$i].pr" "$manifest_file") - if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/pulls/${pr}" \ - -d 
'{"state":"closed"}' >/dev/null 2>&1; then + -d '{"state":"closed"}' 2>/dev/null) || true + http_code=$(echo "$resp" | tail -1) + if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then log "manifest: closed PR #${pr}" else - log "manifest: FAILED close_pr #${pr}" + log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}" fi ;; @@ -328,9 +354,9 @@ if [ -n "$PR_NUMBER" ]; then if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then # Post-merge: pull primary, mirror push, execute manifest - git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true - git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true mirror_push _gardener_execute_manifest rm -f "$SCRATCH_FILE" @@ -347,4 +373,8 @@ fi profile_write_journal "gardener-run" "Gardener run $(date -u +%Y-%m-%d)" "complete" "" || true rm -f "$GARDENER_PR_FILE" + +# Persist last-seen SHA for next run comparison +echo "$CURRENT_SHA" > "$LAST_SHA_FILE" + log "--- Gardener run done ---" diff --git a/gardener/pending-actions.json b/gardener/pending-actions.json index 747973c..f66edcd 100644 --- a/gardener/pending-actions.json +++ b/gardener/pending-actions.json @@ -1,32 +1,47 @@ [ { - "action": "edit_body", - "issue": 765, - "body": "Depends on: none\n\n## Goal\n\nThe disinto website becomes a versioned artifact: built by CI, published to Codeberg's generic package registry, deployed to staging automatically. 
Version visible in footer.\n\n## Files to add/change\n\n### `site/VERSION`\n```\n0.1.0\n```\n\n### `site/build.sh`\n```bash\n#!/bin/bash\nVERSION=$(cat VERSION)\nmkdir -p dist\ncp *.html *.jpg *.webp *.png *.ico *.xml robots.txt dist/\nsed -i \"s|Built from scrap, powered by a single battery.|v${VERSION} · Built from scrap, powered by a single battery.|\" dist/index.html\necho \"$VERSION\" > dist/VERSION\n```\n\n### `site/index.html`\nNo template placeholder needed — `build.sh` does the sed replacement on the existing footer text.\n\n### `.woodpecker/site.yml`\n```yaml\nwhen:\n path: \"site/**\"\n event: push\n branch: main\n\nsteps:\n - name: build\n image: alpine\n commands:\n - cd site && sh build.sh\n - VERSION=$(cat site/VERSION)\n - tar czf site-${VERSION}.tar.gz -C site/dist .\n\n - name: publish\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n --upload-file site-${VERSION}.tar.gz\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n\n - name: deploy-staging\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n -o site.tar.gz\n - rm -rf /srv/staging/*\n - tar xzf site.tar.gz -C /srv/staging/\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n volumes:\n - /home/debian/staging-site:/srv/staging\n```\n\n## Infra setup (manual, before first run)\n- `mkdir -p /home/debian/staging-site`\n- Add to Caddyfile: `staging.disinto.ai { root * /home/debian/staging-site; file_server }`\n- DNS: `staging.disinto.ai` A record → same IP as `disinto.ai`\n- Reload Caddy: `sudo systemctl reload caddy`\n- Add `forge_token` as Woodpecker repo secret for johba/disinto (if not already set)\n- Add 
`/home/debian/staging-site` to `WOODPECKER_BACKEND_DOCKER_VOLUMES`\n\n## Verification\n- [ ] Merge PR that touches `site/` → CI runs site pipeline\n- [ ] Package appears at `codeberg.org/johba/-/packages/generic/disinto-site/0.1.0`\n- [ ] `staging.disinto.ai` serves the site with `v0.1.0` in footer\n- [ ] `disinto.ai` (production) unchanged\n\n## Related\n- #764 — docker stack edge proxy + staging (future: this moves inside the stack)\n- #755 — vault-gated production promotion (production deploy comes later)\n\n## Affected files\n- `site/VERSION` — new, holds current version string\n- `site/build.sh` — new, builds dist/ with version injected into footer\n- `.woodpecker/site.yml` — new, CI pipeline for build/publish/deploy-staging" + "action": "close", + "issue": 419, + "reason": "Vision goal complete — all sub-issues #437-#454 closed, vault blast-radius redesign delivered" + }, + { + "action": "close", + "issue": 494, + "reason": "Resolved by PRs #502 and #503 (both merged) — repo_root workaround removed, container paths derived at runtime" + }, + { + "action": "close", + "issue": 477, + "reason": "Obsolete — #379 (while-true loop) was deployed on 2026-04-08; env.sh container guard is now correct behavior, no revert needed" }, { "action": "edit_body", - "issue": 764, - "body": "Depends on: none (builds on existing docker-compose generation in `bin/disinto`)\n\n## Design\n\n`disinto init` + `disinto up` starts two additional containers as base factory infrastructure:\n\n### Edge proxy (Caddy)\n- Reverse proxies to Forgejo and Woodpecker\n- Serves staging site\n- Runs on ports 80/443\n- At bootstrap: IP-only, self-signed TLS or HTTP\n- Domain + Let's Encrypt added later via vault resource request\n\n### Staging container (Caddy)\n- Static file server for the project's staging artifacts\n- Starts with a default \"Nothing shipped yet\" page\n- CI pipelines write to a shared volume to update staging content\n- No vault approval needed — staging is the factory's 
sandbox\n\n### docker-compose addition\n```yaml\nservices:\n edge:\n image: caddy:alpine\n ports:\n - \"80:80\"\n - \"443:443\"\n volumes:\n - ./Caddyfile:/etc/caddy/Caddyfile\n - caddy_data:/data\n depends_on:\n - forgejo\n - woodpecker-server\n - staging\n\n staging:\n image: caddy:alpine\n volumes:\n - staging-site:/srv/site\n # Not exposed directly — edge proxies to it\n\nvolumes:\n caddy_data:\n staging-site:\n```\n\n### Caddyfile (generated by `disinto init`)\n```\n# IP-only at bootstrap, domain added later\n:80 {\n handle /forgejo/* {\n reverse_proxy forgejo:3000\n }\n handle /ci/* {\n reverse_proxy woodpecker-server:8000\n }\n handle {\n reverse_proxy staging:80\n }\n}\n```\n\n### Staging update flow\n1. CI builds artifact (site tarball, etc.)\n2. CI step writes to `staging-site` volume\n3. Staging container serves updated content immediately\n4. No restart needed — Caddy serves files directly\n\n### Domain lifecycle\n- Bootstrap: no domain, edge serves on IP\n- Later: factory files vault resource request for domain\n- Human buys domain, sets DNS\n- Caddyfile updated with domain, Let's Encrypt auto-provisions TLS\n\n## Affected files\n- `bin/disinto` — `generate_compose()` adds edge + staging services\n- New: default staging page (\"Nothing shipped yet\")\n- New: Caddyfile template in `docker/`\n\n## Related\n- #755 — vault-gated deployment promotion (production comes later)\n- #757 — ops repo (domain is a resource requested through vault)\n\n## Acceptance criteria\n- [ ] `disinto init` generates a `docker-compose.yml` that includes `edge` (Caddy) and `staging` containers\n- [ ] Edge proxy routes `/forgejo/*` → Forgejo, `/ci/*` → Woodpecker, default → staging container\n- [ ] Staging container serves a default \"Nothing shipped yet\" page on first boot\n- [ ] `docker/` directory contains a Caddyfile template generated by `disinto init`\n- [ ] `disinto up` starts all containers including edge and staging without manual steps" - }, - { - "action": 
"edit_body", - "issue": 761, - "body": "Depends on: #747\n\n## Design\n\nEach agent account on the bundled Forgejo gets a `.profile` repo. This repo holds the agent's formula (copied from disinto at creation time) and its journal.\n\n### Structure\n```\n{agent-bot}/.profile/\n├── formula.toml # snapshot of the formula at agent creation time\n├── journal/ # daily logs of what the agent did\n│ ├── 2026-03-26.md\n│ └── ...\n└── knowledge/ # learned patterns, best-practices (optional, agent can evolve)\n```\n\n### Lifecycle\n1. **Create agent** — `disinto init` or `disinto spawn-agent` creates Forgejo account + `.profile` repo\n2. **Copy formula** — current `formulas/{role}.toml` from disinto repo is copied to `.profile/formula.toml`\n3. **Agent reads its own formula** — at session start, agent reads from its `.profile`, not from the disinto repo\n4. **Agent writes journal** — daily entries pushed to `.profile/journal/`\n5. **Agent can evolve knowledge** — best-practices, heuristics, patterns written to `.profile/knowledge/`\n\n### What this enables\n\n**A/B testing formulas:** Create two agents from different formula versions, run both against the same backlog, compare results (cycle time, CI pass rate, review rejection rate).\n\n**Rollback:** New formula worse? Kill agent, spawn from older formula version.\n\n**Audit:** What formula was this agent running when it produced that PR? Check its `.profile` at that git commit.\n\n**Drift tracking:** Diff what an agent learned (`.profile/knowledge/`) vs what it started with. 
Measure formula evolution over time.\n\n**Portability:** Move agent to different box — `git clone` its `.profile`.\n\n### Disinto repo becomes the template\n\n```\ndisinto repo:\n formulas/dev-agent.toml ← canonical template, evolves\n formulas/review-agent.toml\n formulas/planner.toml\n ...\n\nRunning agents:\n dev-bot-v2/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v2\n dev-bot-v3/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v3\n review-bot/.profile/formula.toml ← snapshot from formulas/review-agent.toml\n```\n\nThe formula in the disinto repo is the template. The `.profile` copy is the instance. They can diverge — that's a feature, not a bug.\n\n## Affected files\n- `bin/disinto` — agent creation copies formula to .profile\n- Agent session scripts — read formula from .profile instead of local formulas/ dir\n- Planner/supervisor — can read other agents' journals from their .profile repos\n\n## Related\n- #747 — per-agent Forgejo accounts (prerequisite)\n- #757 — ops repo (shared concerns stay there: vault, portfolio, resources)\n\n## Acceptance criteria\n- [ ] `disinto spawn-agent` (or `disinto init`) creates a Forgejo account + `.profile` repo for each agent bot\n- [ ] Current `formulas/{role}.toml` is copied to `.profile/formula.toml` at agent creation time\n- [ ] Agent session script reads its formula from `.profile/formula.toml`, not from the repo's `formulas/` directory\n- [ ] Agent writes daily journal entries to `.profile/journal/YYYY-MM-DD.md`" - }, - { - "action": "edit_body", - "issue": 742, - "body": "## Problem\n\n`gardener/recipes/*.toml` (4 files: cascade-rebase, chicken-egg-ci, flaky-test, shellcheck-violations) are an older pattern predating `formulas/*.toml`. Two systems for the same thing.\n\n## Fix\n\nMigrate any unique content from recipes to the gardener formula or to new formulas. 
Delete the recipes directory.\n\n## Affected files\n- `gardener/recipes/*.toml` — delete after migration\n- `formulas/run-gardener.toml` — absorb relevant content\n- Gardener scripts that reference recipes/\n\n## Acceptance criteria\n- [ ] Contents of `gardener/recipes/*.toml` are diff'd against `formulas/run-gardener.toml` — any unique content is migrated\n- [ ] `gardener/recipes/` directory is deleted\n- [ ] No scripts in `gardener/` reference the `recipes/` path after migration\n- [ ] ShellCheck passes on all modified scripts" + "issue": 498, + "body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\n`has_responses_to_process` is only set to `true` inside the `open_arch_prs >= 3` gate in `architect/architect-run.sh` (line 543). When fewer than 3 architect PRs are open, ACCEPT/REJECT responses on existing PRs are never processed — the response-processing block at line 687 defaults to `false` and is skipped entirely.\n\nThis means that if a user ACCEPTs or REJECTs a pitch while the open PR count is below 3, the architect agent will never handle the response.\n\n## Fix\n\nSet `has_responses_to_process` (or an equivalent guard) unconditionally by scanning open PRs for ACCEPT/REJECT responses, not only when the 3-PR cap is hit.\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_responses_to_process` is computed by scanning open architect PRs for ACCEPT/REJECT responses regardless of `open_arch_prs` count\n- [ ] When a user posts ACCEPT or REJECT on an architect PR and open PR count < 3, the response is processed in the same run\n- [ ] Existing behavior when `open_arch_prs >= 3` is unchanged\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (lines ~543 and ~687 — response-processing gate)" }, { "action": "add_label", - "issue": 742, + "issue": 498, "label": "backlog" }, + { + "action": "edit_body", + "issue": 499, + "body": "Flagged by AI reviewer in PR #496.\n\n## Problem\n\nIn 
`architect/architect-run.sh` line 203, the `has_open_subissues` function compares `.number` (a JSON integer) against `$vid` (a bash string via `--arg`). In jq, `42 != \"42\"` evaluates to true (different types are never equal), so the self-exclusion filter never fires. In practice this is low-risk since vision issues don't contain 'Decomposed from #N' in their own bodies, but the self-exclusion logic is silently broken.\n\n## Fix\n\nCast the string to a number in jq: `select(.number != ($vid | tonumber))`\n\n---\n*Auto-created from AI review*\n\n## Acceptance criteria\n\n- [ ] `has_open_subissues` self-exclusion filter correctly excludes the vision issue itself using `($vid | tonumber)` cast\n- [ ] A vision issue does not appear in its own subissue list\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `architect/architect-run.sh` (line ~203 — `has_open_subissues` jq filter)" + }, { "action": "add_label", - "issue": 741, + "issue": 499, + "label": "backlog" + }, + { + "action": "edit_body", + "issue": 471, + "body": "## Bug description\n\nWhen dev-bot picks a backlog issue and launches dev-agent.sh, a second dev-poll instance (dev-qwen) can race ahead and mark the issue as stale/blocked before dev-agent.sh finishes claiming it.\n\n## Reproduction\n\nObserved on issues #443 and #445 (2026-04-08):\n\n**#443 timeline:**\n- `20:39:03` — dev-bot removes `backlog`, adds `in-progress` (via dev-poll backlog pickup)\n- `20:39:04` — dev-qwen removes `in-progress`, adds `blocked` with reason `no_assignee_no_open_pr_no_lock`\n- `20:40:11` — dev-bot pushes commit (dev-agent was actually working the whole time)\n- `20:44:02` — PR merged, issue closed\n\n**#445 timeline:**\n- `20:54:03` — dev-bot adds `in-progress`\n- `20:54:06` — dev-qwen marks `blocked` (3 seconds later)\n- `20:55:13` — dev-bot pushes commit\n- `21:09:03` — PR merged, issue closed\n\nIn both cases, the work completed successfully despite being labeled blocked.\n\n## Root 
cause\n\n`issue_claim()` in `lib/issue-lifecycle.sh` performs three sequential API calls:\n1. PATCH assignee\n2. POST in-progress label\n3. DELETE backlog label\n\nMeanwhile, dev-poll on another agent (dev-qwen) runs its orphan scan, sees the issue labeled `in-progress` but with no assignee set yet (assign PATCH hasn't landed or was read stale), no open PR, and no lock file. It concludes the issue is stale and relabels to `blocked`.\n\nThe race window is ~1-3 seconds between in-progress being set and the assignee being visible to other pollers.\n\n## Impact\n\n- Issues get spuriously labeled `blocked` with a misleading stale diagnostic comment\n- dev-agent continues working anyway (it already has the issue number), so the blocked label is just noise\n- But it could confuse the gardener or humans reading the issue timeline\n- If another dev-poll instance picks up the blocked issue for recovery before the original agent finishes, it could cause duplicate work\n\n## Possible fixes\n\n1. **Assign before labeling**: In `issue_claim()`, set the assignee first, then add in-progress. This way, by the time in-progress is visible, the assignee is already set.\n2. **Grace period in stale detection**: Skip issues whose in-progress label was added less than N seconds ago (check label event timestamp via timeline API).\n3. **Lock file before label**: Write the agent lock file (`/tmp/dev-impl-summary-...`) at the start of dev-agent.sh before calling `issue_claim()`, so the stale detector sees the lock.\n4. 
**Atomic claim check**: dev-poll should re-check assignee after a short delay before declaring stale, to allow for API propagation.\n\n## Acceptance criteria\n\n- [ ] Stale detection in dev-poll does not mark an issue as blocked within the first 60 seconds of the in-progress label being applied\n- [ ] `issue_claim()` assigns the issue before adding the in-progress label (or equivalent fix is implemented)\n- [ ] No spurious `blocked` labels appear on issues that are actively being worked (verified by log inspection or integration test)\n- [ ] ShellCheck passes on modified files\n\n## Affected files\n\n- `lib/issue-lifecycle.sh` — `issue_claim()` function (assignee + label ordering)\n- `dev/dev-poll.sh` — orphan/stale detection logic" + }, + { + "action": "add_label", + "issue": 471, "label": "backlog" } ] diff --git a/lib/AGENTS.md b/lib/AGENTS.md index c0119fa..a937e8b 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -1,4 +1,4 @@ -<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a --> +<!-- last-reviewed: 7069b729f77de1687aeeac327e44098a608cf567 --> # Shared Helpers (`lib/`) All agents source `lib/env.sh` as their first action. Additional helpers are @@ -6,20 +6,29 @@ sourced as needed. | File | What it provides | Sourced by | |---|---|---| -| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. 
**Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent | -| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr, supervisor-poll | +| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). 
Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent | +| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. 
`ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr | | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | -| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) | -| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll | -| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. 
`formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | +| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. `<pipeline_number> [--step <name>]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh | +| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). **Container path derivation**: `PROJECT_REPO_ROOT` and `OPS_REPO_ROOT` are derived at runtime when `DISINTO_CONTAINER=1` — hardcoded to `/home/agent/repos/$PROJECT_NAME` and `/home/agent/repos/$PROJECT_NAME-ops` respectively — not read from the TOML. This ensures correct paths inside containers where host paths in the TOML would be wrong. | env.sh (when `PROJECT_TOML` is set) | +| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. 
| dev-poll | +| `lib/formula-session.sh` | `acquire_cron_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven cron agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | | `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. 
| cron entry points | -| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh | +| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh | | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh | -| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh | -| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. 
| (available for future use) | +| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh | +| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/<project>-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula | | `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) | | `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. 
`worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | | `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) | | `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) | -| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. 
Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). **OAuth flock**: when `DISINTO_CONTAINER=1`, Claude CLI is wrapped in `flock -w 300 ~/.claude/session.lock` to queue concurrent token refresh attempts and prevent rotation races across agents sharing the same credentials. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). 
**Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh | -| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher | +| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. **Low-tier bypass**: if the action's `blast_radius` classifies as `low` (via `vault/classify.sh`), `vault_request` calls `_vault_commit_direct()` which commits directly to ops `main` using `FORGE_ADMIN_TOKEN` — no PR, no approval wait. Returns `0` (not a PR number) for direct commits. Requires `FORGE_TOKEN`, `FORGE_ADMIN_TOKEN` (low-tier only), `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. 
Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher | +| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. | bin/disinto (hire-an-agent) | +| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: when a session ID is known and the run produced output, `agent_run` re-invokes Claude with a nudge only if no commits are ahead of `${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}` and the worktree has uncommitted changes; a clean worktree skips the nudge, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) | +| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE_<BOT>_PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) | +| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. 
`_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) | +| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) | +| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for project agents. `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) | +| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (uses `codeberg.org/forgejo/forgejo:11.0` tag; adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility), `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) | +| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. 
Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) | +| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) | diff --git a/lib/agent-sdk.sh b/lib/agent-sdk.sh index 82ad9a9..1c1a69c 100644 --- a/lib/agent-sdk.sh +++ b/lib/agent-sdk.sh @@ -46,9 +46,23 @@ agent_run() { [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") local run_dir="${worktree_dir:-$(pwd)}" - local output + local lock_file="${HOME}/.claude/session.lock" + mkdir -p "$(dirname "$lock_file")" + local output rc log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" - output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true + output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$? + if [ "$rc" -eq 124 ]; then + log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)" + elif [ "$rc" -ne 0 ]; then + log "agent_run: claude exited with code $rc" + # Log last 3 lines of output for diagnostics + if [ -n "$output" ]; then + log "agent_run: last output lines: $(echo "$output" | tail -3)" + fi + fi + if [ -z "$output" ]; then + log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)" + fi # Extract and persist session_id local new_sid @@ -66,27 +80,37 @@ agent_run() { # Nudge: if the model stopped without pushing, resume with encouragement. # Some models emit end_turn prematurely when confused. A nudge often unsticks them. 
- if [ -n "$_AGENT_SESSION_ID" ]; then + if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then local has_changes has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true local has_pushed has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true if [ -z "$has_pushed" ]; then - local nudge="You stopped but did not push any code. " if [ -n "$has_changes" ]; then - nudge+="You have uncommitted changes. Commit them and push." + # Nudge: there are uncommitted changes + local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push." + log "agent_run: nudging (uncommitted changes)" + local nudge_rc + output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$? + if [ "$nudge_rc" -eq 124 ]; then + log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)" + elif [ "$nudge_rc" -ne 0 ]; then + log "agent_run: nudge claude exited with code $nudge_rc" + # Log last 3 lines of output for diagnostics + if [ -n "$output" ]; then + log "agent_run: nudge last output lines: $(echo "$output" | tail -3)" + fi + fi + new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true + if [ -n "$new_sid" ]; then + _AGENT_SESSION_ID="$new_sid" + printf '%s' "$new_sid" > "$SID_FILE" + fi + printf '%s' "$output" > "$diag_file" 2>/dev/null || true + _AGENT_LAST_OUTPUT="$output" else - nudge+="Complete the implementation, commit, and push your branch." 
+ log "agent_run: no push and no changes — skipping nudge" fi - log "agent_run: nudging (no push detected)" - output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true - new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true - if [ -n "$new_sid" ]; then - _AGENT_SESSION_ID="$new_sid" - printf '%s' "$new_sid" > "$SID_FILE" - fi - printf '%s' "$output" > "$diag_file" 2>/dev/null || true - _AGENT_LAST_OUTPUT="$output" fi fi } diff --git a/lib/agent-session.sh b/lib/agent-session.sh deleted file mode 100644 index dbb1e2a..0000000 --- a/lib/agent-session.sh +++ /dev/null @@ -1,486 +0,0 @@ -#!/usr/bin/env bash -# agent-session.sh — Shared tmux + Claude interactive session helpers -# -# Source this into agent orchestrator scripts for reusable session management. -# -# Functions: -# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS] -# agent_inject_into_session SESSION_NAME TEXT -# agent_kill_session SESSION_NAME -# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME] -# session_lock_acquire [TIMEOUT_SECS] -# session_lock_release - -# --- Cooperative session lock (fd-based) --- -# File descriptor for the session lock. Set by create_agent_session(). -# Callers can release/re-acquire via session_lock_release/session_lock_acquire -# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci). -SESSION_LOCK_FD="" - -# Release the session lock without closing the file descriptor. -# The fd stays open so it can be re-acquired later. -session_lock_release() { - if [ -n "${SESSION_LOCK_FD:-}" ]; then - flock -u "$SESSION_LOCK_FD" - fi -} - -# Re-acquire the session lock. Blocks until available or timeout. -# Opens the lock fd if not already open (for use by external callers). 
-# Args: [timeout_secs] (default 300) -# Returns 0 on success, 1 on timeout/error. -# shellcheck disable=SC2120 # timeout arg is used by external callers -session_lock_acquire() { - local timeout="${1:-300}" - if [ -z "${SESSION_LOCK_FD:-}" ]; then - local lock_dir="${HOME}/.claude" - mkdir -p "$lock_dir" - exec {SESSION_LOCK_FD}>>"${lock_dir}/session.lock" - fi - flock -w "$timeout" "$SESSION_LOCK_FD" -} - -# Wait for the Claude ❯ ready prompt in a tmux pane. -# Returns 0 if ready within TIMEOUT_SECS (default 120), 1 otherwise. -agent_wait_for_claude_ready() { - local session="$1" - local timeout="${2:-120}" - local elapsed=0 - while [ "$elapsed" -lt "$timeout" ]; do - if tmux capture-pane -t "$session" -p 2>/dev/null | grep -q '❯'; then - return 0 - fi - sleep 2 - elapsed=$((elapsed + 2)) - done - return 1 -} - -# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter. -agent_inject_into_session() { - local session="$1" - local text="$2" - local tmpfile - # Re-acquire session lock before injecting — Claude will resume working - # shellcheck disable=SC2119 # using default timeout - session_lock_acquire || true - agent_wait_for_claude_ready "$session" 120 || true - # Clear idle marker — new work incoming - rm -f "/tmp/claude-idle-${session}.ts" - tmpfile=$(mktemp /tmp/agent-inject-XXXXXX) - printf '%s' "$text" > "$tmpfile" - tmux load-buffer -b "agent-inject-$$" "$tmpfile" - tmux paste-buffer -t "$session" -b "agent-inject-$$" - sleep 0.5 - tmux send-keys -t "$session" "" Enter - tmux delete-buffer -b "agent-inject-$$" 2>/dev/null || true - rm -f "$tmpfile" -} - -# Create a tmux session running Claude in the given workdir. -# Installs a Stop hook for idle detection (see monitor_phase_loop). -# Installs a PreToolUse hook to guard destructive Bash operations. -# Optionally installs a PostToolUse hook for phase file write detection. -# Optionally installs a StopFailure hook for immediate phase file update on API error. 
-# Args: session workdir [phase_file] -# Returns 0 if session is ready, 1 otherwise. -create_agent_session() { - local session="$1" - local workdir="${2:-.}" - local phase_file="${3:-}" - - # Prepare settings directory for hooks - mkdir -p "${workdir}/.claude" - local settings="${workdir}/.claude/settings.json" - - # Install Stop hook for idle detection: when Claude finishes a response, - # the hook writes a timestamp to a marker file. monitor_phase_loop checks - # this marker instead of fragile tmux pane scraping. - local idle_marker="/tmp/claude-idle-${session}.ts" - local hook_script="${FACTORY_ROOT}/lib/hooks/on-idle-stop.sh" - if [ -x "$hook_script" ]; then - local hook_cmd="${hook_script} ${idle_marker}" - # When a phase file is available, pass it and the session name so the - # hook can nudge Claude if it returns to the prompt without signalling. - if [ -n "$phase_file" ]; then - hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}" - fi - if [ -f "$settings" ]; then - # Append our Stop hook to existing project settings - jq --arg cmd "$hook_cmd" ' - if (.hooks.Stop // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.Stop = (.hooks.Stop // []) + [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$hook_cmd" '{ - hooks: { - Stop: [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - - # Install PostToolUse hook for phase file write detection: when Claude - # writes to the phase file via Bash or Write, the hook writes a marker - # so monitor_phase_loop can react immediately instead of waiting for - # the next mtime-based poll cycle. 
- if [ -n "$phase_file" ]; then - local phase_marker="/tmp/phase-changed-${session}.marker" - local phase_hook_script="${FACTORY_ROOT}/lib/hooks/on-phase-change.sh" - if [ -x "$phase_hook_script" ]; then - local phase_hook_cmd="${phase_hook_script} ${phase_file} ${phase_marker}" - if [ -f "$settings" ]; then - jq --arg cmd "$phase_hook_cmd" ' - if (.hooks.PostToolUse // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.PostToolUse = (.hooks.PostToolUse // []) + [{ - matcher: "Bash|Write", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$phase_hook_cmd" '{ - hooks: { - PostToolUse: [{ - matcher: "Bash|Write", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - rm -f "$phase_marker" - fi - fi - - # Install StopFailure hook for immediate phase file update on API error: - # when Claude hits a rate limit, server error, billing error, or auth failure, - # the hook writes PHASE:failed to the phase file and touches the phase-changed - # marker so monitor_phase_loop picks it up within one poll cycle instead of - # waiting for idle timeout (up to 2 hours). - if [ -n "$phase_file" ]; then - local stop_failure_hook_script="${FACTORY_ROOT}/lib/hooks/on-stop-failure.sh" - if [ -x "$stop_failure_hook_script" ]; then - # phase_marker is defined in the PostToolUse block above; redeclare so - # this block is self-contained if that block is ever removed. - local sf_phase_marker="/tmp/phase-changed-${session}.marker" - local stop_failure_hook_cmd="${stop_failure_hook_script} ${phase_file} ${sf_phase_marker}" - if [ -f "$settings" ]; then - jq --arg cmd "$stop_failure_hook_cmd" ' - if (.hooks.StopFailure // [] | any(.[]; .hooks[]?.command == $cmd)) - then . 
- else .hooks.StopFailure = (.hooks.StopFailure // []) + [{ - matcher: "rate_limit|server_error|authentication_failed|billing_error", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$stop_failure_hook_cmd" '{ - hooks: { - StopFailure: [{ - matcher: "rate_limit|server_error|authentication_failed|billing_error", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - fi - - # Install PreToolUse hook for destructive operation guard: blocks force push - # to primary branch, rm -rf outside worktree, direct API merge calls, and - # checkout/switch to primary branch. Claude sees the denial reason on exit 2 - # and can self-correct. - local guard_hook_script="${FACTORY_ROOT}/lib/hooks/on-pretooluse-guard.sh" - if [ -x "$guard_hook_script" ]; then - local abs_workdir - abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir" - local guard_hook_cmd="${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}" - if [ -f "$settings" ]; then - jq --arg cmd "$guard_hook_cmd" ' - if (.hooks.PreToolUse // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.PreToolUse = (.hooks.PreToolUse // []) + [{ - matcher: "Bash", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$guard_hook_cmd" '{ - hooks: { - PreToolUse: [{ - matcher: "Bash", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - - # Install SessionEnd hook for guaranteed cleanup: when the Claude session - # exits (clean or crash), write a termination marker so monitor_phase_loop - # detects the exit faster than tmux has-session polling alone. 
- local exit_marker="/tmp/claude-exited-${session}.ts" - local session_end_hook_script="${FACTORY_ROOT}/lib/hooks/on-session-end.sh" - if [ -x "$session_end_hook_script" ]; then - local session_end_hook_cmd="${session_end_hook_script} ${exit_marker}" - if [ -f "$settings" ]; then - jq --arg cmd "$session_end_hook_cmd" ' - if (.hooks.SessionEnd // [] | any(.[]; .hooks[]?.command == $cmd)) - then . - else .hooks.SessionEnd = (.hooks.SessionEnd // []) + [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$session_end_hook_cmd" '{ - hooks: { - SessionEnd: [{ - matcher: "", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - rm -f "$exit_marker" - - # Install SessionStart hook for context re-injection after compaction: - # when Claude Code compacts context during long sessions, the phase protocol - # instructions are lost. This hook fires after each compaction and outputs - # the content of a context file so Claude retains critical instructions. - # The context file is written by callers via write_compact_context(). - if [ -n "$phase_file" ]; then - local compact_hook_script="${FACTORY_ROOT}/lib/hooks/on-compact-reinject.sh" - if [ -x "$compact_hook_script" ]; then - local context_file="${phase_file%.phase}.context" - local compact_hook_cmd="${compact_hook_script} ${context_file}" - if [ -f "$settings" ]; then - jq --arg cmd "$compact_hook_cmd" ' - if (.hooks.SessionStart // [] | any(.[]; .hooks[]?.command == $cmd)) - then . 
- else .hooks.SessionStart = (.hooks.SessionStart // []) + [{ - matcher: "compact", - hooks: [{type: "command", command: $cmd}] - }] - end - ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings" - else - jq -n --arg cmd "$compact_hook_cmd" '{ - hooks: { - SessionStart: [{ - matcher: "compact", - hooks: [{type: "command", command: $cmd}] - }] - } - }' > "$settings" - fi - fi - fi - - rm -f "$idle_marker" - local model_flag="" - if [ -n "${CLAUDE_MODEL:-}" ]; then - model_flag="--model ${CLAUDE_MODEL}" - fi - - # Acquire a session-level mutex via fd-based flock to prevent concurrent - # Claude sessions from racing on OAuth token refresh. Unlike the previous - # command-wrapper flock, the fd approach allows callers to release the lock - # during idle phases (awaiting_review/awaiting_ci) and re-acquire before - # injecting the next prompt. See #724. - # Use ~/.claude/session.lock so the lock is shared across containers when - # the host ~/.claude directory is bind-mounted. - local lock_dir="${HOME}/.claude" - mkdir -p "$lock_dir" - local claude_lock="${lock_dir}/session.lock" - if [ -z "${SESSION_LOCK_FD:-}" ]; then - exec {SESSION_LOCK_FD}>>"${claude_lock}" - fi - if ! flock -w 300 "$SESSION_LOCK_FD"; then - return 1 - fi - local claude_cmd="claude --dangerously-skip-permissions ${model_flag}" - - tmux new-session -d -s "$session" -c "$workdir" \ - "$claude_cmd" 2>/dev/null - sleep 1 - tmux has-session -t "$session" 2>/dev/null || return 1 - agent_wait_for_claude_ready "$session" 120 || return 1 - return 0 -} - -# Inject a prompt/formula into a session (alias for agent_inject_into_session). -inject_formula() { - agent_inject_into_session "$@" -} - -# Monitor a phase file, calling a callback on changes and handling idle timeout. -# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate). -# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME). 
-# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly. -# Args: phase_file idle_timeout_secs callback_fn [session_name] -# session_name — tmux session to health-check; falls back to $SESSION_NAME global -# -# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh) -# to detect when Claude finishes responding without writing a phase signal. -# If the marker exists for 3 consecutive polls with no phase written, the session -# is killed and the callback invoked with "PHASE:failed". -monitor_phase_loop() { - local phase_file="$1" - local idle_timeout="$2" - local callback="$3" - local _session="${4:-${SESSION_NAME:-}}" - # Export resolved session name so callbacks can reference it regardless of - # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT). - export _MONITOR_SESSION="$_session" - local poll_interval="${PHASE_POLL_INTERVAL:-10}" - local last_mtime=0 - local idle_elapsed=0 - local idle_pane_count=0 - - while true; do - sleep "$poll_interval" - idle_elapsed=$(( idle_elapsed + poll_interval )) - - # Session health check: SessionEnd hook marker provides fast detection, - # tmux has-session is the fallback for unclean exits (e.g. tmux crash). - local exit_marker="/tmp/claude-exited-${_session}.ts" - if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then - local current_phase - current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) - case "$current_phase" in - PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate) - ;; # terminal — fall through to phase handler - *) - # Call callback with "crashed" — let agent-specific code handle recovery - if type "${callback}" &>/dev/null; then - "$callback" "PHASE:crashed" - fi - # If callback didn't restart session, break - if ! 
tmux has-session -t "${_session}" 2>/dev/null; then - _MONITOR_LOOP_EXIT="crashed" - return 1 - fi - idle_elapsed=0 - idle_pane_count=0 - continue - ;; - esac - fi - - # Check phase-changed marker from PostToolUse hook — if present, the hook - # detected a phase file write so we reset last_mtime to force processing - # this cycle instead of waiting for the next mtime change. - local phase_marker="/tmp/phase-changed-${_session}.marker" - if [ -f "$phase_marker" ]; then - rm -f "$phase_marker" - last_mtime=0 - fi - - # Check phase file for changes - local phase_mtime - phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0) - local current_phase - current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) - - if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then - # No phase change — check idle timeout - if [ "$idle_elapsed" -ge "$idle_timeout" ]; then - _MONITOR_LOOP_EXIT="idle_timeout" - agent_kill_session "${_session}" - return 0 - fi - # Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker - # file when Claude finishes a response. If the marker exists and no phase - # has been written, Claude returned to the prompt without following the - # phase protocol. 3 consecutive polls = confirmed idle (not mid-turn). - local idle_marker="/tmp/claude-idle-${_session}.ts" - if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then - idle_pane_count=$(( idle_pane_count + 1 )) - if [ "$idle_pane_count" -ge 3 ]; then - _MONITOR_LOOP_EXIT="idle_prompt" - # Session is killed before the callback is invoked. - # Callbacks that handle PHASE:failed must not assume the session is alive. 
- agent_kill_session "${_session}" - if type "${callback}" &>/dev/null; then - "$callback" "PHASE:failed" - fi - return 0 - fi - else - idle_pane_count=0 - fi - continue - fi - - # Phase changed - last_mtime="$phase_mtime" - # shellcheck disable=SC2034 # read by phase-handler.sh callback - LAST_PHASE_MTIME="$phase_mtime" - idle_elapsed=0 - idle_pane_count=0 - - # Terminal phases - case "$current_phase" in - PHASE:done|PHASE:merged) - _MONITOR_LOOP_EXIT="done" - if type "${callback}" &>/dev/null; then - "$callback" "$current_phase" - fi - return 0 - ;; - PHASE:failed|PHASE:escalate) - _MONITOR_LOOP_EXIT="$current_phase" - if type "${callback}" &>/dev/null; then - "$callback" "$current_phase" - fi - return 0 - ;; - esac - - # Non-terminal phase — call callback - if type "${callback}" &>/dev/null; then - "$callback" "$current_phase" - fi - done -} - -# Write context to a file for re-injection after context compaction. -# The SessionStart compact hook reads this file and outputs it to stdout. -# Args: phase_file content -write_compact_context() { - local phase_file="$1" - local content="$2" - local context_file="${phase_file%.phase}.context" - printf '%s\n' "$content" > "$context_file" -} - -# Kill a tmux session gracefully (no-op if not found). -agent_kill_session() { - local session="${1:-}" - [ -n "$session" ] && tmux kill-session -t "$session" 2>/dev/null || true - rm -f "/tmp/claude-idle-${session}.ts" - rm -f "/tmp/phase-changed-${session}.marker" - rm -f "/tmp/claude-exited-${session}.ts" - rm -f "/tmp/claude-nudge-${session}.count" -} - -# Read the current phase from a phase file, stripped of whitespace. 
-# Usage: read_phase [file] — defaults to $PHASE_FILE -read_phase() { - local file="${1:-${PHASE_FILE:-}}" - { cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]' -} diff --git a/lib/branch-protection.sh b/lib/branch-protection.sh index 81a2be1..e972977 100644 --- a/lib/branch-protection.sh +++ b/lib/branch-protection.sh @@ -51,14 +51,30 @@ setup_vault_branch_protection() { _bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}" - # Check if branch exists - local branch_exists - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + # Check if branch exists with retry loop (handles race condition after initial push) + local branch_exists="0" + local max_attempts=3 + local attempt=1 + + while [ "$attempt" -le "$max_attempts" ]; do + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" = "200" ]; then + _bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}" + break + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
+ sleep 2 + fi + attempt=$((attempt + 1)) + done if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist" + _bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts" return 1 fi @@ -228,14 +244,30 @@ setup_profile_branch_protection() { local api_url api_url="${FORGE_URL}/api/v1/repos/${repo}" - # Check if branch exists - local branch_exists - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + # Check if branch exists with retry loop (handles race condition after initial push) + local branch_exists="0" + local max_attempts=3 + local attempt=1 + + while [ "$attempt" -le "$max_attempts" ]; do + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" = "200" ]; then + _bp_log "Branch ${branch} exists on ${repo}" + break + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
+ sleep 2 + fi + attempt=$((attempt + 1)) + done if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${repo}" + _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" return 1 fi @@ -379,7 +411,7 @@ remove_branch_protection() { # - Allow review-bot to approve PRs # # Args: -# $1 - Repo path in format 'owner/repo' (e.g., 'johba/disinto') +# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto') # $2 - Branch to protect (default: main) # # Returns: 0 on success, 1 on failure @@ -398,14 +430,30 @@ setup_project_branch_protection() { local api_url api_url="${FORGE_URL}/api/v1/repos/${repo}" - # Check if branch exists - local branch_exists - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + # Check if branch exists with retry loop (handles race condition after initial push) + local branch_exists="0" + local max_attempts=3 + local attempt=1 + + while [ "$attempt" -le "$max_attempts" ]; do + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") + + if [ "$branch_exists" = "200" ]; then + _bp_log "Branch ${branch} exists on ${repo}" + break + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
+ sleep 2 + fi + attempt=$((attempt + 1)) + done if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${repo}" + _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" return 1 fi @@ -536,7 +584,7 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then echo "Required environment variables:" echo " FORGE_TOKEN Forgejo API token (admin user recommended)" echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)" - echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., johba/disinto-ops)" + echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)" exit 0 ;; esac diff --git a/lib/ci-helpers.sh b/lib/ci-helpers.sh index 42f306e..11c668e 100644 --- a/lib/ci-helpers.sh +++ b/lib/ci-helpers.sh @@ -7,27 +7,6 @@ set -euo pipefail # ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh) # classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh) -# ensure_blocked_label_id — look up (or create) the "blocked" label, print its ID. -# Caches the result in _BLOCKED_LABEL_ID to avoid repeated API calls. -# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() -ensure_blocked_label_id() { - if [ -n "${_BLOCKED_LABEL_ID:-}" ]; then - printf '%s' "$_BLOCKED_LABEL_ID" - return 0 - fi - _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \ - | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true) - if [ -z "$_BLOCKED_LABEL_ID" ]; then - _BLOCKED_LABEL_ID=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/labels" \ - -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null || true) - fi - printf '%s' "$_BLOCKED_LABEL_ID" -} - # ensure_priority_label — look up (or create) the "priority" label, print its ID. # Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls. 
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() diff --git a/lib/ci-setup.sh b/lib/ci-setup.sh new file mode 100644 index 0000000..7c4c5dd --- /dev/null +++ b/lib/ci-setup.sh @@ -0,0 +1,455 @@ +#!/usr/bin/env bash +# ============================================================================= +# ci-setup.sh — CI setup functions for Woodpecker and cron configuration +# +# Internal functions (called via _load_ci_context + _*_impl): +# _install_cron_impl() - Install crontab entries for project agents +# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker +# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow +# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker +# +# Globals expected (asserted by _load_ci_context): +# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) +# FORGE_TOKEN - Forge API token +# FACTORY_ROOT - Root of the disinto factory +# +# Usage: +# source "${FACTORY_ROOT}/lib/ci-setup.sh" +# ============================================================================= +set -euo pipefail + +# Assert required globals are set before using this module. +_load_ci_context() { + local missing=() + [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") + [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") + [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") + if [ "${#missing[@]}" -gt 0 ]; then + echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2 + exit 1 + fi +} + +# Generate and optionally install cron entries for the project agents. +# Usage: install_cron <name> <toml_path> <auto_yes> <bare> +_install_cron_impl() { + local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" + + # In compose mode, skip host cron — the agents container runs cron internally + if [ "$bare" = false ]; then + echo "" + echo "Cron: skipped (agents container handles scheduling in compose mode)" + return + fi + + # Bare mode: crontab is required on the host + if ! 
command -v crontab &>/dev/null; then + echo "Error: crontab not found (required for bare-metal mode)" >&2 + echo " Install: apt install cron / brew install cron" >&2 + exit 1 + fi + + # Use absolute path for the TOML in cron entries + local abs_toml + abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" + + local cron_block + cron_block="# disinto: ${name} +2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 +4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 +0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" + + echo "" + echo "Cron entries to install:" + echo "$cron_block" + echo "" + + # Check if cron entries already exist + local current_crontab + current_crontab=$(crontab -l 2>/dev/null || true) + if echo "$current_crontab" | grep -q "# disinto: ${name}"; then + echo "Cron: skipped (entries for ${name} already installed)" + return + fi + + if [ "$auto_yes" = false ] && [ -t 0 ]; then + read -rp "Install these cron entries? [y/N] " confirm + if [[ ! "$confirm" =~ ^[Yy] ]]; then + echo "Skipped cron install. Add manually with: crontab -e" + return + fi + fi + + # Append to existing crontab + if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then + echo "Cron entries installed for ${name}" + else + echo "Error: failed to install cron entries" >&2 + return 1 + fi +} + +# Set up Woodpecker CI to use Forgejo as its forge backend. +# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
+# Usage: create_woodpecker_oauth <forge_url> <repo_slug> +_create_woodpecker_oauth_impl() { + local forge_url="$1" + local _repo_slug="$2" # unused but required for signature compatibility + + echo "" + echo "── Woodpecker OAuth2 setup ────────────────────────────" + + # Create OAuth2 application on Forgejo for Woodpecker + local oauth2_name="woodpecker-ci" + local redirect_uri="http://localhost:8000/authorize" + local existing_app client_id client_secret + + # Check if OAuth2 app already exists + existing_app=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ + | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true + + if [ -n "$existing_app" ]; then + echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" + client_id="$existing_app" + else + local oauth2_resp + oauth2_resp=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/applications/oauth2" \ + -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ + 2>/dev/null) || oauth2_resp="" + + if [ -z "$oauth2_resp" ]; then + echo "Warning: failed to create OAuth2 app on Forgejo" >&2 + return + fi + + client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') + client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') + + if [ -z "$client_id" ]; then + echo "Warning: OAuth2 app creation returned no client_id" >&2 + return + fi + + echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" + fi + + # Store Woodpecker forge config in .env + # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references + # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri + local env_file="${FACTORY_ROOT}/.env" + local wp_vars=( + "WOODPECKER_FORGEJO=true" + "WOODPECKER_FORGEJO_URL=${forge_url}" 
+ "WOODPECKER_HOST=http://localhost:8000" + ) + if [ -n "${client_id:-}" ]; then + wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") + fi + if [ -n "${client_secret:-}" ]; then + wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") + fi + + for var_line in "${wp_vars[@]}"; do + local var_name="${var_line%%=*}" + if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then + sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" + else + printf '%s\n' "$var_line" >> "$env_file" + fi + done + echo "Config: Woodpecker forge vars written to .env" +} + +# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. +# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). +# Called after compose stack is up, before activate_woodpecker_repo. +# Usage: generate_woodpecker_token <forge_url> +_generate_woodpecker_token_impl() { + local forge_url="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + local env_file="${FACTORY_ROOT}/.env" + local admin_user="disinto-admin" + local admin_pass="${_FORGE_ADMIN_PASS:-}" + + # Skip if already set + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + echo "Config: WOODPECKER_TOKEN already set in .env" + return 0 + fi + + echo "" + echo "── Woodpecker token generation ────────────────────────" + + if [ -z "$admin_pass" ]; then + echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 + echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 + return 1 + fi + + # Wait for Woodpecker to become ready + echo -n "Waiting for Woodpecker" + local retries=0 + while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 30 ]; then + echo "" + echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 + return 1 + fi + echo -n "." 
+ sleep 2 + done + echo " ready" + + # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token + local cookie_jar auth_body_file + cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) + auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) + + # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) + local csrf + csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ + | grep -o 'name="_csrf"[^>]*' | head -1 \ + | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || csrf="" + + if [ -z "$csrf" ]; then + echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ + -o /dev/null \ + "${forge_url}/user/login" \ + --data-urlencode "_csrf=${csrf}" \ + --data-urlencode "user_name=${admin_user}" \ + --data-urlencode "password=${admin_pass}" \ + 2>/dev/null || true + + # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) + local wp_redir + wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ + "${wp_server}/authorize" 2>/dev/null) || wp_redir="" + + if [ -z "$wp_redir" ]; then + echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + # Rewrite internal Docker network URLs to host-accessible URLs. + # Handle both plain and URL-encoded forms of the internal hostnames. 
+ local forge_url_enc wp_server_enc + forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') + wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') + wp_redir=$(printf '%s' "$wp_redir" \ + | sed "s|http://forgejo:3000|${forge_url}|g" \ + | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ + | sed "s|http://woodpecker:8000|${wp_server}|g" \ + | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") + + # Step 3: Hit Forgejo OAuth authorize endpoint with session + # First time: shows consent page. Already approved: redirects with code. + local auth_headers redirect_loc auth_code + auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o "$auth_body_file" \ + "$wp_redir" 2>/dev/null) || auth_headers="" + + redirect_loc=$(printf '%s' "$auth_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + # Auto-approved: extract code from redirect + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + else + # Consent page: extract CSRF and all form fields, POST grant approval + local consent_csrf form_client_id form_state form_redirect_uri + consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ + | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || consent_csrf="" + form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" + form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" + form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" + + if [ -n "$consent_csrf" ]; then + local grant_headers + grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o /dev/null -X POST \ + "${forge_url}/login/oauth/grant" \ 
+ --data-urlencode "_csrf=${consent_csrf}" \ + --data-urlencode "client_id=${form_client_id}" \ + --data-urlencode "state=${form_state}" \ + --data-urlencode "scope=" \ + --data-urlencode "nonce=" \ + --data-urlencode "redirect_uri=${form_redirect_uri}" \ + --data-urlencode "granted=true" \ + 2>/dev/null) || grant_headers="" + + redirect_loc=$(printf '%s' "$grant_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + fi + fi + fi + + rm -f "$auth_body_file" + + if [ -z "${auth_code:-}" ]; then + echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 + rm -f "$cookie_jar" + return 1 + fi + + # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) + local state + state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') + + local wp_headers wp_token + wp_headers=$(curl -sf -c "$cookie_jar" \ + -D - -o /dev/null \ + "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ + 2>/dev/null) || wp_headers="" + + # Extract token from redirect URL (Woodpecker returns ?access_token=...) + redirect_loc=$(printf '%s' "$wp_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + wp_token="" + if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then + wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') + fi + + # Fallback: check for user_sess cookie + if [ -z "$wp_token" ]; then + wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" + fi + + rm -f "$cookie_jar" + + if [ -z "$wp_token" ]; then + echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 + return 1 + fi + + # Step 5: Create persistent personal access token via Woodpecker API + # WP v3 requires CSRF header for POST operations with session tokens. 
+ local wp_csrf + wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ + "${wp_server}/web-config.js" 2>/dev/null \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" + + local pat_resp final_token + pat_resp=$(curl -sf -X POST \ + -b "user_sess=${wp_token}" \ + ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ + "${wp_server}/api/user/token" \ + 2>/dev/null) || pat_resp="" + + final_token="" + if [ -n "$pat_resp" ]; then + final_token=$(printf '%s' "$pat_resp" \ + | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ + 2>/dev/null) || final_token="" + fi + + # Use persistent token if available, otherwise use session token + final_token="${final_token:-$wp_token}" + + # Save to .env + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" + else + printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" + fi + export WOODPECKER_TOKEN="$final_token" + echo "Config: WOODPECKER_TOKEN generated and saved to .env" +} + +# Activate a repo in Woodpecker CI. +# Usage: activate_woodpecker_repo <forge_repo> +_activate_woodpecker_repo_impl() { + local forge_repo="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + + # Wait for Woodpecker to become ready after stack start + local retries=0 + while [ $retries -lt 10 ]; do + if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then + break + fi + retries=$((retries + 1)) + sleep 2 + done + + if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then + echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 + return + fi + + echo "" + echo "── Woodpecker repo activation ─────────────────────────" + + local wp_token="${WOODPECKER_TOKEN:-}" + if [ -z "$wp_token" ]; then + echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + return + fi + + local wp_repo_id + wp_repo_id=$(curl -sf \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" + else + # Get Forgejo repo numeric ID for WP activation + local forge_repo_id + forge_repo_id=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" + + local activate_resp + activate_resp=$(curl -sf -X POST \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ + 2>/dev/null) || activate_resp="" + + wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" + + # Set pipeline timeout to 5 minutes (default is 60) + if curl -sf -X PATCH \ + -H "Authorization: Bearer ${wp_token}" \ + -H "Content-Type: application/json" \ + "${wp_server}/api/repos/${wp_repo_id}" \ + -d '{"timeout": 5}' >/dev/null 2>&1; then + echo "Config: pipeline timeout set to 5 minutes" + fi + else + echo "Warning: could not activate repo in Woodpecker" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + fi + 
fi + + # Store repo ID for later TOML generation + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + _WP_REPO_ID="$wp_repo_id" + fi +} diff --git a/lib/env.sh b/lib/env.sh index cc0906c..0c1d73e 100755 --- a/lib/env.sh +++ b/lib/env.sh @@ -13,59 +13,61 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" if [ "${DISINTO_CONTAINER:-}" = "1" ]; then DISINTO_DATA_DIR="${HOME}/data" DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs" - mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics} + mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher} else DISINTO_LOG_DIR="${FACTORY_ROOT}" fi export DISINTO_LOG_DIR # Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env. -# Always source .env — cron jobs inside the container do NOT inherit compose -# env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are -# already set and won't be clobbered since env.sh uses ${VAR:-default} patterns -# for derived values. FORGE_URL from .env (localhost:3000) is overridden below -# by the compose-injected value when running via docker exec. -if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then - set -a - _saved_forge_url="${FORGE_URL:-}" - _saved_forge_token="${FORGE_TOKEN:-}" - # Use temp file + validate dotenv format before sourcing (avoids eval injection) - # SOPS -d automatically verifies MAC/GCM authentication tag during decryption - _tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; } - if ! sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then - echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2 +# Inside containers (DISINTO_CONTAINER=1), compose environment is the source of truth. +# On bare metal, .env/.env.enc is sourced to provide default values. 
+if [ "${DISINTO_CONTAINER:-}" != "1" ]; then + if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then + set -a + _saved_forge_url="${FORGE_URL:-}" + # Use temp file + validate dotenv format before sourcing (avoids eval injection) + # SOPS -d automatically verifies MAC/GCM authentication tag during decryption + _tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; } + if ! sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then + echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2 + rm -f "$_tmpenv" + exit 1 + fi + # Validate: non-empty, non-comment lines must match KEY=value pattern + # Filter out blank lines and comments before validation + _validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true) + if [ -n "$_validated" ]; then + # Write validated content to a second temp file and source it + _validated_env=$(mktemp) + printf '%s\n' "$_validated" > "$_validated_env" + # shellcheck source=/dev/null + source "$_validated_env" + rm -f "$_validated_env" + else + echo "Error: .env.enc decryption output failed format validation" >&2 + rm -f "$_tmpenv" + exit 1 + fi rm -f "$_tmpenv" - exit 1 - fi - # Validate: non-empty, non-comment lines must match KEY=value pattern - # Filter out blank lines and comments before validation - _validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true) - if [ -n "$_validated" ]; then - # Write validated content to a second temp file and source it - _validated_env=$(mktemp) - printf '%s\n' "$_validated" > "$_validated_env" + set +a + [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" + elif [ -f "$FACTORY_ROOT/.env" ]; then + # Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker) + _saved_forge_url="${FORGE_URL:-}" + set -a # shellcheck source=/dev/null - source "$_validated_env" - rm -f "$_validated_env" - else - echo "Error: .env.enc 
decryption output failed format validation" >&2 - rm -f "$_tmpenv" - exit 1 + source "$FACTORY_ROOT/.env" + set +a + [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" fi - rm -f "$_tmpenv" - set +a - [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" - [ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token" -elif [ -f "$FACTORY_ROOT/.env" ]; then - # Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker) - _saved_forge_url="${FORGE_URL:-}" - _saved_forge_token="${FORGE_TOKEN:-}" - set -a - # shellcheck source=/dev/null - source "$FACTORY_ROOT/.env" - set +a - [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" - [ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token" +fi + +# Allow per-container token override (#375): .env sets the default FORGE_TOKEN +# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a +# different Forgejo identity (e.g. dev-qwen). +if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then + export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE" fi # PATH: foundry, node, system @@ -77,16 +79,11 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML" fi -# Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN -if [ -z "${FORGE_TOKEN:-}" ]; then - FORGE_TOKEN="${CODEBERG_TOKEN:-}" -fi -export FORGE_TOKEN -export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat +# Forge token +export FORGE_TOKEN="${FORGE_TOKEN:-}" -# Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN +# Review bot token export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}" -export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat # Per-agent tokens (#747): each agent gets its own Forgejo identity. # Falls back to FORGE_TOKEN for backwards compat with single-token setups. 
@@ -97,18 +94,14 @@ export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" -# Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES -export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}}" -export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat +# Bot usernames filter +export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}" -# Project config (FORGE_* preferred, CODEBERG_* fallback) -export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}" -export CODEBERG_REPO="${FORGE_REPO}" # backwards compat +# Project config +export FORGE_REPO="${FORGE_REPO:-}" export FORGE_URL="${FORGE_URL:-http://localhost:3000}" export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}" export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}" -export CODEBERG_API="${FORGE_API}" # backwards compat -export CODEBERG_WEB="${FORGE_WEB}" # backwards compat # tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo) if [ -z "${TEA_LOGIN:-}" ]; then case "${FORGE_URL}" in @@ -144,8 +137,12 @@ unset CLAWHUB_TOKEN 2>/dev/null || true export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1 # Shared log helper +# Usage: log "message" +# Output: [2026-04-03T14:00:00Z] agent: message +# Where agent is set via LOG_AGENT variable (defaults to caller's context) log() { - printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" + local agent="${LOG_AGENT:-agent}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" } # ============================================================================= @@ -209,8 +206,6 @@ forge_api() { -H "Content-Type: 
application/json" \ "${FORGE_API}${path}" "$@" } -# Backwards-compat alias -codeberg_api() { forge_api "$@"; } # Paginate a Forge API GET endpoint and return all items as a merged JSON array. # Usage: forge_api_all /path (no existing query params) @@ -227,7 +222,8 @@ forge_api_all() { page=1 while true; do page_items=$(forge_api GET "${path_prefix}${sep}limit=50&page=${page}") - count=$(printf '%s' "$page_items" | jq 'length') + count=$(printf '%s' "$page_items" | jq 'length' 2>/dev/null) || count=0 + [ -z "$count" ] && count=0 [ "$count" -eq 0 ] && break all_items=$(printf '%s\n%s' "$all_items" "$page_items" | jq -s 'add') [ "$count" -lt 50 ] && break @@ -253,13 +249,13 @@ woodpecker_api() { fi curl -sfL \ - -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ - "${WOODPECKER_SERVER}/api${path}" "$@" + -H "Authorization: Bearer ${WOODPECKER_TOKEN:-}" \ + "${WOODPECKER_SERVER:-}/api${path}" "$@" } # Woodpecker DB query helper wpdb() { - PGPASSWORD="${WOODPECKER_DB_PASSWORD}" psql \ + PGPASSWORD="${WOODPECKER_DB_PASSWORD:-}" psql \ -U "${WOODPECKER_DB_USER:-woodpecker}" \ -h "${WOODPECKER_DB_HOST:-127.0.0.1}" \ -d "${WOODPECKER_DB_NAME:-woodpecker}" \ diff --git a/lib/file-action-issue.sh b/lib/file-action-issue.sh deleted file mode 100644 index abba4c8..0000000 --- a/lib/file-action-issue.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# file-action-issue.sh — File an action issue for a formula run -# -# Usage: source this file, then call file_action_issue. -# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh -# -# file_action_issue <formula_name> <title> <body> -# Sets FILED_ISSUE_NUM on success. 
-# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected - -# Load secret scanner -# shellcheck source=secret-scan.sh -source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh" - -file_action_issue() { - local formula_name="$1" title="$2" body="$3" - FILED_ISSUE_NUM="" - - # Secret scan: reject issue bodies containing embedded secrets - if ! scan_for_secrets "$body"; then - echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2 - return 4 - fi - - # Dedup: skip if an open action issue for this formula already exists - local open_actions - open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true) - if [ -n "$open_actions" ] && [ "$open_actions" != "null" ]; then - local existing - existing=$(printf '%s' "$open_actions" | \ - jq --arg f "$formula_name" '[.[] | select(.title | test($f))] | length' 2>/dev/null || echo 0) - if [ "${existing:-0}" -gt 0 ]; then - return 1 - fi - fi - - # Fetch 'action' label ID - local action_label_id - action_label_id=$(forge_api GET "/labels" 2>/dev/null | \ - jq -r '.[] | select(.name == "action") | .id' 2>/dev/null || true) - if [ -z "$action_label_id" ]; then - return 2 - fi - - # Create the issue - local payload result - payload=$(jq -nc \ - --arg title "$title" \ - --arg body "$body" \ - --argjson labels "[$action_label_id]" \ - '{title: $title, body: $body, labels: $labels}') - - result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true) - FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true) - - if [ -z "$FILED_ISSUE_NUM" ]; then - return 3 - fi -} diff --git a/lib/forge-push.sh b/lib/forge-push.sh new file mode 100644 index 0000000..1da61f7 --- /dev/null +++ b/lib/forge-push.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# ============================================================================= +# forge-push.sh — push_to_forge() 
#
# Handles pushing a local clone to the Forgejo remote and verifying the push.
#
# Globals expected:
#   FORGE_URL      - Forge instance URL (e.g. http://localhost:3000)
#   FORGE_TOKEN    - API token for Forge operations (used for API verification)
#   FORGE_PASS     - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x)
#   FACTORY_ROOT   - Root of the disinto factory
#   PRIMARY_BRANCH - Primary branch name (e.g. main)
#
# Usage:
#   source "${FACTORY_ROOT}/lib/forge-push.sh"
#   push_to_forge <repo_root> <forge_url> <repo_slug> [push_user]
# =============================================================================
set -euo pipefail

# Assert required globals are set before using this module.
# Prints every missing name on stderr and exits 1 if any is unset or empty.
_assert_forge_push_globals() {
  local name
  local -a missing=()
  for name in FORGE_URL FORGE_PASS FORGE_TOKEN FACTORY_ROOT PRIMARY_BRANCH; do
    # ${!name:-} is an indirect read that stays safe under `set -u`.
    if [ -z "${!name:-}" ]; then
      missing+=("$name")
    fi
  done
  if [ "${#missing[@]}" -gt 0 ]; then
    echo "Error: forge-push.sh requires these globals to be set: ${missing[*]}" >&2
    exit 1
  fi
}

# Percent-encode a string for use in the userinfo part of a URL.
# Unreserved characters (A-Z a-z 0-9 . _ ~ -) pass through unchanged, so
# purely alphanumeric passwords come out identical to the input.
_forge_push_urlencode() {
  local raw="$1" encoded="" ch hex i
  local LC_ALL=C # operate on bytes, not locale-dependent characters
  for ((i = 0; i < ${#raw}; i++)); do
    ch="${raw:i:1}"
    case "$ch" in
      [a-zA-Z0-9._~-]) encoded+="$ch" ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        encoded+="$hex"
        ;;
    esac
  done
  printf '%s' "$encoded"
}

# Push local clone to the Forgejo remote and verify the repo is non-empty.
#
# Arguments:
#   $1 - repo_root: path of the local clone
#   $2 - forge_url: base URL of the forge (scheme://host[:port])
#   $3 - repo_slug: org/repo
#   $4 - push_user: account used for git HTTP auth (optional, default dev-bot)
# Globals:
#   FORGE_PASS (read)  - password for push_user; required
#   FORGE_TOKEN (read) - API token used only for the post-push verification
# Returns:
#   0 on success, or when verification could not be performed
#   1 if FORGE_PASS is missing, a push fails, or the repo still reports empty
# Note: the password is written into the `forgejo` remote URL of the clone.
push_to_forge() {
  local repo_root="$1" forge_url="$2" repo_slug="$3"
  local push_user="${4:-dev-bot}"

  # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works.
  if [ -z "${FORGE_PASS:-}" ]; then
    echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2
    return 1
  fi

  # Build scheme://user:password@host:port without routing the password
  # through sed, so characters such as | & \ / @ cannot corrupt the URL.
  local userinfo auth_url
  userinfo="$(_forge_push_urlencode "$push_user"):$(_forge_push_urlencode "$FORGE_PASS")"
  case "$forge_url" in
    *://*) auth_url="${forge_url%%://*}://${userinfo}@${forge_url#*://}" ;;
    *) auth_url="$forge_url" ;; # no scheme separator: leave untouched
  esac
  local remote_url="${auth_url}/${repo_slug}.git"
  # Display URL without credentials
  local display_url="${forge_url}/${repo_slug}.git"

  # Always set the remote URL to ensure credentials are current
  if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then
    git -C "$repo_root" remote set-url forgejo "$remote_url"
  else
    git -C "$repo_root" remote add forgejo "$remote_url"
  fi
  echo "Remote: forgejo -> ${display_url}"

  # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo)
  if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then
    echo "Push: skipped (local repo has no commits)"
    return 0
  fi

  # Push all branches and tags
  echo "Pushing: branches to forgejo"
  if ! git -C "$repo_root" push forgejo --all 2>&1; then
    echo "Error: failed to push branches to Forgejo" >&2
    return 1
  fi
  echo "Pushing: tags to forgejo"
  if ! git -C "$repo_root" push forgejo --tags 2>&1; then
    echo "Error: failed to push tags to Forgejo" >&2
    return 1
  fi

  # Verify the repo is no longer empty (Forgejo may need a moment to index
  # pushed refs, hence up to 5 polls).
  local is_empty="true" verify_attempt repo_info
  for ((verify_attempt = 1; verify_attempt <= 5; verify_attempt++)); do
    repo_info=$(curl -sf --max-time 10 \
      -H "Authorization: token ${FORGE_TOKEN:-}" \
      "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info=""
    if [ -z "$repo_info" ]; then
      is_empty="skipped"
      break # API unreachable, skip verification
    fi
    # jq's `//` treats false like null, so `.empty // "unknown"` can never
    # yield "false"; compare explicitly. A jq failure degrades to "unknown"
    # instead of aborting the caller under `set -e`.
    is_empty=$(printf '%s' "$repo_info" \
      | jq -r 'if .empty == true then "true" elif .empty == false then "false" else "unknown" end' \
        2>/dev/null) || is_empty="unknown"
    case "$is_empty" in
      false)
        echo "Verify: repo is not empty (push confirmed)"
        break
        ;;
      true) ;; # still empty: poll again
      *)
        is_empty="unknown"
        echo "Verify: could not determine repo state from API response" >&2
        break
        ;;
    esac
    if [ "$verify_attempt" -lt 5 ]; then
      sleep 2
    fi
  done
  if [ "$is_empty" = "true" ]; then
    echo "Warning: Forgejo repo still reports empty after push" >&2
    return 1
  fi
  return 0
}
# Assert required globals are set before using this module.
# Prints every missing name on stderr and exits 1 if any is unset or empty.
_load_init_context() {
  local name
  local -a missing=()
  for name in FORGE_URL FACTORY_ROOT PRIMARY_BRANCH; do
    if [ -z "${!name:-}" ]; then
      missing+=("$name")
    fi
  done
  if [ "${#missing[@]}" -gt 0 ]; then
    echo "Error: forge-setup.sh requires these globals to be set: ${missing[*]}" >&2
    exit 1
  fi
}

# Execute a command in the Forgejo container (for admin operations).
# Uses `docker exec` on the standalone container when DISINTO_BARE=true,
# otherwise `docker compose exec` against ${FACTORY_ROOT}/docker-compose.yml.
_forgejo_exec() {
  local use_bare="${DISINTO_BARE:-false}"
  if [ "$use_bare" = true ]; then
    docker exec -u git disinto-forgejo "$@"
  else
    docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@"
  fi
}

# Set KEY=value in a dotenv file: replace existing KEY= lines, else append.
# The value is passed to awk through the environment, so characters that are
# special to sed (| & \) are written literally.
_forge_setup_env_upsert() {
  local env_file="$1" key="$2" value="$3"
  if grep -q "^${key}=" "$env_file" 2>/dev/null; then
    local tmp rc=0
    tmp=$(mktemp) || return 1
    ENV_KEY="$key" ENV_VALUE="$value" awk '
      BEGIN { prefix = ENVIRON["ENV_KEY"] "="; line = prefix ENVIRON["ENV_VALUE"] }
      index($0, prefix) == 1 { print line; next }
      { print }
    ' "$env_file" > "$tmp" && cat "$tmp" > "$env_file" || rc=$?
    rm -f "$tmp"
    return "$rc"
  fi
  printf '%s=%s\n' "$key" "$value" >> "$env_file"
}

# Print "<prefix>-<up to 20 random alphanumerics>".
_forge_setup_random_secret() {
  printf '%s-%s' "$1" "$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
}

# Print the explicit port of a URL, or 3000 when the URL carries none.
_forge_setup_url_port() {
  if [[ "$1" =~ :([0-9]+)/?$ ]]; then
    printf '%s' "${BASH_REMATCH[1]}"
  else
    printf '3000'
  fi
}

# Create a Forgejo user through the admin CLI and confirm it via the API.
# Exits 1 when creation fails or the user is not visible afterwards.
# Args: label username password email is_admin(true|false) forge_url [api_token]
_forge_setup_create_user() {
  local label="$1" username="$2" password="$3" email="$4" is_admin="$5"
  local forge_url="$6" auth_token="${7:-}"
  local -a create_args=(
    --username "$username"
    --password "$password"
    --email "$email"
    --must-change-password=false
  )
  if [ "$is_admin" = true ]; then
    create_args=(--admin "${create_args[@]}")
  fi
  local -a verify_args=(-sf --max-time 5)
  if [ -n "$auth_token" ]; then
    verify_args+=(-H "Authorization: token ${auth_token}")
  fi

  echo "Creating ${label} user: ${username}"
  local create_output
  if ! create_output=$(_forgejo_exec forgejo admin user create "${create_args[@]}" 2>&1); then
    echo "Error: failed to create ${label} user '${username}':" >&2
    echo " ${create_output}" >&2
    exit 1
  fi
  # Forgejo 11.x ignores --must-change-password=false on create;
  # explicitly clear the flag so basic-auth token creation works.
  _forgejo_exec forgejo admin user change-password \
    --username "$username" \
    --password "$password" \
    --must-change-password=false

  if ! curl "${verify_args[@]}" "${forge_url}/api/v1/users/${username}" >/dev/null 2>&1; then
    echo "Error: ${label} user '${username}' not found after creation" >&2
    exit 1
  fi
}

# Delete a user's API tokens whose ids are selected by a jq filter.
# Best effort: lookup or delete failures are ignored.
# Args: forge_url user password jq_id_filter
_forge_setup_delete_tokens() {
  local forge_url="$1" user="$2" pass="$3" id_filter="$4"
  local token_ids tid
  token_ids=$(curl -sf -u "${user}:${pass}" \
    "${forge_url}/api/v1/users/${user}/tokens" 2>/dev/null \
    | jq -r "$id_filter" 2>/dev/null) || token_ids=""
  while IFS= read -r tid; do
    if [ -n "$tid" ]; then
      curl -sf -X DELETE -u "${user}:${pass}" \
        "${forge_url}/api/v1/users/${user}/tokens/${tid}" >/dev/null 2>&1 || true
    fi
  done <<< "$token_ids"
  return 0
}

# Create an all-scope API token using basic auth and print its sha1.
# Prints nothing when the request fails.
# Args: forge_url user password token_name
_forge_setup_create_token() {
  local forge_url="$1" user="$2" pass="$3" token_name="$4"
  curl -sf -X POST \
    -u "${user}:${pass}" \
    -H "Content-Type: application/json" \
    "${forge_url}/api/v1/users/${user}/tokens" \
    -d "{\"name\":\"${token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \
    | jq -r '.sha1 // empty'
}

# Provision or connect to a local Forgejo instance.
# Creates admin + bot users, generates API tokens, stores them in
# ${FACTORY_ROOT}/.env, creates each bot's .profile repo and the project repo.
# When DISINTO_BARE=true a standalone `docker run` is used; otherwise compose.
#
# Arguments:
#   $1 - forge_url (e.g. http://localhost:3000)
#   $2 - repo_slug (org/repo)
# Globals:
#   FACTORY_ROOT (read); DISINTO_BARE, FORGEJO_DATA_DIR (read, bare mode)
#   _FORGE_ADMIN_PASS (written); HUMAN_TOKEN, FORGE_*_TOKEN, FORGE_*_PASS,
#   CODEBERG_TOKEN, REVIEW_BOT_TOKEN (exported)
# Exits 1 on any unrecoverable provisioning failure.
setup_forge() {
  local forge_url="$1"
  local repo_slug="$2"
  local use_bare="${DISINTO_BARE:-false}"

  echo ""
  echo "── Forge setup ────────────────────────────────────────"

  # Check if Forgejo is already running
  if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then
    echo "Forgejo: ${forge_url} (already running)"
  else
    echo "Forgejo not reachable at ${forge_url}"
    echo "Starting Forgejo via Docker..."

    if ! command -v docker &>/dev/null; then
      echo "Error: docker not found — needed to provision Forgejo" >&2
      echo " Install Docker or start Forgejo manually at ${forge_url}" >&2
      exit 1
    fi

    # Host port to publish; falls back to 3000 when the URL has no port.
    local forge_port
    forge_port=$(_forge_setup_url_port "$forge_url")

    if [ "$use_bare" = true ]; then
      # Bare-metal mode: standalone docker run
      if [ -z "${FORGEJO_DATA_DIR:-}" ]; then
        echo "Error: FORGEJO_DATA_DIR must be set to start Forgejo in bare mode" >&2
        exit 1
      fi
      mkdir -p "${FORGEJO_DATA_DIR}"

      if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then
        docker start disinto-forgejo >/dev/null 2>&1 || true
      else
        docker run -d \
          --name disinto-forgejo \
          --restart unless-stopped \
          -p "${forge_port}:3000" \
          -p 2222:22 \
          -v "${FORGEJO_DATA_DIR}:/data" \
          -e "FORGEJO__database__DB_TYPE=sqlite3" \
          -e "FORGEJO__server__ROOT_URL=${forge_url}/" \
          -e "FORGEJO__server__HTTP_PORT=3000" \
          -e "FORGEJO__service__DISABLE_REGISTRATION=true" \
          codeberg.org/forgejo/forgejo:11.0
      fi
    else
      # Compose mode: start Forgejo via docker compose
      docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo
    fi

    # Wait for Forgejo to become healthy
    echo -n "Waiting for Forgejo to start"
    local retries=0
    while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do
      retries=$((retries + 1))
      if [ "$retries" -gt 60 ]; then
        echo ""
        echo "Error: Forgejo did not become ready within 60s" >&2
        exit 1
      fi
      echo -n "."
      sleep 1
    done
    echo " ready"
  fi

  # Wait for Forgejo database to accept writes (API may be ready before DB is)
  echo -n "Waiting for Forgejo database"
  local db_ready=false attempt
  for ((attempt = 1; attempt <= 30; attempt++)); do
    if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then
      db_ready=true
      break
    fi
    echo -n "."
    sleep 1
  done
  echo ""
  if [ "$db_ready" != true ]; then
    echo "Error: Forgejo database not ready after 30s" >&2
    exit 1
  fi

  # ── Admin user ──────────────────────────────────────────────────────────
  local admin_user="disinto-admin"
  local admin_pass=""
  local env_file="${FACTORY_ROOT}/.env"

  # Re-read persisted admin password if available (#158)
  if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
    admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
  fi
  # Generate a fresh password only when none was persisted
  if [ -z "$admin_pass" ]; then
    admin_pass=$(_forge_setup_random_secret admin)
  fi

  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
    _forge_setup_create_user admin "$admin_user" "$admin_pass" \
      "admin@disinto.local" true "$forge_url"
    # Persist admin password to .env for idempotent re-runs (#158)
    _forge_setup_env_upsert "$env_file" FORGE_ADMIN_PASS "$admin_pass"
  else
    echo "Admin user: ${admin_user} (already exists)"
    # Only reset password if basic auth fails (#158, #267)
    # Forgejo 11.x may ignore --must-change-password=false, blocking token creation
    if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \
      "${forge_url}/api/v1/user" >/dev/null 2>&1; then
      _forgejo_exec forgejo admin user change-password \
        --username "${admin_user}" \
        --password "${admin_pass}" \
        --must-change-password=false
      # Persist the password we just set; otherwise every later run would
      # generate yet another password and reset the account again.
      _forge_setup_env_upsert "$env_file" FORGE_ADMIN_PASS "$admin_pass"
    fi
  fi
  # Preserve password for Woodpecker OAuth2 token generation (#779)
  _FORGE_ADMIN_PASS="$admin_pass"

  # ── Human user ──────────────────────────────────────────────────────────
  # The human account currently shares the admin username. A freshly
  # generated password would never match that existing account, making every
  # basic-auth call below fail, so reuse the admin password in that case.
  local human_user="disinto-admin"
  local human_pass
  if [ "$human_user" = "$admin_user" ]; then
    human_pass="$admin_pass"
  else
    human_pass=$(_forge_setup_random_secret admin)
  fi

  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
    _forge_setup_create_user human "$human_user" "$human_pass" \
      "admin@disinto.local" true "$forge_url"
    echo " Human user '${human_user}' created as site admin"
  else
    echo "Human user: ${human_user} (already exists)"
  fi

  # ── Admin token ─────────────────────────────────────────────────────────
  # Delete existing admin token if present (token sha1 is only returned at
  # creation time), then create a fresh one.
  _forge_setup_delete_tokens "$forge_url" "$admin_user" "$admin_pass" \
    '.[] | select(.name == "disinto-admin-token") | .id'

  local admin_token
  admin_token=$(_forge_setup_create_token "$forge_url" "$admin_user" "$admin_pass" \
    "disinto-admin-token") || admin_token=""
  if [ -z "$admin_token" ]; then
    echo "Error: failed to obtain admin API token" >&2
    exit 1
  fi

  # ── Human token ─────────────────────────────────────────────────────────
  local human_token=""
  if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
    _forge_setup_delete_tokens "$forge_url" "$human_user" "$human_pass" \
      '.[] | select(.name == "disinto-human-token") | .id'

    human_token=$(_forge_setup_create_token "$forge_url" "$human_user" "$human_pass" \
      "disinto-human-token") || human_token=""

    if [ -n "$human_token" ]; then
      _forge_setup_env_upsert "$env_file" HUMAN_TOKEN "$human_token"
      export HUMAN_TOKEN="$human_token"
      echo " Human token saved (HUMAN_TOKEN)"
    fi
  fi

  # ── Bot users and tokens ────────────────────────────────────────────────
  # Each agent gets its own Forgejo account for identity and audit trail (#747).
  local -a bot_users=(
    dev-bot review-bot planner-bot gardener-bot
    vault-bot supervisor-bot predictor-bot architect-bot
  )
  # Map: bot-username -> env-var-name for the token
  local -A bot_token_vars=(
    [dev-bot]="FORGE_TOKEN"
    [review-bot]="FORGE_REVIEW_TOKEN"
    [planner-bot]="FORGE_PLANNER_TOKEN"
    [gardener-bot]="FORGE_GARDENER_TOKEN"
    [vault-bot]="FORGE_VAULT_TOKEN"
    [supervisor-bot]="FORGE_SUPERVISOR_TOKEN"
    [predictor-bot]="FORGE_PREDICTOR_TOKEN"
    [architect-bot]="FORGE_ARCHITECT_TOKEN"
  )
  # Map: bot-username -> env-var-name for the password
  # Forgejo 11.x API tokens don't work for git HTTP push (#361).
  # Store passwords so agents can use password auth for git operations.
  local -A bot_pass_vars=(
    [dev-bot]="FORGE_PASS"
    [review-bot]="FORGE_REVIEW_PASS"
    [planner-bot]="FORGE_PLANNER_PASS"
    [gardener-bot]="FORGE_GARDENER_PASS"
    [vault-bot]="FORGE_VAULT_PASS"
    [supervisor-bot]="FORGE_SUPERVISOR_PASS"
    [predictor-bot]="FORGE_PREDICTOR_PASS"
    [architect-bot]="FORGE_ARCHITECT_PASS"
  )

  local bot_user bot_pass token token_var pass_var

  for bot_user in "${bot_users[@]}"; do
    bot_pass=$(_forge_setup_random_secret bot)
    token_var="${bot_token_vars[$bot_user]}"
    pass_var="${bot_pass_vars[$bot_user]}"

    if ! curl -sf --max-time 5 \
      -H "Authorization: token ${admin_token}" \
      "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
      _forge_setup_create_user bot "$bot_user" "$bot_pass" \
        "${bot_user}@disinto.local" false "$forge_url" "$admin_token"
      echo " ${bot_user} user created"
    else
      echo " ${bot_user} user exists (resetting password for token generation)"
      # User exists but may not have a known password.
      # Reset it via the admin CLI so we can generate a new token.
      if ! _forgejo_exec forgejo admin user change-password \
        --username "${bot_user}" \
        --password "${bot_pass}" \
        --must-change-password=false; then
        echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2
        exit 1
      fi
    fi

    # Generate token via API (basic auth as the bot user — Forgejo requires
    # basic auth on POST /users/{username}/tokens, token auth is rejected).
    # Delete every existing token first to avoid a name collision.
    _forge_setup_delete_tokens "$forge_url" "$bot_user" "$bot_pass" '.[].id // empty'

    token=$(_forge_setup_create_token "$forge_url" "$bot_user" "$bot_pass" \
      "disinto-${bot_user}-token") || token=""
    if [ -z "$token" ]; then
      echo "Error: failed to create API token for '${bot_user}'" >&2
      exit 1
    fi

    # Store token in .env under the per-agent variable name
    _forge_setup_env_upsert "$env_file" "$token_var" "$token"
    export "${token_var}=${token}"
    echo " ${bot_user} token generated and saved (${token_var})"

    # Store password in .env for git HTTP push (#361)
    _forge_setup_env_upsert "$env_file" "$pass_var" "$bot_pass"
    export "${pass_var}=${bot_pass}"
    echo " ${bot_user} password saved (${pass_var})"

    # Backwards-compat aliases for dev-bot and review-bot
    if [ "$bot_user" = "dev-bot" ]; then
      export CODEBERG_TOKEN="$token"
    elif [ "$bot_user" = "review-bot" ]; then
      export REVIEW_BOT_TOKEN="$token"
    fi
  done

  # ── .profile repos ──────────────────────────────────────────────────────
  # Create .profile repos for all bot users (if they don't already exist)
  echo ""
  echo "── Setting up .profile repos ────────────────────────────"

  local create_output
  for bot_user in "${bot_users[@]}"; do
    if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then
      echo " ${bot_user}/.profile already exists"
      continue
    fi

    echo "Creating ${bot_user}/.profile repo..."

    # Use the admin API so the repo lands in the bot user's namespace.
    # Success is decided by curl's exit status (-f fails on HTTP >= 400),
    # not by pattern-matching the response body.
    if create_output=$(curl -sf -X POST \
      -u "${admin_user}:${admin_pass}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/admin/users/${bot_user}/repos" \
      -d "{\"name\":\".profile\",\"description\":\"${bot_user}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1); then
      echo " Created ${bot_user}/.profile (via admin API)"
    else
      echo " Warning: failed to create ${bot_user}/.profile: ${create_output}" >&2
    fi
  done

  # Store FORGE_URL in .env if not already present
  if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then
    printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file"
  fi

  # ── Project repo ────────────────────────────────────────────────────────
  local org_name="${repo_slug%%/*}"
  local repo_name="${repo_slug##*/}"
  local api_token="${admin_token:-${FORGE_TOKEN}}"
  local repo_payload="{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}"

  if ! curl -sf --max-time 5 \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then

    # Try creating org first (ignore if exists)
    curl -sf -X POST \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs" \
      -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true

    # Create repo under org
    if ! curl -sf -X POST \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs/${org_name}/repos" \
      -d "$repo_payload" >/dev/null 2>&1; then
      # Fallback: create under the user namespace using the admin endpoint
      if [ -n "${admin_token:-}" ]; then
        if ! curl -sf -X POST \
          -H "Authorization: token ${admin_token}" \
          -H "Content-Type: application/json" \
          "${forge_url}/api/v1/admin/users/${org_name}/repos" \
          -d "$repo_payload" >/dev/null 2>&1; then
          echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2
          exit 1
        fi
      elif [ -n "${HUMAN_TOKEN:-}" ]; then
        if ! curl -sf -X POST \
          -H "Authorization: token ${HUMAN_TOKEN}" \
          -H "Content-Type: application/json" \
          "${forge_url}/api/v1/user/repos" \
          -d "$repo_payload" >/dev/null 2>&1; then
          echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2
          exit 1
        fi
      else
        echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2
        exit 1
      fi
    fi

    # Add all bot users as collaborators with appropriate permissions
    #   dev-bot: write (PR creation via lib/vault.sh)
    #   review-bot: read (PR review)
    #   planner-bot: write (prerequisites.md, memory)
    #   gardener-bot: write (backlog grooming)
    #   vault-bot: write (vault items)
    #   supervisor-bot: read (health monitoring)
    #   predictor-bot: read (pattern detection)
    #   architect-bot: write (sprint PRs)
    local bot_perm
    local -A bot_permissions=(
      [dev-bot]="write"
      [review-bot]="read"
      [planner-bot]="write"
      [gardener-bot]="write"
      [vault-bot]="write"
      [supervisor-bot]="read"
      [predictor-bot]="read"
      [architect-bot]="write"
    )
    for bot_user in "${!bot_permissions[@]}"; do
      bot_perm="${bot_permissions[$bot_user]}"
      curl -sf -X PUT \
        -H "Authorization: token ${api_token}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \
        -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true
    done

    # Add disinto-admin as admin collaborator
    curl -sf -X PUT \
      -H "Authorization: token ${api_token}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \
      -d '{"permission":"admin"}' >/dev/null 2>&1 || true

    echo "Repo: ${repo_slug} created on Forgejo"
  else
    echo "Repo: ${repo_slug} (already exists on Forgejo)"
  fi

  echo "Forge: ${forge_url} (ready)"
}
e6c6aae..fe256bf 100644 --- a/lib/formula-session.sh +++ b/lib/formula-session.sh @@ -1,24 +1,34 @@ #!/usr/bin/env bash # formula-session.sh — Shared helpers for formula-driven cron agents # -# Provides reusable functions for the common cron-wrapper + tmux-session -# pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. +# Provides reusable utility functions for the common cron-wrapper pattern +# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. # # Functions: # acquire_cron_lock LOCK_FILE — PID lock with stale cleanup -# check_memory [MIN_MB] — skip if available RAM too low # load_formula FORMULA_FILE — sets FORMULA_CONTENT # build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK -# start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude -# build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase) -# run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log -# formula_phase_callback PHASE — standard crash-recovery callback +# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env) +# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode) +# formula_worktree_setup WORKTREE — isolated worktree for formula execution # formula_prepare_profile_context — load lessons from .profile repo (pre-session) +# formula_lessons_block — return lessons block for prompt +# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal +# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT +# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo +# _profile_has_repo — check if agent has .profile repo +# _count_undigested_journals — count journal entries to digest +# _profile_digest_journals — digest journals into lessons +# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo +# resolve_agent_identity — resolve agent user login from FORGE_TOKEN +# 
build_graph_section — run build-graph.py and set GRAPH_SECTION +# build_scratch_instruction SCRATCH_FILE — return context scratch instruction +# read_scratch_context SCRATCH_FILE — return scratch file content block +# ensure_ops_repo — clone/pull ops repo +# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo +# cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale # -# Requires: lib/agent-session.sh sourced first (for create_agent_session, -# agent_kill_session, agent_inject_into_session). -# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE, -# PROJECT_REPO_ROOT, PROMPT (set by the calling script). +# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers. # ── Cron guards ────────────────────────────────────────────────────────── @@ -40,18 +50,6 @@ acquire_cron_lock() { trap 'rm -f "$_CRON_LOCK_FILE"' EXIT } -# check_memory [MIN_MB] -# Exits 0 (skip) if available memory is below MIN_MB (default 2000). -check_memory() { - local min_mb="${1:-2000}" - local avail_mb - avail_mb=$(free -m | awk '/Mem:/{print $7}') - if [ "${avail_mb:-0}" -lt "$min_mb" ]; then - log "run: skipping — only ${avail_mb}MB available (need ${min_mb})" - exit 0 - fi -} - # ── Agent identity resolution ──────────────────────────────────────────── # resolve_agent_identity @@ -75,6 +73,24 @@ resolve_agent_identity() { return 0 } +# ── Forge remote resolution ────────────────────────────────────────────── + +# resolve_forge_remote +# Resolves FORGE_REMOTE by matching FORGE_URL hostname against git remotes. +# Falls back to "origin" if no match found. +# Requires: FORGE_URL, git repo with remotes configured. +# Exports: FORGE_REMOTE (always set). 
+resolve_forge_remote() { + # Extract hostname from FORGE_URL (e.g., https://codeberg.org/user/repo -> codeberg.org) + _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||; s|:.*||') + # Find git remote whose push URL matches the forge host + FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') + # Fallback to origin if no match found + FORGE_REMOTE="${FORGE_REMOTE:-origin}" + export FORGE_REMOTE + log "forge remote: ${FORGE_REMOTE}" +} + # ── .profile repo management ────────────────────────────────────────────── # ensure_profile_repo [AGENT_IDENTITY] @@ -134,7 +150,7 @@ ensure_profile_repo() { # Checks if the agent has a .profile repo by querying Forgejo API. # Returns 0 if repo exists, 1 otherwise. _profile_has_repo() { - local agent_identity="${1:-${AGENT_IDENTITY:-}}" + local agent_identity="${AGENT_IDENTITY:-}" if [ -z "$agent_identity" ]; then if ! resolve_agent_identity; then @@ -170,8 +186,8 @@ _count_undigested_journals() { # Runs a claude -p one-shot to digest undigested journals into lessons-learned.md # Returns 0 on success, 1 on failure. _profile_digest_journals() { - local agent_identity="${1:-${AGENT_IDENTITY:-}}" - local model="${2:-${CLAUDE_MODEL:-opus}}" + local agent_identity="${AGENT_IDENTITY:-}" + local model="${CLAUDE_MODEL:-opus}" if [ -z "$agent_identity" ]; then if ! resolve_agent_identity; then @@ -237,7 +253,6 @@ Write the complete, rewritten lessons-learned.md content below. No preamble, no output=$(claude -p "$digest_prompt" \ --output-format json \ --dangerously-skip-permissions \ - --max-tokens 1000 \ ${model:+--model "$model"} \ 2>>"$LOGFILE" || echo '{"result":"error"}') @@ -432,7 +447,6 @@ Write the journal entry below. Use markdown format." 
output=$(claude -p "$reflection_prompt" \ --output-format json \ --dangerously-skip-permissions \ - --max-tokens 500 \ ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \ 2>>"$LOGFILE" || echo '{"result":"error"}') @@ -449,16 +463,15 @@ Write the journal entry below. Use markdown format." local journal_dir="${PROFILE_REPO_PATH}/journal" mkdir -p "$journal_dir" - # Write journal entry (append if exists) - local journal_file="${journal_dir}/issue-${issue_num}.md" - if [ -f "$journal_file" ]; then - printf '\n---\n\n' >> "$journal_file" - fi + # Write journal entry with timestamped filename for accumulation + local ts + ts=$(date -u +%Y%m%d-%H%M%S) + local journal_file="${journal_dir}/issue-${issue_num}-${ts}.md" printf '%s\n' "$journal_content" >> "$journal_file" - log "profile: wrote journal entry for issue #${issue_num}" + log "profile: wrote journal entry for issue #${issue_num} (${ts})" # Commit and push to .profile repo - _profile_commit_and_push "journal: issue #${issue_num} reflection" "journal/issue-${issue_num}.md" + _profile_commit_and_push "journal: issue #${issue_num} reflection (${ts})" "journal/issue-${issue_num}-${ts}.md" return 0 } @@ -557,7 +570,7 @@ $(cat "$ctx_path") done } -# ── Ops repo helpers ───────────────────────────────────────────────── +# ── Ops repo helpers ──────────────────────────────────────────────────── # ensure_ops_repo # Clones or pulls the ops repo so agents can read/write operational data. @@ -620,90 +633,6 @@ ops_commit_and_push() { ) } -# ── Session management ─────────────────────────────────────────────────── - -# start_formula_session SESSION WORKDIR PHASE_FILE -# Kills stale session, resets phase file, creates a per-agent git worktree -# for session isolation, and creates a new tmux + claude session in it. -# Sets _FORMULA_SESSION_WORKDIR to the worktree path (or original workdir -# on fallback). Callers must clean up via remove_formula_worktree after -# the session ends. -# Returns 0 on success, 1 on failure. 
-start_formula_session() { - local session="$1" workdir="$2" phase_file="$3" - agent_kill_session "$session" - rm -f "$phase_file" - - # Create per-agent git worktree for session isolation. - # Each agent gets its own CWD so Claude Code treats them as separate - # projects — no resume collisions between sequential formula runs. - _FORMULA_SESSION_WORKDIR="/tmp/disinto-${session}" - # Clean up any stale worktree from a previous run - git -C "$workdir" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true - if git -C "$workdir" worktree add "$_FORMULA_SESSION_WORKDIR" HEAD --detach 2>/dev/null; then - log "Created worktree: ${_FORMULA_SESSION_WORKDIR}" - else - log "WARNING: worktree creation failed — falling back to ${workdir}" - _FORMULA_SESSION_WORKDIR="$workdir" - fi - - log "Creating tmux session: ${session}" - if ! create_agent_session "$session" "$_FORMULA_SESSION_WORKDIR" "$phase_file"; then - log "ERROR: failed to create tmux session ${session}" - return 1 - fi -} - -# remove_formula_worktree -# Removes the worktree created by start_formula_session if it differs from -# PROJECT_REPO_ROOT. Safe to call multiple times. No-op if no worktree was created. -remove_formula_worktree() { - if [ -n "${_FORMULA_SESSION_WORKDIR:-}" ] \ - && [ "$_FORMULA_SESSION_WORKDIR" != "${PROJECT_REPO_ROOT:-}" ]; then - git -C "$PROJECT_REPO_ROOT" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true - log "Removed worktree: ${_FORMULA_SESSION_WORKDIR}" - fi -} - -# formula_phase_callback PHASE -# Standard crash-recovery phase callback for formula sessions. -# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT. -# Uses _FORMULA_CRASH_COUNT (auto-initialized) for single-retry limit. 
-# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller -formula_phase_callback() { - local phase="$1" - log "phase: ${phase}" - case "$phase" in - PHASE:crashed) - if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then - log "ERROR: session crashed again after recovery — giving up" - return 0 - fi - _FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 )) - log "WARNING: tmux session died unexpectedly — attempting recovery" - if create_agent_session "${_MONITOR_SESSION:-$SESSION_NAME}" "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then - agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" "$PROMPT" - log "Recovery session started" - else - log "ERROR: could not restart session after crash" - fi - ;; - PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged) - agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}" - ;; - esac -} - -# ── Stale crashed worktree cleanup ───────────────────────────────────────── - -# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] -# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. -# Kept for backwards compatibility with existing callers. -# Requires: lib/worktree.sh sourced. -cleanup_stale_crashed_worktrees() { - worktree_cleanup_stale "${1:-24}" -} - # ── Scratch file helpers (compaction survival) ──────────────────────────── # build_scratch_instruction SCRATCH_FILE @@ -779,25 +708,26 @@ build_sdk_prompt_footer() { # Creates an isolated worktree for synchronous formula execution. # Fetches primary branch, cleans stale worktree, creates new one, and # sets an EXIT trap for cleanup. -# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH. +# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE. +# Ensure resolve_forge_remote() is called before this function. 
formula_worktree_setup() { local worktree="$1" cd "$PROJECT_REPO_ROOT" || return - git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true + git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true worktree_cleanup "$worktree" - git worktree add "$worktree" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null + git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null # shellcheck disable=SC2064 # expand worktree now, not at trap time trap "worktree_cleanup '$worktree'" EXIT } -# ── Prompt + monitor helpers ────────────────────────────────────────────── +# ── Prompt helpers ────────────────────────────────────────────────────── # build_prompt_footer [EXTRA_API_LINES] -# Assembles the common forge API reference + environment + phase protocol -# block for formula prompts. Sets PROMPT_FOOTER. +# Assembles the common forge API reference + environment block for formula prompts. +# Sets PROMPT_FOOTER. # Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1. # Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT, -# PRIMARY_BRANCH, PHASE_FILE. +# PRIMARY_BRANCH. build_prompt_footer() { local extra_api="${1:-}" # shellcheck disable=SC2034 # consumed by the calling script's PROMPT @@ -813,66 +743,15 @@ NEVER echo or include the actual token value in output — always reference \${F FACTORY_ROOT=${FACTORY_ROOT} PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT} OPS_REPO_ROOT=${OPS_REPO_ROOT} -PRIMARY_BRANCH=${PRIMARY_BRANCH} -PHASE_FILE=${PHASE_FILE} - -## Phase protocol (REQUIRED) -When all work is done: - echo 'PHASE:done' > '${PHASE_FILE}' -On unrecoverable error: - printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'" +PRIMARY_BRANCH=${PRIMARY_BRANCH}" } -# run_formula_and_monitor AGENT_NAME [TIMEOUT] -# Starts the formula session, injects PROMPT, monitors phase, and logs result. -# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT, -# FORGE_REPO, CLAUDE_MODEL (exported). 
-# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller -run_formula_and_monitor() { - local agent_name="$1" - local timeout="${2:-7200}" - local callback="${3:-formula_phase_callback}" +# ── Stale crashed worktree cleanup ──────────────────────────────────────── - if ! start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then - exit 1 - fi - - # Write phase protocol to context file for compaction survival - if [ -n "${PROMPT_FOOTER:-}" ]; then - write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER" - fi - - agent_inject_into_session "$SESSION_NAME" "$PROMPT" - log "Prompt sent to tmux session" - - log "Monitoring phase file: ${PHASE_FILE}" - _FORMULA_CRASH_COUNT=0 - - monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback" - - FINAL_PHASE=$(read_phase "$PHASE_FILE") - log "Final phase: ${FINAL_PHASE:-none}" - - if [ "$FINAL_PHASE" != "PHASE:done" ]; then - case "${_MONITOR_LOOP_EXIT:-}" in - idle_prompt) - log "${agent_name}: Claude returned to prompt without writing phase signal" - ;; - idle_timeout) - log "${agent_name}: timed out with no phase signal" - ;; - *) - log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})" - ;; - esac - fi - - # Preserve worktree on crash for debugging; clean up on success - if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then - worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})" - else - remove_formula_worktree - fi - - log "--- ${agent_name^} run done ---" +# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] +# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. +# Kept for backwards compatibility with existing callers. +# Requires: lib/worktree.sh sourced. 
+cleanup_stale_crashed_worktrees() { + worktree_cleanup_stale "${1:-24}" } diff --git a/lib/generators.sh b/lib/generators.sh new file mode 100644 index 0000000..c9aea55 --- /dev/null +++ b/lib/generators.sh @@ -0,0 +1,596 @@ +#!/usr/bin/env bash +# ============================================================================= +# generators — template generation functions for disinto init +# +# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and +# deployment pipeline configs. +# +# Globals expected (must be set before sourcing): +# FACTORY_ROOT - Root of the disinto factory +# PROJECT_NAME - Project name for the project repo (defaults to 'project') +# PRIMARY_BRANCH - Primary branch name (defaults to 'main') +# +# Usage: +# source "${FACTORY_ROOT}/lib/generators.sh" +# generate_compose "$forge_port" +# generate_caddyfile +# generate_staging_index +# generate_deploy_pipelines "$repo_root" "$project_name" +# ============================================================================= +set -euo pipefail + +# Assert required globals are set +: "${FACTORY_ROOT:?FACTORY_ROOT must be set}" +# PROJECT_NAME defaults to 'project' if not set (env.sh may have set it from FORGE_REPO) +PROJECT_NAME="${PROJECT_NAME:-project}" +# PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master') +PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}" + +# Parse project TOML for local-model agents and emit compose services. +# Writes service definitions to stdout; caller handles insertion into compose file. 
+_generate_local_model_services() { + local compose_file="$1" + local projects_dir="${FACTORY_ROOT}/projects" + local temp_file + temp_file=$(mktemp) + local has_services=false + local all_vols="" + + # Find all project TOML files and extract [agents.*] sections + for toml in "${projects_dir}"/*.toml; do + [ -f "$toml" ] || continue + + # Parse [agents.*] sections using Python - output YAML-compatible format + while IFS='=' read -r key value; do + case "$key" in + NAME) service_name="$value" ;; + BASE_URL) base_url="$value" ;; + MODEL) model="$value" ;; + ROLES) roles="$value" ;; + API_KEY) api_key="$value" ;; + FORGE_USER) forge_user="$value" ;; + COMPACT_PCT) compact_pct="$value" ;; + ---) + if [ -n "$service_name" ] && [ -n "$base_url" ]; then + cat >> "$temp_file" <<EOF + + agents-${service_name}: + build: + context: . + dockerfile: docker/agents/Dockerfile + container_name: disinto-agents-${service_name} + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - agents-${service_name}-data:/home/agent/data + - project-repos:/home/agent/repos + - ${HOME}/.ssh:/home/agent/.ssh:ro + environment: + FORGE_URL: http://forgejo:3000 + FORGE_TOKEN: \${FORGE_TOKEN:-} + FORGE_REVIEW_TOKEN: \${FORGE_REVIEW_TOKEN:-} + FORGE_BOT_USERNAMES: \${FORGE_BOT_USERNAMES:-} + AGENT_ROLES: "${roles}" + CLAUDE_TIMEOUT: \${CLAUDE_TIMEOUT:-7200} + ANTHROPIC_BASE_URL: "${base_url}" + ANTHROPIC_API_KEY: "${api_key}" + CLAUDE_MODEL: "${model}" + CLAUDE_CONFIG_DIR: /home/agent/.claude-${service_name} + CLAUDE_CREDENTIALS_DIR: /home/agent/.claude-${service_name}/credentials + CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "${compact_pct}" + CLAUDE_CODE_ATTRIBUTION_HEADER: "0" + CLAUDE_CODE_ENABLE_TELEMETRY: "0" + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + WOODPECKER_DATA_DIR: /woodpecker-data + FORGE_BOT_USER_${service_name^^}: "${forge_user}" + depends_on: + - forgejo + - woodpecker + networks: + - disinto-net + profiles: 
["agents-${service_name}"] + +EOF + has_services=true + fi + # Collect volume name for later + local vol_name=" agents-${service_name}-data:" + if [ -n "$all_vols" ]; then + all_vols="${all_vols} +${vol_name}" + else + all_vols="${vol_name}" + fi + service_name="" base_url="" model="" roles="" api_key="" forge_user="" compact_pct="" + ;; + esac + done < <(python3 -c ' +import sys, tomllib, json, re + +with open(sys.argv[1], "rb") as f: + cfg = tomllib.load(f) + +agents = cfg.get("agents", {}) +for name, config in agents.items(): + if not isinstance(config, dict): + continue + + base_url = config.get("base_url", "") + model = config.get("model", "") + if not base_url or not model: + continue + + roles = config.get("roles", ["dev"]) + roles_str = " ".join(roles) if isinstance(roles, list) else roles + api_key = config.get("api_key", "sk-no-key-required") + forge_user = config.get("forge_user", f"{name}-bot") + compact_pct = config.get("compact_pct", 60) + + safe_name = name.lower() + safe_name = re.sub(r"[^a-z0-9]", "-", safe_name) + + # Output as simple key=value lines + print(f"NAME={safe_name}") + print(f"BASE_URL={base_url}") + print(f"MODEL={model}") + print(f"ROLES={roles_str}") + print(f"API_KEY={api_key}") + print(f"FORGE_USER={forge_user}") + print(f"COMPACT_PCT={compact_pct}") + print("---") +' "$toml" 2>/dev/null) + done + + if [ "$has_services" = true ]; then + # Insert the services before the volumes section + local temp_compose + temp_compose=$(mktemp) + # Get everything before volumes: + sed -n '1,/^volumes:/p' "$compose_file" | sed '$d' > "$temp_compose" + # Add the services + cat "$temp_file" >> "$temp_compose" + # Add the volumes section and everything after + sed -n '/^volumes:/,$p' "$compose_file" >> "$temp_compose" + + # Add local-model volumes to the volumes section + if [ -n "$all_vols" ]; then + # Find the volumes section and add the new volumes + sed -i "/^volumes:/{n;:a;n;/^[a-z]/!{s/$/\n$all_vols/;b};ba}" "$temp_compose" + fi + + mv 
"$temp_compose" "$compose_file" + fi + + rm -f "$temp_file" +} + +# Generate docker-compose.yml in the factory root. +_generate_compose_impl() { + local forge_port="${1:-3000}" + local compose_file="${FACTORY_ROOT}/docker-compose.yml" + + # Check if compose file already exists + if [ -f "$compose_file" ]; then + echo "Compose: ${compose_file} (already exists, skipping)" + return 0 + fi + + cat > "$compose_file" <<'COMPOSEEOF' +# docker-compose.yml — generated by disinto init +# Brings up Forgejo, Woodpecker, and the agent runtime. + +services: + forgejo: + image: codeberg.org/forgejo/forgejo:11.0 + container_name: disinto-forgejo + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - forgejo-data:/data + environment: + FORGEJO__database__DB_TYPE: sqlite3 + FORGEJO__server__ROOT_URL: http://forgejo:3000/ + FORGEJO__server__HTTP_PORT: "3000" + FORGEJO__security__INSTALL_LOCK: "true" + FORGEJO__service__DISABLE_REGISTRATION: "true" + FORGEJO__webhook__ALLOWED_HOST_LIST: "private" + networks: + - disinto-net + + woodpecker: + image: woodpeckerci/woodpecker-server:v3 + container_name: disinto-woodpecker + restart: unless-stopped + security_opt: + - apparmor=unconfined + ports: + - "8000:8000" + - "9000:9000" + volumes: + - woodpecker-data:/var/lib/woodpecker + environment: + WOODPECKER_FORGEJO: "true" + WOODPECKER_FORGEJO_URL: http://forgejo:3000 + WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} + WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} + WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000} + WOODPECKER_OPEN: "true" + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_DATABASE_DRIVER: sqlite3 + WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite + WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}" + depends_on: + - forgejo + networks: + - disinto-net + + woodpecker-agent: + image: woodpeckerci/woodpecker-agent:v3 + container_name: disinto-woodpecker-agent + restart: unless-stopped + 
network_mode: host + privileged: true + security_opt: + - apparmor=unconfined + volumes: + - /var/run/docker.sock:/var/run/docker.sock + environment: + WOODPECKER_SERVER: localhost:9000 + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_GRPC_SECURE: "false" + WOODPECKER_HEALTHCHECK_ADDR: ":3333" + WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net + WOODPECKER_MAX_WORKFLOWS: 1 + depends_on: + - woodpecker + + agents: + build: + context: . + dockerfile: docker/agents/Dockerfile + container_name: disinto-agents + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + - project-repos:/home/agent/repos + - ${HOME}/.claude:/home/agent/.claude + - ${HOME}/.claude.json:/home/agent/.claude.json:ro + - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro + - ${HOME}/.ssh:/home/agent/.ssh:ro + - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro + - woodpecker-data:/woodpecker-data:ro + environment: + FORGE_URL: http://forgejo:3000 + FORGE_TOKEN: ${FORGE_TOKEN:-} + FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-} + FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-} + FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-} + FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-} + FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-} + FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-} + FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-} + FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-} + WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-} + CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200} + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} + FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-} + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + WOODPECKER_DATA_DIR: /woodpecker-data + # IMPORTANT: agents get explicit environment variables (forge tokens, CI tokens, config). 
+ # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in + # .env.vault.enc and are NEVER injected here — only the runner + # container receives them at fire time (AD-006, #745). + depends_on: + - forgejo + - woodpecker + networks: + - disinto-net + + runner: + build: + context: . + dockerfile: docker/agents/Dockerfile + profiles: ["vault"] + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + environment: + FORGE_URL: http://forgejo:3000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + # Vault redesign in progress (PR-based approval, see #73-#77) + # This container is being replaced — entrypoint will be updated in follow-up + networks: + - disinto-net + + # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging + # Serves on ports 80/443, routes based on path + edge: + build: ./docker/edge + container_name: disinto-edge + security_opt: + - apparmor=unconfined + ports: + - "80:80" + - "443:443" + environment: + - DISINTO_VERSION=${DISINTO_VERSION:-main} + - FORGE_URL=http://forgejo:3000 + - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto} + - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops} + - FORGE_TOKEN=${FORGE_TOKEN:-} + - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin} + - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-} + - OPS_REPO_ROOT=/opt/disinto-ops + - PROJECT_REPO_ROOT=/opt/disinto + - PRIMARY_BRANCH=main + volumes: + - ./docker/Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + - /var/run/docker.sock:/var/run/docker.sock + depends_on: + - forgejo + - woodpecker + - staging + networks: + - disinto-net + + # Staging container — static file server for staging artifacts + # Edge proxy routes to this container for default requests + staging: + image: caddy:alpine + command: ["caddy", "file-server", "--root", "/srv/site"] + security_opt: + - apparmor=unconfined + volumes: + - ./docker:/srv/site:ro + networks: + - disinto-net + + # Staging deployment 
slot — activated by Woodpecker staging pipeline (#755). + # Profile-gated: only starts when explicitly targeted by deploy commands. + # Customize image/ports/volumes for your project after init. + staging-deploy: + image: alpine:3 + profiles: ["staging"] + security_opt: + - apparmor=unconfined + environment: + DEPLOY_ENV: staging + networks: + - disinto-net + command: ["echo", "staging slot — replace with project image"] + +volumes: + forgejo-data: + woodpecker-data: + agent-data: + project-repos: + caddy_data: + +networks: + disinto-net: + driver: bridge +COMPOSEEOF + + # Patch PROJECT_REPO_ROOT — interpolate PROJECT_NAME at generation time + # (Docker Compose cannot resolve it; it's a shell variable, not a .env var) + sed -i "s|\${PROJECT_NAME:-project}|${PROJECT_NAME}|g" "$compose_file" + + # Patch the Claude CLI binary path — resolve from host PATH at init time. + local claude_bin + claude_bin="$(command -v claude 2>/dev/null || true)" + if [ -n "$claude_bin" ]; then + # Resolve symlinks to get the real binary path + claude_bin="$(readlink -f "$claude_bin")" + sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" + else + echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 + sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" + fi + + # Patch the forgejo port mapping into the file if non-default + if [ "$forge_port" != "3000" ]; then + # Add port mapping to forgejo service so it's reachable from host during init + sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" + else + sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"3000:3000\"" "$compose_file" + fi + + # Append local-model agent services if any are configured + _generate_local_model_services "$compose_file" + + echo "Created: ${compose_file}" +} + +# Generate docker/agents/ files if they don't already exist. 
+_generate_agent_docker_impl() { + local docker_dir="${FACTORY_ROOT}/docker/agents" + mkdir -p "$docker_dir" + + if [ ! -f "${docker_dir}/Dockerfile" ]; then + echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 + fi + if [ ! -f "${docker_dir}/entrypoint.sh" ]; then + echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 + fi +} + +# Generate docker/Caddyfile template for edge proxy. +_generate_caddyfile_impl() { + local docker_dir="${FACTORY_ROOT}/docker" + local caddyfile="${docker_dir}/Caddyfile" + + if [ -f "$caddyfile" ]; then + echo "Caddyfile: ${caddyfile} (already exists, skipping)" + return + fi + + cat > "$caddyfile" <<'CADDYFILEEOF' +# Caddyfile — edge proxy configuration +# IP-only binding at bootstrap; domain + TLS added later via vault resource request + +:80 { + # Reverse proxy to Forgejo + handle /forgejo/* { + reverse_proxy forgejo:3000 + } + + # Reverse proxy to Woodpecker CI + handle /ci/* { + reverse_proxy woodpecker:8000 + } + + # Default: proxy to staging container + handle { + reverse_proxy staging:80 + } +} +CADDYFILEEOF + + echo "Created: ${caddyfile}" +} + +# Generate docker/index.html default page. +_generate_staging_index_impl() { + local docker_dir="${FACTORY_ROOT}/docker" + local index_file="${docker_dir}/index.html" + + if [ -f "$index_file" ]; then + echo "Staging: ${index_file} (already exists, skipping)" + return + fi + + cat > "$index_file" <<'INDEXEOF' +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Nothing shipped yet + + + +
+

Nothing shipped yet

+

CI pipelines will update this page with your staging artifacts.

+
+ + +INDEXEOF + + echo "Created: ${index_file}" +} + +# Generate template .woodpecker/ deployment pipeline configs in a project repo. +# Creates staging.yml and production.yml alongside the project's existing CI config. +# These pipelines trigger on Woodpecker's deployment event with environment filters. +_generate_deploy_pipelines_impl() { + local repo_root="$1" + local project_name="$2" + : "${project_name// /}" # Silence SC2034 - variable used in heredoc + local wp_dir="${repo_root}/.woodpecker" + + mkdir -p "$wp_dir" + + # Skip if deploy pipelines already exist + if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then + echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" + return + fi + + if [ ! -f "${wp_dir}/staging.yml" ]; then + cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' +# .woodpecker/staging.yml — Staging deployment pipeline +# Triggered by runner via Woodpecker promote API. +# Human approves promotion in vault → runner calls promote → this runs. + +when: + event: deployment + environment: staging + +steps: + - name: deploy-staging + image: docker:27 + commands: + - echo "Deploying to staging environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" + # Pull the image built by CI and deploy to staging + # Customize these commands for your project: + # - docker compose -f docker-compose.yml --profile staging up -d + - echo "Staging deployment complete" + + - name: verify-staging + image: alpine:3 + commands: + - echo "Verifying staging deployment..." + # Add health checks, smoke tests, or integration tests here: + # - curl -sf http://staging:8080/health || exit 1 + - echo "Staging verification complete" +STAGINGEOF + echo "Created: ${wp_dir}/staging.yml" + fi + + if [ ! -f "${wp_dir}/production.yml" ]; then + cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' +# .woodpecker/production.yml — Production deployment pipeline +# Triggered by runner via Woodpecker promote API. 
+# Human approves promotion in vault → runner calls promote → this runs. + +when: + event: deployment + environment: production + +steps: + - name: deploy-production + image: docker:27 + commands: + - echo "Deploying to production environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" + # Pull the verified image and deploy to production + # Customize these commands for your project: + # - docker compose -f docker-compose.yml up -d + - echo "Production deployment complete" + + - name: verify-production + image: alpine:3 + commands: + - echo "Verifying production deployment..." + # Add production health checks here: + # - curl -sf http://production:8080/health || exit 1 + - echo "Production verification complete" +PRODUCTIONEOF + echo "Created: ${wp_dir}/production.yml" + fi +} diff --git a/lib/hire-agent.sh b/lib/hire-agent.sh new file mode 100644 index 0000000..b15b2b7 --- /dev/null +++ b/lib/hire-agent.sh @@ -0,0 +1,471 @@ +#!/usr/bin/env bash +# ============================================================================= +# hire-agent — disinto_hire_an_agent() function +# +# Handles user creation, .profile repo setup, formula copying, branch protection, +# and state marker creation for hiring a new agent. 
+# +# Globals expected: +# FORGE_URL - Forge instance URL +# FORGE_TOKEN - Admin token for Forge operations +# FACTORY_ROOT - Root of the disinto factory +# PROJECT_NAME - Project name for email/domain generation +# +# Usage: +# source "${FACTORY_ROOT}/lib/hire-agent.sh" +# disinto_hire_an_agent [--formula ] [--local-model ] [--poll-interval ] +# ============================================================================= +set -euo pipefail + +disinto_hire_an_agent() { + local agent_name="${1:-}" + local role="${2:-}" + local formula_path="" + local local_model="" + local poll_interval="" + + if [ -z "$agent_name" ] || [ -z "$role" ]; then + echo "Error: agent-name and role required" >&2 + echo "Usage: disinto hire-an-agent [--formula ] [--local-model ] [--poll-interval ]" >&2 + exit 1 + fi + shift 2 + + # Parse flags + while [ $# -gt 0 ]; do + case "$1" in + --formula) + formula_path="$2" + shift 2 + ;; + --local-model) + local_model="$2" + shift 2 + ;; + --poll-interval) + poll_interval="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" >&2 + exit 1 + ;; + esac + done + + # Default formula path — try both naming conventions + if [ -z "$formula_path" ]; then + formula_path="${FACTORY_ROOT}/formulas/${role}.toml" + if [ ! -f "$formula_path" ]; then + formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml" + fi + fi + + # Validate formula exists + if [ ! 
-f "$formula_path" ]; then + echo "Error: formula not found at ${formula_path}" >&2 + exit 1 + fi + + echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────" + echo "Formula: ${formula_path}" + if [ -n "$local_model" ]; then + echo "Local model: ${local_model}" + echo "Poll interval: ${poll_interval:-300}s" + fi + + # Ensure FORGE_TOKEN is set + if [ -z "${FORGE_TOKEN:-}" ]; then + echo "Error: FORGE_TOKEN not set" >&2 + exit 1 + fi + + # Get Forge URL + local forge_url="${FORGE_URL:-http://localhost:3000}" + echo "Forge: ${forge_url}" + + # Step 1: Create user via API (skip if exists) + echo "" + echo "Step 1: Creating user '${agent_name}' (if not exists)..." + + local user_pass="" + local admin_pass="" + + # Read admin password from .env for standalone runs (#184) + local env_file="${FACTORY_ROOT}/.env" + if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then + admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) + fi + + # Get admin token early (needed for both user creation and password reset) + local admin_user="disinto-admin" + admin_pass="${admin_pass:-admin}" + local admin_token="" + local admin_token_name + admin_token_name="temp-token-$(date +%s)" + admin_token=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" \ + -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || admin_token="" + if [ -z "$admin_token" ]; then + # Token might already exist — try listing + admin_token=$(curl -sf \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || admin_token="" + fi + if [ -z "$admin_token" ]; then + echo "Error: failed to obtain admin API token" >&2 + echo " Cannot proceed without admin privileges" >&2 + exit 1 + fi + + if curl -sf --max-time 5 
"${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then + echo " User '${agent_name}' already exists" + # Reset user password so we can get a token (#184) + user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + # Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x) + if _forgejo_exec forgejo admin user change-password \ + --username "${agent_name}" \ + --password "${user_pass}" \ + --must-change-password=false >/dev/null 2>&1; then + echo " Reset password for existing user '${agent_name}'" + else + echo " Warning: could not reset password for existing user" >&2 + fi + else + # Create user using basic auth (admin token fallback would poison subsequent calls) + # Create the user + user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + if curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users" \ + -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then + echo " Created user '${agent_name}'" + else + echo " Warning: failed to create user via admin API" >&2 + # Try alternative: user might already exist + if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then + echo " User '${agent_name}' exists (confirmed)" + else + echo " Error: failed to create user '${agent_name}'" >&2 + exit 1 + fi + fi + fi + + # Step 1.5: Generate Forge token for the new/existing user + echo "" + echo "Step 1.5: Generating Forge token for '${agent_name}'..." 
+ + # Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN) + local role_upper + role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]') + local token_var="FORGE_${role_upper}_TOKEN" + + # Generate token using the user's password (basic auth) + local agent_token="" + agent_token=$(curl -sf -X POST \ + -u "${agent_name}:${user_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${agent_name}/tokens" \ + -d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || agent_token="" + + if [ -z "$agent_token" ]; then + # Token name collision — create with timestamp suffix + agent_token=$(curl -sf -X POST \ + -u "${agent_name}:${user_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${agent_name}/tokens" \ + -d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || agent_token="" + fi + + if [ -z "$agent_token" ]; then + echo " Warning: failed to create API token for '${agent_name}'" >&2 + else + # Store token in .env under the role-specific variable name + if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then + # Use sed with alternative delimiter and proper escaping for special chars in token + local escaped_token + escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g') + sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file" + echo " ${agent_name} token updated (${token_var})" + else + printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file" + echo " ${agent_name} token saved (${token_var})" + fi + export "${token_var}=${agent_token}" + fi + + # Step 2: Create .profile repo on Forgejo + echo "" + echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." 
+ + if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then + echo " Repo '${agent_name}/.profile' already exists" + else + # Create the repo using the admin API to ensure it's created in the agent's namespace. + # Using POST /api/v1/user/repos with a user token would create the repo under the + # authenticated user, which could be wrong if the token belongs to a different user. + # The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the + # specified user's namespace. + local create_output + create_output=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${agent_name}/repos" \ + -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true + + if echo "$create_output" | grep -q '"id":\|[0-9]'; then + echo " Created repo '${agent_name}/.profile' (via admin API)" + else + echo " Error: failed to create repo '${agent_name}/.profile'" >&2 + echo " Response: ${create_output}" >&2 + exit 1 + fi + fi + + # Step 3: Clone repo and create initial commit + echo "" + echo "Step 3: Cloning repo and creating initial commit..." + + local clone_dir="/tmp/.profile-clone-${agent_name}" + rm -rf "$clone_dir" + mkdir -p "$clone_dir" + + # Build authenticated clone URL using basic auth (user_pass is always set in Step 1) + if [ -z "${user_pass:-}" ]; then + echo " Error: no user password available for cloning" >&2 + exit 1 + fi + + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|") + auth_url="${auth_url}/${agent_name}/.profile.git" + + # Display unauthenticated URL (auth token only in actual git clone command) + echo " Cloning: ${forge_url}/${agent_name}/.profile.git" + + # Try authenticated clone first (required for private repos) + if ! 
git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then + echo " Error: failed to clone repo with authentication" >&2 + echo " Note: Ensure the user has a valid API token with repository access" >&2 + rm -rf "$clone_dir" + exit 1 + fi + + # Configure git + git -C "$clone_dir" config user.name "disinto-admin" + git -C "$clone_dir" config user.email "disinto-admin@localhost" + + # Create directory structure + echo " Creating directory structure..." + mkdir -p "${clone_dir}/journal" + mkdir -p "${clone_dir}/knowledge" + touch "${clone_dir}/journal/.gitkeep" + touch "${clone_dir}/knowledge/.gitkeep" + + # Copy formula + echo " Copying formula..." + cp "$formula_path" "${clone_dir}/formula.toml" + + # Create README + if [ ! -f "${clone_dir}/README.md" ]; then + cat > "${clone_dir}/README.md" </dev/null; then + git -C "$clone_dir" commit -m "chore: initial .profile setup" -q + git -C "$clone_dir" push origin main >/dev/null 2>&1 || \ + git -C "$clone_dir" push origin master >/dev/null 2>&1 || true + echo " Committed: initial .profile setup" + else + echo " No changes to commit" + fi + + rm -rf "$clone_dir" + + # Step 4: Set up branch protection + echo "" + echo "Step 4: Setting up branch protection..." 
+ + # Source branch-protection.sh helper + local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh" + if [ -f "$bp_script" ]; then + # Source required environment + if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then + source "${FACTORY_ROOT}/lib/env.sh" + fi + + # Set up branch protection for .profile repo + if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then + echo " Branch protection configured for main branch" + echo " - Requires 1 approval before merge" + echo " - Admin-only merge enforcement" + echo " - Journal branch created for direct agent pushes" + else + echo " Warning: could not configure branch protection (Forgejo API may not be available)" + echo " Note: Branch protection can be set up manually later" + fi + else + echo " Warning: branch-protection.sh not found at ${bp_script}" + fi + + # Step 5: Create state marker + echo "" + echo "Step 5: Creating state marker..." + + local state_dir="${FACTORY_ROOT}/state" + mkdir -p "$state_dir" + local state_file="${state_dir}/.${role}-active" + + if [ ! -f "$state_file" ]; then + touch "$state_file" + echo " Created: ${state_file}" + else + echo " State marker already exists: ${state_file}" + fi + + # Step 6: Set up local model agent (if --local-model specified) + if [ -n "$local_model" ]; then + echo "" + echo "Step 6: Configuring local model agent..." + + local override_file="${FACTORY_ROOT}/docker-compose.override.yml" + local override_dir + override_dir=$(dirname "$override_file") + mkdir -p "$override_dir" + + # Validate model endpoint is reachable + echo " Validating model endpoint: ${local_model}" + if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then + # Try /v1/chat/completions as fallback endpoint check + if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then + echo " Warning: model endpoint may not be reachable at ${local_model}" + echo " Continuing with configuration..." 
+ fi + else + echo " Model endpoint is reachable" + fi + + # Generate service name from agent name (lowercase) + local service_name="agents-${agent_name}" + service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]') + + # Set default poll interval + local interval="${poll_interval:-300}" + + # Generate the override compose file + # Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time + # \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion + cat > "$override_file" < "$tmpfile" + jq -Rs '{body:.}' < "$tmpfile" > "$tmpjson" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}/comments" \ + --data-binary @"$tmpjson" 2>/dev/null || true + rm -f "$tmpfile" "$tmpjson" +} + # --------------------------------------------------------------------------- # issue_block — add "blocked" label, post diagnostic comment, remove in-progress. 
# Args: issue_number reason [result_text] @@ -187,14 +209,9 @@ issue_block() { fi } > "$tmpfile" - # Post comment - jq -Rs '{body:.}' < "$tmpfile" > "${tmpfile}.json" - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}/comments" \ - --data-binary @"${tmpfile}.json" 2>/dev/null || true - rm -f "$tmpfile" "${tmpfile}.json" + # Post comment using shared helper + _ilc_post_comment "$issue" "$(cat "$tmpfile")" + rm -f "$tmpfile" # Remove in-progress, add blocked local ip_id bk_id diff --git a/lib/load-project.sh b/lib/load-project.sh index dcddc94..134461c 100755 --- a/lib/load-project.sh +++ b/lib/load-project.sh @@ -10,7 +10,6 @@ # PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT, # CHECK_PIPELINE_STALL, CI_STALE_MINUTES, # MIRROR_NAMES, MIRROR_URLS, MIRROR_ (per configured mirror) -# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB) # # If no argument given, does nothing (allows poll scripts to work with # plain .env fallback for backwards compatibility). @@ -83,7 +82,7 @@ if mirrors: # Export parsed variables. # Inside the agents container (DISINTO_CONTAINER=1), compose already sets the # correct FORGE_URL (http://forgejo:3000) and path vars for the container -# environment. The TOML carries host-perspective values (localhost, /home/johba/…) +# environment. The TOML carries host-perspective values (localhost, /home/admin/…) # that would break container API calls and path resolution. Skip overriding # any env var that is already set when running inside the container. 
while IFS='=' read -r _key _val; do @@ -100,11 +99,9 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}" if [ -n "$FORGE_REPO" ]; then export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}" export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}" + # Extract repo owner (first path segment of owner/repo) + export FORGE_REPO_OWNER="${FORGE_REPO%%/*}" fi -# Backwards-compat aliases -export CODEBERG_REPO="${FORGE_REPO}" -export CODEBERG_API="${FORGE_API:-}" -export CODEBERG_WEB="${FORGE_WEB:-}" # Derive PROJECT_REPO_ROOT if not explicitly set if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then @@ -116,9 +113,55 @@ if [ -z "${OPS_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then export OPS_REPO_ROOT="/home/${USER}/${PROJECT_NAME}-ops" fi +# Inside the container, always derive repo paths from PROJECT_NAME — the TOML +# carries host-perspective paths that do not exist in the container filesystem. +if [ "${DISINTO_CONTAINER:-}" = "1" ] && [ -n "${PROJECT_NAME:-}" ]; then + export PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}" + export OPS_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}-ops" +fi + # Derive FORGE_OPS_REPO if not explicitly set if [ -z "${FORGE_OPS_REPO:-}" ] && [ -n "${FORGE_REPO:-}" ]; then export FORGE_OPS_REPO="${FORGE_REPO}-ops" fi +# Parse [agents.*] sections for local-model agents +# Exports AGENT__BASE_URL, AGENT__MODEL, AGENT__API_KEY, +# AGENT__ROLES, AGENT__FORGE_USER, AGENT__COMPACT_PCT +if command -v python3 &>/dev/null; then + _AGENT_VARS=$(python3 -c " +import sys, tomllib + +with open(sys.argv[1], 'rb') as f: + cfg = tomllib.load(f) + +agents = cfg.get('agents', {}) +for name, config in agents.items(): + if not isinstance(config, dict): + continue + # Emit variables in uppercase with the agent name + if 'base_url' in config: + print(f'AGENT_{name.upper()}_BASE_URL={config[\"base_url\"]}') + if 'model' in config: + print(f'AGENT_{name.upper()}_MODEL={config[\"model\"]}') + if 'api_key' in config: + 
print(f'AGENT_{name.upper()}_API_KEY={config[\"api_key\"]}') + if 'roles' in config: + roles = ' '.join(config['roles']) if isinstance(config['roles'], list) else config['roles'] + print(f'AGENT_{name.upper()}_ROLES={roles}') + if 'forge_user' in config: + print(f'AGENT_{name.upper()}_FORGE_USER={config[\"forge_user\"]}') + if 'compact_pct' in config: + print(f'AGENT_{name.upper()}_COMPACT_PCT={config[\"compact_pct\"]}') +" "$_PROJECT_TOML" 2>/dev/null) || true + + if [ -n "$_AGENT_VARS" ]; then + while IFS='=' read -r _key _val; do + [ -z "$_key" ] && continue + export "$_key=$_val" + done <<< "$_AGENT_VARS" + fi + unset _AGENT_VARS +fi + unset _PROJECT_TOML _PROJECT_VARS _key _val diff --git a/lib/ops-setup.sh b/lib/ops-setup.sh new file mode 100644 index 0000000..db6e674 --- /dev/null +++ b/lib/ops-setup.sh @@ -0,0 +1,368 @@ +#!/usr/bin/env bash +# ops-setup.sh — Setup ops repository (disinto-ops) +# +# Source from bin/disinto: +# source "$(dirname "$0")/../lib/ops-setup.sh" +# +# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT +# Optional: admin_token (falls back to FORGE_TOKEN for admin operations) +# +# Functions: +# setup_ops_repo [primary_branch] +# - Create ops repo on Forgejo if it doesn't exist +# - Configure bot collaborators with appropriate permissions +# - Clone or initialize ops repo locally +# - Seed directory structure (vault, knowledge, evidence) +# - Export _ACTUAL_OPS_SLUG for caller to use +# migrate_ops_repo [primary_branch] +# - Seed missing directories/files on existing ops repos (idempotent) +# - Creates .gitkeep files and template content for canonical structure +# +# Globals modified: +# _ACTUAL_OPS_SLUG - resolved ops repo slug after setup_ops_repo completes + +set -euo pipefail + +setup_ops_repo() { + + local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" + local org_name="${ops_slug%%/*}" + local ops_name="${ops_slug##*/}" + + echo "" + echo "── Ops repo setup ─────────────────────────────────────" + + # 
Determine the actual ops repo location by searching across possible namespaces + # This handles cases where the repo was created under a different namespace + # due to past bugs (e.g., dev-bot/disinto-ops instead of disinto-admin/disinto-ops) + local actual_ops_slug="" + local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" ) + local http_code + + for ns in "${possible_namespaces[@]}"; do + slug="${ns}/${ops_name}" + if curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then + actual_ops_slug="$slug" + echo "Ops repo: ${slug} (found at ${slug})" + break + fi + done + + # If not found, try to create it in the configured namespace + if [ -z "$actual_ops_slug" ]; then + echo "Creating ops repo in namespace: ${org_name}" + # Create org if it doesn't exist + curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs" \ + -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true + if curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then + actual_ops_slug="${org_name}/${ops_name}" + echo "Ops repo: ${actual_ops_slug} created on Forgejo" + else + # Fallback: use admin API to create repo under the target namespace + http_code=$(curl -s -o /dev/null -w "%{http_code}" \ + -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${org_name}/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for 
${org_name}/${ops_name%-ops}\"}" 2>/dev/null || echo "0") + if [ "$http_code" = "201" ]; then + actual_ops_slug="${org_name}/${ops_name}" + echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)" + else + echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2 + return 1 + fi + fi + fi + + # Configure collaborators on the ops repo + local bot_user bot_perm + declare -A bot_permissions=( + [dev-bot]="write" + [review-bot]="read" + [planner-bot]="write" + [gardener-bot]="write" + [vault-bot]="write" + [supervisor-bot]="read" + [predictor-bot]="read" + [architect-bot]="write" + ) + + # Add all bot users as collaborators with appropriate permissions + # vault branch protection (#77) requires: + # - Admin-only merge to main (enforced by admin_enforced: true) + # - Bots can push branches and create PRs, but cannot merge + for bot_user in "${!bot_permissions[@]}"; do + bot_perm="${bot_permissions[$bot_user]}" + if curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \ + -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then + echo " + ${bot_user} = ${bot_perm} collaborator" + else + echo " ! ${bot_user} = ${bot_perm} (already set or failed)" + fi + done + + # Add disinto-admin as admin collaborator + if curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \ + -d '{"permission":"admin"}' >/dev/null 2>&1; then + echo " + disinto-admin = admin collaborator" + else + echo " ! disinto-admin = admin (already set or failed)" + fi + + # Clone ops repo locally if not present + if [ ! 
-d "${ops_root}/.git" ]; then + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") + local clone_url="${auth_url}/${actual_ops_slug}.git" + echo "Cloning: ops repo -> ${ops_root}" + if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then + echo "Ops repo: ${actual_ops_slug} cloned successfully" + else + echo "Initializing: ops repo at ${ops_root}" + mkdir -p "$ops_root" + git -C "$ops_root" init --initial-branch="${primary_branch}" -q + # Set remote to the actual ops repo location + git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git" + echo "Ops repo: ${actual_ops_slug} initialized locally" + fi + else + echo "Ops repo: ${ops_root} (already exists locally)" + # Verify remote is correct + local current_remote + current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true) + local expected_remote="${forge_url}/${actual_ops_slug}.git" + if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then + echo " Fixing: remote URL from ${current_remote} to ${expected_remote}" + git -C "$ops_root" remote set-url origin "$expected_remote" + fi + fi + + # Seed directory structure + local seeded=false + mkdir -p "${ops_root}/vault/pending" + mkdir -p "${ops_root}/vault/approved" + mkdir -p "${ops_root}/vault/fired" + mkdir -p "${ops_root}/vault/rejected" + mkdir -p "${ops_root}/knowledge" + mkdir -p "${ops_root}/evidence/engagement" + mkdir -p "${ops_root}/evidence/red-team" + mkdir -p "${ops_root}/evidence/holdout" + mkdir -p "${ops_root}/evidence/evolution" + mkdir -p "${ops_root}/evidence/user-test" + mkdir -p "${ops_root}/sprints" + [ -f "${ops_root}/sprints/.gitkeep" ] || { touch "${ops_root}/sprints/.gitkeep"; seeded=true; } + [ -f "${ops_root}/evidence/red-team/.gitkeep" ] || { touch "${ops_root}/evidence/red-team/.gitkeep"; seeded=true; } + [ -f "${ops_root}/evidence/holdout/.gitkeep" ] || { touch "${ops_root}/evidence/holdout/.gitkeep"; seeded=true; } + [ -f 
"${ops_root}/evidence/evolution/.gitkeep" ] || { touch "${ops_root}/evidence/evolution/.gitkeep"; seeded=true; } + [ -f "${ops_root}/evidence/user-test/.gitkeep" ] || { touch "${ops_root}/evidence/user-test/.gitkeep"; seeded=true; } + + if [ ! -f "${ops_root}/README.md" ]; then + cat > "${ops_root}/README.md" < **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo. + +## Branch protection + +- \`main\`: 2 reviewers required for vault items +- Journal/evidence commits may use lighter rules +OPSEOF + seeded=true + fi + + # Copy vault policy.toml template if not already present + if [ ! -f "${ops_root}/vault/policy.toml" ]; then + local policy_src="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/vault/policy.toml" + if [ -f "$policy_src" ]; then + cp "$policy_src" "${ops_root}/vault/policy.toml" + echo " + Copied vault/policy.toml template" + seeded=true + fi + fi + + # Create stub files if they don't exist + [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; } + [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } + [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } + + # Commit and push seed content + if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then + # Auto-configure repo-local git identity if missing (#778) + if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then + git -C "$ops_root" config user.name "disinto-admin" + fi + if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then + git -C "$ops_root" config user.email "disinto-admin@localhost" + fi + + git -C "$ops_root" add -A + if ! 
git -C "$ops_root" diff --cached --quiet 2>/dev/null; then + git -C "$ops_root" commit -m "chore: seed ops repo structure" -q + # Push if remote exists + if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then + if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then + echo "Seeded: ops repo with initial structure" + else + echo "Warning: failed to push seed content to ops repo" >&2 + fi + fi + fi + fi + + # Export resolved slug for the caller to write back to the project TOML + _ACTUAL_OPS_SLUG="${actual_ops_slug}" +} + +# migrate_ops_repo — Seed missing ops repo directories and files on existing deployments +# +# This function is idempotent — safe to run on every container start. +# It checks for missing directories/files and creates them with .gitkeep files +# or template content as appropriate. +# +# Called from entrypoint.sh after setup_ops_repo() to bring pre-#407 deployments +# up to date with the canonical ops repo structure. +migrate_ops_repo() { + local ops_root="${1:-}" + local primary_branch="${2:-main}" + + # Validate ops_root argument + if [ -z "$ops_root" ]; then + # Try to determine ops_root from environment or project config + if [ -n "${OPS_REPO_ROOT:-}" ]; then + ops_root="${OPS_REPO_ROOT}" + elif [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then + source "$(dirname "$0")/load-project.sh" "$PROJECT_TOML" + ops_root="${OPS_REPO_ROOT:-}" + fi + fi + + # Skip if we still don't have an ops root + if [ -z "$ops_root" ]; then + echo "migrate_ops_repo: skipping — no ops repo root determined" + return 0 + fi + + # Verify it's a git repo + if [ ! -d "${ops_root}/.git" ]; then + echo "migrate_ops_repo: skipping — ${ops_root} is not a git repo" + return 0 + fi + + echo "" + echo "── Ops repo migration ───────────────────────────────────" + echo "Checking ${ops_root} for missing directories and files..." 
+ + local migrated=false + + # Canonical ops repo structure (post #407) + # Directories to ensure exist with .gitkeep files + local -a dir_keepfiles=( + "${ops_root}/vault/pending/.gitkeep" + "${ops_root}/vault/approved/.gitkeep" + "${ops_root}/vault/fired/.gitkeep" + "${ops_root}/vault/rejected/.gitkeep" + "${ops_root}/knowledge/.gitkeep" + "${ops_root}/evidence/engagement/.gitkeep" + "${ops_root}/evidence/red-team/.gitkeep" + "${ops_root}/evidence/holdout/.gitkeep" + "${ops_root}/evidence/evolution/.gitkeep" + "${ops_root}/evidence/user-test/.gitkeep" + "${ops_root}/sprints/.gitkeep" + ) + + # Create missing directories and .gitkeep files + for keepfile in "${dir_keepfiles[@]}"; do + local dir + dir=$(dirname "$keepfile") + if [ ! -f "$keepfile" ]; then + mkdir -p "$dir" + touch "$keepfile" + echo " + Created: ${keepfile}" + migrated=true + fi + done + + # Template files to create if missing (starter content) + local -a template_files=( + "${ops_root}/portfolio.md" + "${ops_root}/prerequisites.md" + "${ops_root}/RESOURCES.md" + ) + + for tfile in "${template_files[@]}"; do + if [ ! -f "$tfile" ]; then + local title + title=$(basename "$tfile" | sed 's/\.md$//; s/_/ /g' | sed 's/\b\(.\)/\u\1/g') + { + echo "# ${title}" + echo "" + echo "## Overview" + echo "" + echo "" + } > "$tfile" + echo " + Created: ${tfile}" + migrated=true + fi + done + + # Commit and push changes if any were made + if [ "$migrated" = true ]; then + # Auto-configure repo-local git identity if missing + if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then + git -C "$ops_root" config user.name "disinto-admin" + fi + if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then + git -C "$ops_root" config user.email "disinto-admin@localhost" + fi + + git -C "$ops_root" add -A + if ! 
git -C "$ops_root" diff --cached --quiet 2>/dev/null; then + git -C "$ops_root" commit -m "chore: migrate ops repo structure to canonical layout" -q + # Push if remote exists + if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then + if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then + echo "Migrated: ops repo structure updated and pushed" + else + echo "Warning: failed to push migration to ops repo" >&2 + fi + fi + fi + else + echo " (all directories and files already present)" + fi +} diff --git a/lib/pr-lifecycle.sh b/lib/pr-lifecycle.sh index c4ba4c5..e097f34 100644 --- a/lib/pr-lifecycle.sh +++ b/lib/pr-lifecycle.sh @@ -357,11 +357,18 @@ pr_close() { local pr_num="$1" _prl_log "closing PR #${pr_num}" - curl -sf -X PATCH \ + local resp http_code + resp=$(curl -sf -w "\n%{http_code}" -X PATCH \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ "${FORGE_API}/pulls/${pr_num}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true + -d '{"state":"closed"}' 2>/dev/null) || true + http_code=$(printf '%s\n' "$resp" | tail -1) + if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then + _prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}" + return 1 + fi + _prl_log "PR #${pr_num} closed" } # --------------------------------------------------------------------------- @@ -398,11 +405,18 @@ pr_walk_to_merge() { if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then ci_retry_count=$((ci_retry_count + 1)) _prl_log "infra failure — retriggering CI (retry ${ci_retry_count})" + local rebase_output rebase_rc ( cd "$worktree" && \ git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \ git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \ git rebase "${remote}/${PRIMARY_BRANCH}" && \ - git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true + git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1 + rebase_rc=$? 
+ rebase_output=$(cat /tmp/rebase-output-$$) + rm -f /tmp/rebase-output-$$ + if [ "$rebase_rc" -ne 0 ]; then + _prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)" + fi continue fi @@ -474,11 +488,7 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: _PR_WALK_EXIT_REASON="merged" return 0 fi - if [ "$rc" -eq 2 ]; then - _PR_WALK_EXIT_REASON="merge_blocked" - return 1 - fi - # Merge failed (conflict) — ask agent to rebase + # Merge failed (conflict or HTTP 405) — ask agent to rebase _prl_log "merge failed — invoking agent to rebase" agent_run --resume "$session_id" --worktree "$worktree" \ "PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown} @@ -524,8 +534,7 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push: # build_phase_protocol_prompt — Generate push/commit instructions for Claude. # # For the synchronous agent_run architecture: tells Claude how to commit and -# push (no phase files). For the tmux session architecture, use the -# build_phase_protocol_prompt in dev/phase-handler.sh instead. +# push (no phase files). 
# # Args: branch [remote] # Stdout: instruction text diff --git a/lib/profile.sh b/lib/profile.sh deleted file mode 100644 index 79f8514..0000000 --- a/lib/profile.sh +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env bash -# profile.sh — Helpers for agent .profile repo management -# -# Source after lib/env.sh and lib/formula-session.sh: -# source "$(dirname "$0")/../lib/env.sh" -# source "$(dirname "$0")/lib/formula-session.sh" -# source "$(dirname "$0")/lib/profile.sh" -# -# Required globals: FORGE_TOKEN, FORGE_URL, AGENT_IDENTITY, PROFILE_REPO_PATH -# -# Functions: -# profile_propose_formula NEW_FORMULA CONTENT REASON — create PR to update formula.toml - -set -euo pipefail - -# Internal log helper -_profile_log() { - if declare -f log >/dev/null 2>&1; then - log "profile: $*" - else - printf '[%s] profile: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 - fi -} - -# ----------------------------------------------------------------------------- -# profile_propose_formula — Propose a formula change via PR -# -# Creates a branch, writes updated formula.toml, opens a PR, and returns PR number. -# Branch is protected (requires admin approval per #87). 
-# -# Args: -# $1 - NEW_FORMULA_CONTENT: The complete new formula.toml content -# $2 - REASON: Human-readable explanation of what changed and why -# -# Returns: -# 0 on success, prints PR number to stdout -# 1 on failure -# -# Example: -# source "$(dirname "$0")/../lib/env.sh" -# source "$(dirname "$0")/lib/formula-session.sh" -# source "$(dirname "$0")/lib/profile.sh" -# AGENT_IDENTITY="dev-bot" -# ensure_profile_repo "$AGENT_IDENTITY" -# profile_propose_formula "$new_formula" "Added new prompt pattern for code review" -# ----------------------------------------------------------------------------- -profile_propose_formula() { - local new_formula="$1" - local reason="$2" - - if [ -z "${AGENT_IDENTITY:-}" ]; then - _profile_log "ERROR: AGENT_IDENTITY not set" - return 1 - fi - - if [ -z "${PROFILE_REPO_PATH:-}" ]; then - _profile_log "ERROR: PROFILE_REPO_PATH not set — ensure_profile_repo not called" - return 1 - fi - - if [ -z "${FORGE_TOKEN:-}" ]; then - _profile_log "ERROR: FORGE_TOKEN not set" - return 1 - fi - - if [ -z "${FORGE_URL:-}" ]; then - _profile_log "ERROR: FORGE_URL not set" - return 1 - fi - - # Generate short description from reason for branch name - local short_desc - short_desc=$(printf '%s' "$reason" | \ - tr '[:upper:]' '[:lower:]' | \ - sed 's/[^a-z0-9 ]//g' | \ - sed 's/ */ /g' | \ - sed 's/^ *//;s/ *$//' | \ - cut -c1-40 | \ - tr ' ' '-') - - if [ -z "$short_desc" ]; then - short_desc="formula-update" - fi - - local branch_name="formula/${short_desc}" - local formula_path="${PROFILE_REPO_PATH}/formula.toml" - - _profile_log "Proposing formula change: ${branch_name}" - _profile_log "Reason: ${reason}" - - # Ensure we're on main branch and up-to-date - _profile_log "Fetching .profile repo" - ( - cd "$PROFILE_REPO_PATH" || return 1 - - git fetch origin main --quiet 2>/dev/null || \ - git fetch origin master --quiet 2>/dev/null || true - - # Reset to main/master - if git checkout main --quiet 2>/dev/null; then - git pull --ff-only origin main 
--quiet 2>/dev/null || true - elif git checkout master --quiet 2>/dev/null; then - git pull --ff-only origin master --quiet 2>/dev/null || true - else - _profile_log "ERROR: Failed to checkout main/master branch" - return 1 - fi - - # Create and checkout new branch - git checkout -b "$branch_name" 2>/dev/null || { - _profile_log "Branch ${branch_name} may already exist" - git checkout "$branch_name" 2>/dev/null || return 1 - } - - # Write formula.toml - printf '%s' "$new_formula" > "$formula_path" - - # Commit the change - git config user.name "${AGENT_IDENTITY}" || true - git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true - - git add "$formula_path" - git commit -m "formula: ${reason}" --no-verify || { - _profile_log "No changes to commit (formula unchanged)" - # Check if branch has any commits - if git rev-parse HEAD >/dev/null 2>&1; then - : # branch has commits, continue - else - _profile_log "ERROR: Failed to create commit" - return 1 - fi - } - - # Push branch - local remote="${FORGE_REMOTE:-origin}" - git push --set-upstream "$remote" "$branch_name" --quiet 2>/dev/null || { - _profile_log "ERROR: Failed to push branch" - return 1 - } - - _profile_log "Branch pushed: ${branch_name}" - - # Create PR - local forge_url="${FORGE_URL%/}" - local api_url="${forge_url}/api/v1/repos/${AGENT_IDENTITY}/.profile" - local primary_branch="main" - - # Check if main or master is the primary branch - if ! 
curl -sf -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/main" 2>/dev/null | grep -q "200"; then - primary_branch="master" - fi - - local pr_title="formula: ${reason}" - local pr_body="# Formula Update - -**Reason:** ${reason} - ---- -*This PR was auto-generated by ${AGENT_IDENTITY}.* -" - - local pr_response http_code - local pr_json - pr_json=$(jq -n \ - --arg t "$pr_title" \ - --arg b "$pr_body" \ - --arg h "$branch_name" \ - --arg base "$primary_branch" \ - '{title:$t, body:$b, head:$h, base:$base}') || { - _profile_log "ERROR: Failed to build PR JSON" - return 1 - } - - pr_response=$(curl -s -w "\n%{http_code}" -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${api_url}/pulls" \ - -d "$pr_json" || true) - - http_code=$(printf '%s\n' "$pr_response" | tail -1) - pr_response=$(printf '%s\n' "$pr_response" | sed '$d') - - if [ "$http_code" = "201" ] || [ "$http_code" = "200" ]; then - local pr_num - pr_num=$(printf '%s' "$pr_response" | jq -r '.number') - _profile_log "PR created: #${pr_num}" - printf '%s' "$pr_num" - return 0 - else - # Check if PR already exists (409 conflict) - if [ "$http_code" = "409" ]; then - local existing_pr - existing_pr=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/pulls?state=open&head=${AGENT_IDENTITY}:formula/${short_desc}" 2>/dev/null | \ - jq -r '.[0].number // empty') || true - if [ -n "$existing_pr" ]; then - _profile_log "PR already exists: #${existing_pr}" - printf '%s' "$existing_pr" - return 0 - fi - fi - _profile_log "ERROR: Failed to create PR (HTTP ${http_code})" - return 1 - fi - ) - - return $? 
-} diff --git a/lib/release.sh b/lib/release.sh new file mode 100644 index 0000000..1f993ec --- /dev/null +++ b/lib/release.sh @@ -0,0 +1,178 @@ +#!/usr/bin/env bash +# ============================================================================= +# release.sh — disinto_release() function +# +# Handles vault TOML creation, branch setup on ops repo, PR creation, +# and auto-merge request for a versioned release. +# +# Globals expected: +# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) +# FORGE_TOKEN - API token for Forge operations +# FORGE_OPS_REPO - Ops repo slug (e.g. disinto-admin/myproject-ops) +# FACTORY_ROOT - Root of the disinto factory +# PRIMARY_BRANCH - Primary branch name (e.g. main) +# +# Usage: +# source "${FACTORY_ROOT}/lib/release.sh" +# disinto_release +# ============================================================================= +set -euo pipefail + +# Source vault.sh for _vault_log helper +source "${FACTORY_ROOT}/lib/vault.sh" + +# Assert required globals are set before using this module. +_assert_release_globals() { + local missing=() + [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") + [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") + [ -z "${FORGE_OPS_REPO:-}" ] && missing+=("FORGE_OPS_REPO") + [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") + [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") + if [ "${#missing[@]}" -gt 0 ]; then + echo "Error: release.sh requires these globals to be set: ${missing[*]}" >&2 + exit 1 + fi +} + +disinto_release() { + _assert_release_globals + + local version="${1:-}" + local formula_path="${FACTORY_ROOT}/formulas/release.toml" + + if [ -z "$version" ]; then + echo "Error: version required" >&2 + echo "Usage: disinto release " >&2 + echo "Example: disinto release v1.2.0" >&2 + exit 1 + fi + + # Validate version format (must start with 'v' followed by semver) + if ! 
echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2 + exit 1 + fi + + # Load project config to get FORGE_OPS_REPO + if [ -z "${PROJECT_NAME:-}" ]; then + # PROJECT_NAME is unset - detect project TOML from projects/ directory + local found_toml + found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1) + if [ -n "$found_toml" ]; then + source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml" + fi + else + local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml" + if [ -f "$project_toml" ]; then + source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml" + fi + fi + + # Check formula exists + if [ ! -f "$formula_path" ]; then + echo "Error: release formula not found at ${formula_path}" >&2 + exit 1 + fi + + # Get the ops repo root + local ops_root="${FACTORY_ROOT}/../disinto-ops" + if [ ! -d "${ops_root}/.git" ]; then + echo "Error: ops repo not found at ${ops_root}" >&2 + echo " Run 'disinto init' to set up the ops repo first" >&2 + exit 1 + fi + + # Generate a unique ID for the vault item + local id="release-${version//./}" + local vault_toml="${ops_root}/vault/actions/${id}.toml" + + # Create vault TOML with the specific version + cat > "$vault_toml" </dev/null || true + + # Push branch + git push -u origin "$branch_name" 2>/dev/null || { + echo "Error: failed to push branch" >&2 + exit 1 + } + ) + + # Create PR + local pr_response + pr_response=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \ + -d "{\"title\":\"${pr_title}\",\"head\":\"${branch_name}\",\"base\":\"${PRIMARY_BRANCH}\",\"body\":\"$(echo "$pr_body" | sed ':a;N;$!ba;s/\n/\\n/g')\"}" 2>/dev/null) || { + echo "Error: failed to create PR" >&2 + echo "Response: ${pr_response}" >&2 + exit 1 + } + + local pr_number + pr_number=$(echo 
"$pr_response" | jq -r '.number') + + local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}" + + # Enable auto-merge on the PR — Forgejo will auto-merge after approval + _vault_log "Enabling auto-merge for PR #${pr_number}" + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \ + -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { + echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2 + } + + echo "" + echo "Release PR created: ${pr_url}" + echo "" + echo "Next steps:" + echo " 1. Review the PR" + echo " 2. Approve the PR (auto-merge will trigger after approval)" + echo " 3. The vault runner will execute the release formula" + echo "" + echo "After merge, the release will:" + echo " 1. Tag Forgejo main with ${version}" + echo " 2. Push tag to mirrors (Codeberg, GitHub)" + echo " 3. Build and tag the agents Docker image" + echo " 4. Restart agent containers" +} diff --git a/lib/stack-lock.sh b/lib/stack-lock.sh new file mode 100644 index 0000000..6c8c1ed --- /dev/null +++ b/lib/stack-lock.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# stack-lock.sh — File-based lock protocol for singleton project stack access +# +# Prevents CI pipelines and the reproduce-agent from stepping on each other +# when sharing a single project stack (e.g. harb docker compose). +# +# Lock file: /home/agent/data/locks/-stack.lock +# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."} +# +# Protocol: +# 1. stack_lock_check — inspect current lock state +# 2. stack_lock_acquire — wait until lock is free, then claim it +# 3. stack_lock_release — delete lock file when done +# +# Heartbeat: callers must update the heartbeat every 2 minutes while holding +# the lock by calling stack_lock_heartbeat. 
A heartbeat older than 10 minutes +# is considered stale — the next acquire will break it. +# +# Usage: +# source "$(dirname "$0")/../lib/stack-lock.sh" +# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject" +# trap 'stack_lock_release "myproject"' EXIT +# # ... do work ... +# stack_lock_release "myproject" + +set -euo pipefail + +STACK_LOCK_DIR="${HOME}/data/locks" +STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls +STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale +STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds + +# _stack_lock_path +# Print the path of the lock file for the given project. +_stack_lock_path() { + local project="$1" + echo "${STACK_LOCK_DIR}/${project}-stack.lock" +} + +# _stack_lock_now +# Print current UTC timestamp in ISO-8601 format. +_stack_lock_now() { + date -u +"%Y-%m-%dT%H:%M:%SZ" +} + +# _stack_lock_epoch +# Convert an ISO-8601 UTC timestamp to a Unix epoch integer. +_stack_lock_epoch() { + local ts="$1" + # Strip trailing Z, replace T with space for `date -d` + date -u -d "${ts%Z}" +%s 2>/dev/null || date -u -j -f "%Y-%m-%dT%H:%M:%S" "${ts%Z}" +%s 2>/dev/null +} + +# stack_lock_check +# Print lock status to stdout: "free", "held:", or "stale:". +# Returns 0 in all cases (status is in stdout). +stack_lock_check() { + local project="$1" + local lock_file + lock_file="$(_stack_lock_path "$project")" + + if [ ! 
-f "$lock_file" ]; then + echo "free" + return 0 + fi + + local holder heartbeat + holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$lock_file" 2>/dev/null || echo "unknown") + heartbeat=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$lock_file" 2>/dev/null || echo "") + + if [ -z "$heartbeat" ]; then + echo "stale:${holder}" + return 0 + fi + + local hb_epoch now_epoch age + hb_epoch=$(_stack_lock_epoch "$heartbeat" 2>/dev/null || echo "0") + now_epoch=$(date -u +%s) + age=$(( now_epoch - hb_epoch )) + + if [ "$age" -gt "$STACK_LOCK_STALE_SECONDS" ]; then + echo "stale:${holder}" + else + echo "held:${holder}" + fi +} + +# stack_lock_acquire [max_wait_seconds] +# Acquire the lock for on behalf of . +# Polls every STACK_LOCK_POLL_INTERVAL seconds. +# Breaks stale locks automatically. +# Exits non-zero if the lock cannot be acquired within max_wait_seconds. +stack_lock_acquire() { + local holder="$1" + local project="$2" + local max_wait="${3:-$STACK_LOCK_MAX_WAIT}" + local lock_file + lock_file="$(_stack_lock_path "$project")" + local deadline + deadline=$(( $(date -u +%s) + max_wait )) + + mkdir -p "$STACK_LOCK_DIR" + + while true; do + local status + status=$(stack_lock_check "$project") + + case "$status" in + free) + # Write to temp file then rename to avoid partial reads by other processes + local tmp_lock + tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") + local now + now=$(_stack_lock_now) + printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ + "$holder" "$now" "$now" > "$tmp_lock" + mv "$tmp_lock" "$lock_file" + echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2 + return 0 + ;; + stale:*) + local stale_holder="${status#stale:}" + echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2 + rm -f "$lock_file" + # Loop back immediately to re-check and claim + ;; + held:*) + local 
cur_holder="${status#held:}" + local remaining + remaining=$(( deadline - $(date -u +%s) )) + if [ "$remaining" -le 0 ]; then + echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2 + return 1 + fi + echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2 + sleep "$STACK_LOCK_POLL_INTERVAL" + ;; + *) + echo "[stack-lock] unexpected status '${status}' for ${project}" >&2 + return 1 + ;; + esac + done +} + +# stack_lock_heartbeat +# Update the heartbeat timestamp in the lock file. +# Should be called every 2 minutes while holding the lock. +# No-op if the lock file is absent or held by a different holder. +stack_lock_heartbeat() { + local holder="$1" + local project="$2" + local lock_file + lock_file="$(_stack_lock_path "$project")" + + [ -f "$lock_file" ] || return 0 + + local current_holder + current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") + [ "$current_holder" = "$holder" ] || return 0 + + local since + since=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lock_file" 2>/dev/null || echo "") + local now + now=$(_stack_lock_now) + + local tmp_lock + tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") + printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ + "$holder" "$since" "$now" > "$tmp_lock" + mv "$tmp_lock" "$lock_file" +} + +# stack_lock_release [holder_id] +# Release the lock for . +# If holder_id is provided, only releases if the lock is held by that holder +# (prevents accidentally releasing someone else's lock). 
+stack_lock_release() { + local project="$1" + local holder="${2:-}" + local lock_file + lock_file="$(_stack_lock_path "$project")" + + [ -f "$lock_file" ] || return 0 + + if [ -n "$holder" ]; then + local current_holder + current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") + if [ "$current_holder" != "$holder" ]; then + echo "[stack-lock] refusing to release: lock held by '${current_holder}', not '${holder}'" >&2 + return 1 + fi + fi + + rm -f "$lock_file" + echo "[stack-lock] released lock for ${project}" >&2 +} diff --git a/lib/vault.sh b/lib/vault.sh index 812d464..484fd57 100644 --- a/lib/vault.sh +++ b/lib/vault.sh @@ -39,6 +39,60 @@ _vault_ops_api() { printf '%s' "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}" } +# ----------------------------------------------------------------------------- +# _vault_commit_direct — Commit low-tier action directly to ops main +# Args: ops_api tmp_toml_file action_id +# Uses FORGE_ADMIN_TOKEN to bypass PR workflow +# ----------------------------------------------------------------------------- +_vault_commit_direct() { + local ops_api="$1" + local tmp_toml="$2" + local action_id="$3" + local file_path="vault/actions/${action_id}.toml" + + # Use FORGE_ADMIN_TOKEN for direct commit (vault-bot identity) + local admin_token="${FORGE_ADMIN_TOKEN:-${FORGE_TOKEN}}" + if [ -z "$admin_token" ]; then + echo "ERROR: FORGE_ADMIN_TOKEN is required for low-tier commits" >&2 + return 1 + fi + + # Get main branch SHA + local main_sha + main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \ + "${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ + jq -r '.commit.id // empty' || true) + + if [ -z "$main_sha" ]; then + main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \ + "${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ + jq -r '.object.sha // empty' || true) + fi + + if [ -z "$main_sha" ]; then + echo 
"ERROR: could not get main branch SHA" >&2 + return 1 + fi + + _vault_log "Committing ${file_path} directly to ${PRIMARY_BRANCH:-main}" + + # Encode TOML content as base64 + local encoded_content + encoded_content=$(base64 -w 0 < "$tmp_toml") + + # Commit directly to main branch using Forgejo content API + if ! curl -sf -X PUT \ + -H "Authorization: token ${admin_token}" \ + -H "Content-Type: application/json" \ + "${ops_api}/contents/${file_path}" \ + -d "{\"message\":\"vault: add ${action_id} (low-tier)\",\"branch\":\"${PRIMARY_BRANCH:-main}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then + echo "ERROR: failed to write ${file_path} to ${PRIMARY_BRANCH:-main}" >&2 + return 1 + fi + + _vault_log "Direct commit successful for ${action_id}" +} + # ----------------------------------------------------------------------------- # vault_request — Create a vault PR or return existing one # Args: action_id toml_content @@ -59,6 +113,9 @@ vault_request() { return 1 fi + # Get admin token for API calls (FORGE_ADMIN_TOKEN for low-tier, FORGE_TOKEN otherwise) + local admin_token="${FORGE_ADMIN_TOKEN:-${FORGE_TOKEN}}" + # Check if PR already exists for this action local existing_pr existing_pr=$(pr_find_by_branch "vault/${action_id}" "$(_vault_ops_api)") || true @@ -99,7 +156,34 @@ vault_request() { return 1 fi - # Extract values for PR creation + # Get ops repo API URL + local ops_api + ops_api="$(_vault_ops_api)" + + # Classify the action to determine if PR bypass is allowed + local classify_script="${FACTORY_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)}/vault/classify.sh" + local vault_tier + vault_tier=$("$classify_script" "${VAULT_ACTION_FORMULA:-}" "${VAULT_BLAST_RADIUS_OVERRIDE:-}") || { + # Classification failed, default to high tier (require PR) + vault_tier="high" + _vault_log "Warning: classification failed, defaulting to high tier" + } + export VAULT_TIER="${vault_tier}" + + # For low-tier actions, commit directly to ops main using FORGE_ADMIN_TOKEN + if [ "$vault_tier" = "low" ]; then + _vault_log "low-tier — committed directly to ops main" + # Add dispatch_mode field to indicate direct commit (no PR) + local direct_toml + direct_toml=$(mktemp /tmp/vault-direct-XXXXXX.toml) + trap 'rm -f "$tmp_toml" "$direct_toml"' RETURN + # Prepend dispatch_mode = "direct" to the TOML + printf 'dispatch_mode = "direct"\n%s\n' "$toml_content" > "$direct_toml" + _vault_commit_direct "$ops_api" "$direct_toml" "${action_id}" + return 0 + fi + + # Extract values for PR creation (medium/high tier) local pr_title pr_body pr_title="vault: ${action_id}" pr_body="Vault action: ${action_id} @@ -113,16 +197,12 @@ Secrets: ${VAULT_ACTION_SECRETS:-} This vault action has been created by an agent and requires admin approval before execution. See the TOML file for details." - # Get ops repo API URL - local ops_api - ops_api="$(_vault_ops_api)" - # Create branch local branch="vault/${action_id}" local branch_exists branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Authorization: token ${admin_token}" \ "${ops_api}/git/branches/${branch}" 2>/dev/null || echo "0") if [ "$branch_exists" != "200" ]; then @@ -131,13 +211,13 @@ before execution. See the TOML file for details." 
# Get the commit SHA of main branch local main_sha - main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \ "${ops_api}/git/branches/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ jq -r '.commit.id // empty' || true) if [ -z "$main_sha" ]; then # Fallback: get from refs - main_sha=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + main_sha=$(curl -sf -H "Authorization: token ${admin_token}" \ "${ops_api}/git/refs/heads/${PRIMARY_BRANCH:-main}" 2>/dev/null | \ jq -r '.object.sha // empty' || true) fi @@ -149,7 +229,7 @@ before execution. See the TOML file for details." # Create the branch if ! curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Authorization: token ${admin_token}" \ -H "Content-Type: application/json" \ "${ops_api}/git/branches" \ -d "{\"ref\":\"${branch}\",\"sha\":\"${main_sha}\"}" >/dev/null 2>&1; then @@ -170,7 +250,7 @@ before execution. See the TOML file for details." # Upload file using Forgejo content API if ! curl -sf -X PUT \ - -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Authorization: token ${admin_token}" \ -H "Content-Type: application/json" \ "${ops_api}/contents/${file_path}" \ -d "{\"message\":\"vault: add ${action_id}\",\"branch\":\"${branch}\",\"content\":\"${encoded_content}\",\"committer\":{\"name\":\"vault-bot\",\"email\":\"vault-bot@${FORGE_REPO}\"},\"overwrite\":true}" >/dev/null 2>&1; then @@ -190,7 +270,7 @@ before execution. See the TOML file for details." # Enable auto-merge on the PR — Forgejo will auto-merge after approval _vault_log "Enabling auto-merge for PR #${pr_num}" curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Authorization: token ${admin_token}" \ -H "Content-Type: application/json" \ "${ops_api}/pulls/${pr_num}/merge" \ -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { @@ -202,18 +282,18 @@ before execution. See the TOML file for details." 
# Get label IDs local vault_label_id pending_label_id - vault_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + vault_label_id=$(curl -sf -H "Authorization: token ${admin_token}" \ "${ops_api}/labels" 2>/dev/null | \ jq -r --arg n "vault" '.[] | select(.name == $n) | .id // empty' || true) - pending_label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + pending_label_id=$(curl -sf -H "Authorization: token ${admin_token}" \ "${ops_api}/labels" 2>/dev/null | \ jq -r --arg n "pending-approval" '.[] | select(.name == $n) | .id // empty' || true) # Add labels if they exist if [ -n "$vault_label_id" ]; then curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Authorization: token ${admin_token}" \ -H "Content-Type: application/json" \ "${ops_api}/issues/${pr_num}/labels" \ -d "[{\"id\":${vault_label_id}}]" >/dev/null 2>&1 || true @@ -221,7 +301,7 @@ before execution. See the TOML file for details." if [ -n "$pending_label_id" ]; then curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Authorization: token ${admin_token}" \ -H "Content-Type: application/json" \ "${ops_api}/issues/${pr_num}/labels" \ -d "[{\"id\":${pending_label_id}}]" >/dev/null 2>&1 || true diff --git a/planner/AGENTS.md b/planner/AGENTS.md index 84b511b..d9dd705 100644 --- a/planner/AGENTS.md +++ b/planner/AGENTS.md @@ -1,4 +1,4 @@ - + # Planner Agent **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints), @@ -65,7 +65,7 @@ component, not work. tree, humans steer by editing VISION.md. Tree grows organically as the planner discovers new prerequisites during runs - `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo) -- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo) + **Constraint focus**: The planner uses Theory of Constraints to avoid premature issue filing. 
Only the top 3 unresolved prerequisites that block the most diff --git a/planner/planner-run.sh b/planner/planner-run.sh index 663703c..3c71d44 100755 --- a/planner/planner-run.sh +++ b/planner/planner-run.sh @@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="$SCRIPT_DIR/planner.log" +LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -43,20 +43,29 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-planner-run" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="planner" + +# Override log() to append to planner-specific log file +# shellcheck disable=SC2034 +log() { + local agent="${LOG_AGENT:-planner}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" +} # ── Guards ──────────────────────────────────────────────────────────────── check_active planner acquire_cron_lock "/tmp/planner-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Planner run start ---" +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + # ── Resolve agent identity for .profile repo ──────────────────────────── -if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PLANNER_TOKEN:-}" ]; then - AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PLANNER_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) -fi +resolve_agent_identity || true # ── Load formula + context ─────────────────────────────────────────────── load_formula_or_profile "planner" 
"$FACTORY_ROOT/formulas/run-planner.toml" || exit 1 diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md index 327a842..02eb43f 100644 --- a/predictor/AGENTS.md +++ b/predictor/AGENTS.md @@ -1,4 +1,4 @@ - + # Predictor Agent **Role**: Abstract adversary (the "goblin"). Runs a 2-step formula diff --git a/predictor/predictor-run.sh b/predictor/predictor-run.sh index 266829c..889fe1c 100755 --- a/predictor/predictor-run.sh +++ b/predictor/predictor-run.sh @@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="$SCRIPT_DIR/predictor.log" +LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -44,20 +44,29 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-predictor-run" -log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="predictor" + +# Override log() to append to predictor-specific log file +# shellcheck disable=SC2034 +log() { + local agent="${LOG_AGENT:-predictor}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" +} # ── Guards ──────────────────────────────────────────────────────────────── check_active predictor acquire_cron_lock "/tmp/predictor-run.lock" -check_memory 2000 +memory_guard 2000 log "--- Predictor run start ---" +# ── Resolve forge remote for git operations ───────────────────────────── +resolve_forge_remote + # ── Resolve agent identity for .profile repo ──────────────────────────── -if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PREDICTOR_TOKEN:-}" ]; then - AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PREDICTOR_TOKEN}" \ - 
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) -fi +resolve_agent_identity || true # ── Load formula + context ─────────────────────────────────────────────── load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1 diff --git a/projects/disinto.toml.example b/projects/disinto.toml.example index ea0b8c5..8721545 100644 --- a/projects/disinto.toml.example +++ b/projects/disinto.toml.example @@ -5,7 +5,7 @@ name = "disinto" repo = "johba/disinto" -ops_repo = "johba/disinto-ops" +ops_repo = "disinto-admin/disinto-ops" forge_url = "http://localhost:3000" repo_root = "/home/YOU/dark-factory" ops_repo_root = "/home/YOU/disinto-ops" @@ -23,6 +23,18 @@ check_prs = true check_dev_agent = true check_pipeline_stall = false +# Local-model agents (optional) — configure to use llama-server or similar +# for local LLM inference. Each agent gets its own container with isolated +# credentials and configuration. +# +# [agents.llama] +# base_url = "http://10.10.10.1:8081" +# model = "unsloth/Qwen3.5-35B-A3B" +# api_key = "sk-no-key-required" +# roles = ["dev"] +# forge_user = "dev-qwen" +# compact_pct = 60 + # [mirrors] # github = "git@github.com:johba/disinto.git" # codeberg = "git@codeberg.org:johba/disinto.git" diff --git a/review/AGENTS.md b/review/AGENTS.md index e010ff5..d0e5b7b 100644 --- a/review/AGENTS.md +++ b/review/AGENTS.md @@ -1,4 +1,4 @@ - + # Review Agent **Role**: AI-powered PR review — post structured findings and formal @@ -9,8 +9,8 @@ whose CI has passed and that lack a review for the current HEAD SHA, then spawns `review-pr.sh `. **Key files**: -- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. 
-- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. +- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. **Circuit breaker**: counts existing `` comments; skips a PR if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures). +- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. **cd at startup**: changes to `$PROJECT_REPO_ROOT` early in the script — before any git commands — because the factory root is not a git repo after image rebuild (#408). Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids hardcoded 'origin'). Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. 
**Environment variables consumed**: - `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN) diff --git a/review/review-poll.sh b/review/review-poll.sh index 47d37df..72a6e85 100755 --- a/review/review-poll.sh +++ b/review/review-poll.sh @@ -23,8 +23,15 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log" MAX_REVIEWS=3 REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle +# Override LOG_AGENT for consistent agent identification +# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() +LOG_AGENT="review" + +# Override log() to append to review-specific log file +# shellcheck disable=SC2034 log() { - printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" + local agent="${LOG_AGENT:-review}" + printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE" } # Log rotation @@ -126,10 +133,11 @@ if [ -n "$REVIEW_SIDS" ]; then log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})" - if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then + review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$? + if [ "$review_rc" -eq 0 ]; then REVIEWED=$((REVIEWED + 1)) else - log " #${pr_num} re-review failed" + log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" fi [ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break @@ -180,10 +188,11 @@ while IFS= read -r line; do log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}" - if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then + review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$? 
+ if [ "$review_rc" -eq 0 ]; then REVIEWED=$((REVIEWED + 1)) else - log " #${PR_NUM} review failed" + log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" fi if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then diff --git a/review/review-pr.sh b/review/review-pr.sh index 8a9a29d..a0e0ada 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -35,6 +35,10 @@ git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true # --- Config --- PR_NUMBER="${1:?Usage: review-pr.sh [--force]}" + +# Change to project repo early — required before any git commands +# (factory root is not a git repo after image rebuild) +cd "${PROJECT_REPO_ROOT}" FORCE="${2:-}" API="${FORGE_API}" LOGFILE="${DISINTO_LOG_DIR}/review/review.log" @@ -58,13 +62,15 @@ if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 10 mv "$LOGFILE" "$LOGFILE.old" fi +# ============================================================================= +# RESOLVE FORGE REMOTE FOR GIT OPERATIONS +# ============================================================================= +resolve_forge_remote + # ============================================================================= # RESOLVE AGENT IDENTITY FOR .PROFILE REPO # ============================================================================= -if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then - AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) -fi +resolve_agent_identity || true # ============================================================================= # MEMORY GUARD @@ -131,7 +137,7 @@ PREV_REV=$(printf '%s' "$ALL_COMMENTS" | jq -r --arg s "$PR_SHA" \ if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body') PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP ' + # Supervisor Agent **Role**: Health 
monitoring and auto-remediation, executed as a formula-driven @@ -9,19 +9,17 @@ resources or human decisions, files vault items instead of escalating directly. **Trigger**: `supervisor-run.sh` runs every 20 min via cron. Sources `lib/guard.sh` and calls `check_active supervisor` first — skips if -`$FACTORY_ROOT/state/.supervisor-active` is absent. Then creates a tmux session -with `claude --model sonnet`, injects `formulas/run-supervisor.toml` with -pre-collected metrics as context, monitors the phase file, and cleans up on -completion or timeout (20 min max session). No action issues — the supervisor -runs directly from cron like the planner and predictor. +`$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` +via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with +pre-collected metrics as context, and cleans up on completion or timeout (20 min max session). +No action issues — the supervisor runs directly from cron like the planner and predictor. **Key files**: - `supervisor/supervisor-run.sh` — Cron wrapper + orchestrator: lock, memory guard, - runs preflight.sh, sources disinto project config, creates tmux session, injects - formula prompt with metrics, monitors phase file, handles crash recovery via - `run_formula_and_monitor` + runs preflight.sh, sources disinto project config, runs claude -p via agent-sdk.sh, + injects formula prompt with metrics, handles crash recovery - `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap, - load), Docker status, active tmux sessions + phase files, lock files, agent log + load), Docker status, active sessions + phase files, lock files, agent log tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase` files for `PHASE:escalate` entries and auto-removes any whose linked issue @@ -31,11 +29,8 @@ runs directly from cron like the planner and predictor. 
- `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review, health-assessment, decide-actions, report, journal) with `needs` dependencies. Claude evaluates all metrics and takes actions in a single interactive session -- `$OPS_REPO_ROOT/journal/supervisor/*.md` — Daily health logs from each supervisor run - `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory, disk, CI, git, dev-agent, review-agent, forge) -- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by - supervisor-run.sh + formula) **Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled), P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). @@ -46,5 +41,5 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). - `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries **Lifecycle**: supervisor-run.sh (cron */20) → lock + memory guard → run -preflight.sh (collect metrics) → load formula + context → create tmux -session → Claude assesses health, auto-fixes, writes journal → `PHASE:done`. +preflight.sh (collect metrics) → load formula + context → run claude -p via agent-sdk.sh +→ Claude assesses health, auto-fixes, writes journal → `PHASE:done`. diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh deleted file mode 100755 index 42ab1dd..0000000 --- a/supervisor/supervisor-poll.sh +++ /dev/null @@ -1,808 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes -# -# Two-layer architecture: -# 1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes -# 2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml -# -# Runs every 10min via cron. 
-# -# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh -# -# Peek: cat /tmp/supervisor-status -# Log: tail -f /path/to/disinto/supervisor/supervisor.log - -source "$(dirname "$0")/../lib/env.sh" -source "$(dirname "$0")/../lib/ci-helpers.sh" - -LOGFILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log" -STATUSFILE="/tmp/supervisor-status" -LOCKFILE="/tmp/supervisor-poll.lock" -PROMPT_FILE="${FACTORY_ROOT}/formulas/run-supervisor.toml" -PROJECTS_DIR="${FACTORY_ROOT}/projects" - -METRICS_FILE="${DISINTO_LOG_DIR}/metrics/supervisor-metrics.jsonl" - -emit_metric() { - printf '%s\n' "$1" >> "$METRICS_FILE" -} - -# Count all matching items from a paginated forge API endpoint. -# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues" -# Returns total count across all pages (max 20 pages = 1000 items). -codeberg_count_paginated() { - local endpoint="$1" total=0 page=1 count - while true; do - count=$(forge_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null | jq 'length' 2>/dev/null || echo 0) - total=$((total + ${count:-0})) - [ "${count:-0}" -lt 50 ] && break - page=$((page + 1)) - [ "$page" -gt 20 ] && break - done - echo "$total" -} - -rotate_metrics() { - [ -f "$METRICS_FILE" ] || return 0 - local cutoff tmpfile - cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M) - tmpfile="${METRICS_FILE}.tmp" - jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \ - "$METRICS_FILE" > "$tmpfile" 2>/dev/null - # Only replace if jq produced output, or the source is already empty - if [ -s "$tmpfile" ] || [ ! 
-s "$METRICS_FILE" ]; then - mv "$tmpfile" "$METRICS_FILE" - else - rm -f "$tmpfile" - fi -} - -# Prevent overlapping runs -if [ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) - if kill -0 "$LOCK_PID" 2>/dev/null; then - exit 0 - fi - rm -f "$LOCKFILE" -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT -mkdir -p "$(dirname "$METRICS_FILE")" -rotate_metrics - -flog() { - printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" -} - -status() { - printf '[%s] supervisor: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" - flog "$*" -} - -# Alerts by priority -P0_ALERTS="" -P1_ALERTS="" -P2_ALERTS="" -P3_ALERTS="" -P4_ALERTS="" - -p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; } -p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; } -p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; } -p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; } -p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; } - -FIXES="" -fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; } - -# ############################################################################# -# LAYER 1: FACTORY INFRASTRUCTURE -# (project-agnostic, runs once) -# ############################################################################# - -# ============================================================================= -# P0: MEMORY — check first, fix first -# ============================================================================= -status "P0: checking memory" - -AVAIL_MB=$(free -m | awk '/Mem:/{print $7}') -SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}') - -if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then - flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing" - - # Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions - STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 
2>/dev/null || true) - if [ -n "$STALE_CLAUDES" ]; then - echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true - fixed "Killed stale claude processes: ${STALE_CLAUDES}" - fi - - # Drop filesystem caches - sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 - fixed "Dropped filesystem caches" - - # Re-check after fixes - AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}') - SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}') - - if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then - p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" - else - flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" - fi -fi - -# P0 alerts already logged — clear so they are not duplicated in the final consolidated log -if [ -n "$P0_ALERTS" ]; then - P0_ALERTS="" -fi - -# ============================================================================= -# P1: DISK -# ============================================================================= -status "P1: checking disk" - -DISK_PERCENT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%') - -if [ "${DISK_PERCENT:-0}" -gt 80 ]; then - flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning" - - # Docker cleanup (safe — keeps images) - sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune" - - # Truncate logs >10MB - for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do - if [ -f "$logfile" ]; then - SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) - if [ "${SIZE_KB:-0}" -gt 10240 ]; then - truncate -s 0 "$logfile" - fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)" - fi - fi - done - - # Woodpecker log_entries cleanup - LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs) - if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then - SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+') - SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)') - if [ "$SIZE_UNIT" 
= "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then - wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null - fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})" - fi - fi - - DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%') - if [ "${DISK_AFTER:-0}" -gt 80 ]; then - p1 "Disk still ${DISK_AFTER}% after auto-clean" - else - flog "Disk recovered: ${DISK_AFTER}%" - fi -fi - -# P1 alerts already logged — clear so they are not duplicated in the final consolidated log -if [ -n "$P1_ALERTS" ]; then - P1_ALERTS="" -fi - -# Emit infra metric -_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}') -_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? (${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 )) -emit_metric "$(jq -nc \ - --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ - --argjson ram "${_RAM_USED_PCT:-0}" \ - --argjson disk "${DISK_PERCENT:-0}" \ - --argjson swap "${SWAP_USED_MB:-0}" \ - '{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true - -# ============================================================================= -# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic) -# ============================================================================= -status "P4: infra housekeeping" - -# Stale agent-spawned claude processes (>3h) — skip interactive sessions -STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) -if [ -n "$STALE_CLAUDES" ]; then - echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true - fixed "Killed stale claude processes: $(echo $STALE_CLAUDES | wc -w) procs" -fi - -# Rotate logs >5MB -for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do - if [ -f "$logfile" ]; then - SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) - if [ "${SIZE_KB:-0}" -gt 5120 ]; then - mv "$logfile" "${logfile}.old" 2>/dev/null - fixed "Rotated $(basename "$logfile")" - fi 
- fi -done - -# ############################################################################# -# LAYER 2: PER-PROJECT CHECKS -# (iterated over projects/*.toml, config-driven) -# ############################################################################# - -# Infra retry tracking (shared across projects, created once) -_RETRY_DIR="/tmp/supervisor-infra-retries" -mkdir -p "$_RETRY_DIR" - -# Function: run all per-project checks for the currently loaded project config -check_project() { - local proj_name="${PROJECT_NAME:-unknown}" - flog "── checking project: ${proj_name} (${FORGE_REPO}) ──" - - # =========================================================================== - # P2: FACTORY STOPPED — CI, dev-agent, git - # =========================================================================== - status "P2: ${proj_name}: checking pipeline" - - # CI stuck - STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true) - [ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null && p2 "${proj_name}: CI: ${STUCK_CI} pipeline(s) running >20min" - - PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true) - [ "${PENDING_CI:-0}" -gt 0 ] && p2 "${proj_name}: CI: ${PENDING_CI} pipeline(s) pending >30min" - - # Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI) - _CI_ROW=$(wpdb -A -F ',' -c "SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1;" 2>/dev/null | grep -E '^[0-9]' | head -1 || true) - if [ -n "$_CI_ROW" ]; then - 
_CI_ID=$(echo "$_CI_ROW" | cut -d',' -f1 | tr -d ' ') - _CI_DUR=$(echo "$_CI_ROW" | cut -d',' -f2 | tr -d ' ') - _CI_STAT=$(echo "$_CI_ROW" | cut -d',' -f3 | tr -d ' ') - emit_metric "$(jq -nc \ - --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ - --arg proj "$proj_name" \ - --argjson pipeline "${_CI_ID:-0}" \ - --argjson duration "${_CI_DUR:-0}" \ - --arg status "${_CI_STAT:-unknown}" \ - '{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null)" 2>/dev/null || true - fi - - # =========================================================================== - # P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures - # =========================================================================== - if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then - status "P2e: ${proj_name}: checking infra failures" - - # Recent failed pipelines (last 6h) - _failed_nums=$(wpdb -A -c " - SELECT number FROM pipelines - WHERE repo_id = ${WOODPECKER_REPO_ID} - AND status IN ('failure', 'error') - AND finished > 0 - AND to_timestamp(finished) > now() - interval '6 hours' - ORDER BY number DESC LIMIT 5;" 2>/dev/null \ - | tr -d ' ' | grep -E '^[0-9]+$' || true) - - # shellcheck disable=SC2086 - for _pip_num in $_failed_nums; do - [ -z "$_pip_num" ] && continue - - # Check retry count; alert if retries exhausted - _retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}" - _retries=0 - [ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0) - if [ "${_retries:-0}" -ge 2 ]; then - p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation" - continue - fi - - # Classify failure type via shared helper - _classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code") - - if [[ "$_classification" == infra* ]]; then - _infra_reason="${_classification#infra }" - _new_retries=$(( _retries + 1 )) - if woodpecker_api 
"/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \ - -X POST >/dev/null 2>&1; then - echo "$_new_retries" > "$_retry_file" - fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)" - else - p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed" - flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error" - fi - fi - done - - # Clean up stale retry tracking files (>24h) - find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true - fi - - # Dev-agent health (only if monitoring enabled) - if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then - DEV_LOCK="/tmp/dev-agent-${proj_name}.lock" - if [ -f "$DEV_LOCK" ]; then - DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null) - if ! kill -0 "$DEV_PID" 2>/dev/null; then - rm -f "$DEV_LOCK" - fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID} dead)" - else - DEV_STATUS_AGE=$(stat -c %Y "/tmp/dev-agent-status-${proj_name}" 2>/dev/null || echo 0) - NOW_EPOCH=$(date +%s) - STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 )) - if [ "$STATUS_AGE_MIN" -gt 30 ]; then - p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN}min" - fi - fi - fi - fi - - # Git repo health - if [ -d "${PROJECT_REPO_ROOT}" ]; then - cd "${PROJECT_REPO_ROOT}" 2>/dev/null || true - GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") - GIT_REBASE=$([ -d .git/rebase-merge ] || [ -d .git/rebase-apply ] && echo "yes" || echo "no") - - if [ "$GIT_REBASE" = "yes" ]; then - git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ - fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}" || \ - p2 "${proj_name}: Git: stale rebase, auto-abort failed" - fi - if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then - git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ - fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}" || 
\ - p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}" - fi - fi - - # =========================================================================== - # P2b: FACTORY STALLED — backlog exists but no agent running - # =========================================================================== - if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then - status "P2: ${proj_name}: checking pipeline stall" - - BACKLOG_COUNT=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") - IN_PROGRESS=$(forge_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") - - if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then - DEV_LOG="${DISINTO_LOG_DIR}/dev/dev-agent.log" - if [ -f "$DEV_LOG" ]; then - LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0) - else - LAST_LOG_EPOCH=0 - fi - NOW_EPOCH=$(date +%s) - IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 )) - - if [ "$IDLE_MIN" -gt 20 ]; then - p2 "${proj_name}: Pipeline stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN}min" - fi - fi - fi - - # =========================================================================== - # P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long - # =========================================================================== - if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then - status "P2: ${proj_name}: checking dev-agent productivity" - - DEV_LOG_FILE="${DISINTO_LOG_DIR}/dev/dev-agent.log" - if [ -f "$DEV_LOG_FILE" ]; then - RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6) - TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." 
|| true) - BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true) - if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then - p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'" - fi - fi - fi - - # =========================================================================== - # P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs - # =========================================================================== - if [ "${CHECK_PRS:-true}" = "true" ]; then - status "P3: ${proj_name}: checking PRs" - - OPEN_PRS=$(forge_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true) - for pr in $OPEN_PRS; do - PR_JSON=$(forge_api GET "/pulls/${pr}" 2>/dev/null || true) - [ -z "$PR_JSON" ] && continue - PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""') - [ -z "$PR_SHA" ] && continue - - CI_STATE=$(ci_commit_status "$PR_SHA" 2>/dev/null || true) - - MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true') - if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then - p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase" - elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then - UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""') - if [ -n "$UPDATED" ]; then - UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0) - NOW_EPOCH=$(date +%s) - AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 )) - [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min" - fi - elif ci_passed "$CI_STATE"; then - HAS_REVIEW=$(forge_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \ - jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains(" + +## What was expected + + + +## Steps to reproduce + + +1. +2. +3. 
+ +## Environment + + +- Browser/Client: +- Wallet (if applicable): +- Network (if applicable): +- Version: diff --git a/tests/mock-forgejo.py b/tests/mock-forgejo.py index d8be511..c65b522 100755 --- a/tests/mock-forgejo.py +++ b/tests/mock-forgejo.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Mock Forgejo API server for CI smoke tests. -Implements 15 Forgejo API endpoints that disinto init calls. +Implements 16 Forgejo API endpoints that disinto init calls. State stored in-memory (dicts), responds instantly. """ @@ -135,6 +135,7 @@ class ForgejoHandler(BaseHTTPRequestHandler): # Users patterns (r"^users/([^/]+)$", f"handle_{method}_users_username"), (r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"), + (r"^users/([^/]+)/tokens/([^/]+)$", f"handle_{method}_users_username_tokens_token_id"), (r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"), # Repos patterns (r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"), @@ -149,6 +150,7 @@ class ForgejoHandler(BaseHTTPRequestHandler): # Admin patterns (r"^admin/users$", f"handle_{method}_admin_users"), (r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"), + (r"^admin/users/([^/]+)/repos$", f"handle_{method}_admin_users_username_repos"), # Org patterns (r"^orgs$", f"handle_{method}_orgs"), ] @@ -294,7 +296,10 @@ class ForgejoHandler(BaseHTTPRequestHandler): def handle_GET_users_username_tokens(self, query): """GET /api/v1/users/{username}/tokens""" + # Support both token auth (for listing own tokens) and basic auth (for admin listing) username = require_token(self) + if not username: + username = require_basic_auth(self) if not username: json_response(self, 401, {"message": "invalid authentication"}) return @@ -303,6 +308,38 @@ class ForgejoHandler(BaseHTTPRequestHandler): tokens = [t for t in state["tokens"].values() if t.get("username") == username] json_response(self, 200, tokens) + def handle_DELETE_users_username_tokens_token_id(self, query): + """DELETE 
/api/v1/users/{username}/tokens/{id}""" + # Support both token auth and basic auth + username = require_token(self) + if not username: + username = require_basic_auth(self) + if not username: + json_response(self, 401, {"message": "invalid authentication"}) + return + + parts = self.path.split("/") + if len(parts) >= 8: + token_id_str = parts[7] + else: + json_response(self, 404, {"message": "token not found"}) + return + + # Find and delete token by ID + deleted = False + for tok_sha1, tok in list(state["tokens"].items()): + if tok.get("id") == int(token_id_str) and tok.get("username") == username: + del state["tokens"][tok_sha1] + deleted = True + break + + if deleted: + self.send_response(204) + self.send_header("Content-Length", 0) + self.end_headers() + else: + json_response(self, 404, {"message": "token not found"}) + def handle_POST_users_username_tokens(self, query): """POST /api/v1/users/{username}/tokens""" username = require_basic_auth(self) @@ -460,6 +497,55 @@ class ForgejoHandler(BaseHTTPRequestHandler): state["repos"][key] = repo json_response(self, 201, repo) + def handle_POST_admin_users_username_repos(self, query): + """POST /api/v1/admin/users/{username}/repos + Admin API to create a repo under a specific user namespace. + This allows creating repos in any user's namespace when authenticated as admin. 
+ """ + require_token(self) + + parts = self.path.split("/") + if len(parts) >= 6: + target_user = parts[4] + else: + json_response(self, 400, {"message": "username required"}) + return + + if target_user not in state["users"]: + json_response(self, 404, {"message": "user not found"}) + return + + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length).decode("utf-8") + data = json.loads(body) if body else {} + + repo_name = data.get("name") + if not repo_name: + json_response(self, 400, {"message": "name is required"}) + return + + repo_id = next_ids["repos"] + next_ids["repos"] += 1 + + key = f"{target_user}/{repo_name}" + repo = { + "id": repo_id, + "full_name": key, + "name": repo_name, + "owner": {"id": state["users"][target_user]["id"], "login": target_user}, + "empty": not data.get("auto_init", False), + "default_branch": data.get("default_branch", "main"), + "description": data.get("description", ""), + "private": data.get("private", False), + "html_url": f"https://example.com/{key}", + "ssh_url": f"git@example.com:{key}.git", + "clone_url": f"https://example.com/{key}.git", + "created_at": "2026-04-01T00:00:00Z", + } + + state["repos"][key] = repo + json_response(self, 201, repo) + def handle_POST_user_repos(self, query): """POST /api/v1/user/repos""" require_token(self) diff --git a/tests/smoke-init.sh b/tests/smoke-init.sh index 80f8994..a8371bd 100644 --- a/tests/smoke-init.sh +++ b/tests/smoke-init.sh @@ -15,7 +15,8 @@ set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" -FORGE_URL="${FORGE_URL:-http://localhost:3000}" +# Always use localhost for mock Forgejo (in case FORGE_URL is set from docker-compose) +export FORGE_URL="http://localhost:3000" MOCK_BIN="/tmp/smoke-mock-bin" TEST_SLUG="smoke-org/smoke-repo" FAILED=0 @@ -24,6 +25,8 @@ fail() { printf 'FAIL: %s\n' "$*" >&2; FAILED=1; } pass() { printf 'PASS: %s\n' "$*"; } cleanup() { + # Kill any leftover mock-forgejo.py processes by name + pkill -f "mock-forgejo.py" 2>/dev/null || true rm -rf "$MOCK_BIN" /tmp/smoke-test-repo \ "${FACTORY_ROOT}/projects/smoke-repo.toml" # Restore .env only if we created the backup @@ -172,6 +175,18 @@ else fail "disinto init exited non-zero" fi +# ── Idempotency test: run init again ─────────────────────────────────────── +echo "=== Idempotency test: running disinto init again ===" +if bash "${FACTORY_ROOT}/bin/disinto" init \ + "${TEST_SLUG}" \ + --bare --yes \ + --forge-url "$FORGE_URL" \ + --repo-root "/tmp/smoke-test-repo"; then + pass "disinto init (re-run) completed successfully" +else + fail "disinto init (re-run) exited non-zero" +fi + # ── 4. Verify Forgejo state ───────────────────────────────────────────────── echo "=== 4/6 Verifying Forgejo state ===" diff --git a/vault/SCHEMA.md b/vault/SCHEMA.md index 0a465c3..cb7bc00 100644 --- a/vault/SCHEMA.md +++ b/vault/SCHEMA.md @@ -21,6 +21,7 @@ secrets = ["CLAWHUB_TOKEN"] model = "sonnet" tools = ["clawhub"] timeout_minutes = 30 +blast_radius = "low" # optional: overrides policy.toml tier ("low"|"medium"|"high") ``` ## Field Specifications @@ -41,6 +42,7 @@ timeout_minutes = 30 | `model` | string | `sonnet` | Override the default Claude model for this action | | `tools` | array of strings | `[]` | MCP tools to enable during execution | | `timeout_minutes` | integer | `60` | Maximum execution time in minutes | +| `blast_radius` | string | _(from policy.toml)_ | Override blast-radius tier for this invocation. Valid values: `"low"`, `"medium"`, `"high"`. 
See [docs/BLAST-RADIUS.md](../docs/BLAST-RADIUS.md) |
 
 ## Secret Names
 
diff --git a/vault/classify.sh b/vault/classify.sh
new file mode 100755
index 0000000..f91ab25
--- /dev/null
+++ b/vault/classify.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# classify.sh — Blast-radius classification engine
+#
+# Reads the ops-repo policy.toml and prints the tier for a given formula.
+# An optional blast_radius override (from the action TOML) takes precedence.
+#
+# Usage: classify.sh <formula> [blast_radius_override]
+# Output: prints "low", "medium", or "high" to stdout; exits 0
+#         (exits 1 with a usage message on stderr when <formula> is missing)
+set -euo pipefail
+
+# Source lib/env.sh directly (not vault-env.sh) to avoid circular dependency:
+# vault-env.sh calls classify.sh, so classify.sh must not source vault-env.sh.
+# The only variable needed here is OPS_REPO_ROOT, which comes from lib/env.sh.
+# shellcheck source=../lib/env.sh
+source "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/lib/env.sh"
+
+formula="${1:-}"
+override="${2:-}"
+
+if [ -z "$formula" ]; then
+  echo "Usage: classify.sh <formula> [blast_radius_override]" >&2
+  exit 1
+fi
+
+# A valid override wins outright; any other value is ignored and policy.toml decides
+if [[ "$override" =~ ^(low|medium|high)$ ]]; then
+  echo "$override"
+  exit 0
+fi
+
+# Read tier from ops-repo policy.toml (path stays empty if OPS_REPO_ROOT is unset → default-deny)
+policy_file="${OPS_REPO_ROOT:+${OPS_REPO_ROOT}/vault/policy.toml}"
+
+if [ -f "$policy_file" ]; then
+  # Parse: look for `formula_name = "tier"` under [tiers]
+  # Escape every ERE metacharacter so grep -E matches the formula name literally
+  escaped_formula=$(printf '%s' "$formula" | sed 's/[][(){}.*+?^$|\\]/\\&/g')
+  # grep may find no match (exit 1); guard with || true to avoid pipefail abort
+  tier=$(sed -n '/^\[tiers\]/,/^\[/{/^\[tiers\]/d;/^\[/d;p}' "$policy_file" \
+    | { grep -E "^${escaped_formula}[[:space:]]*=" || true; } \
+    | sed -E 's/^[^=]+=[[:space:]]*"([^"]+)".*/\1/' \
+    | head -n1)
+
+  if [[ "$tier" =~ ^(low|medium|high)$ ]]; then
+    echo "$tier"
+    exit 0
+  fi
+fi
+
+# Default-deny: unknown formulas are high
+echo "high"
+exit 0
diff --git
a/vault/examples/release.toml b/vault/examples/release.toml index f8af6d1..0f1ce66 100644 --- a/vault/examples/release.toml +++ b/vault/examples/release.toml @@ -12,7 +12,7 @@ # id = "release-v120" # formula = "release" # context = "Release v1.2.0" -# secrets = [] +# secrets = ["GITHUB_TOKEN", "CODEBERG_TOKEN"] # # Steps executed by the release formula: # 1. preflight - Validate prerequisites (version, FORGE_TOKEN, Docker) @@ -26,7 +26,7 @@ id = "release-v120" formula = "release" context = "Release v1.2.0 — includes vault redesign, .profile system, architect agent" -secrets = [] +secrets = ["GITHUB_TOKEN", "CODEBERG_TOKEN"] # Optional: specify a larger model for complex release logic # model = "sonnet" diff --git a/vault/policy.toml b/vault/policy.toml new file mode 100644 index 0000000..5ba2667 --- /dev/null +++ b/vault/policy.toml @@ -0,0 +1,30 @@ +# vault/policy.toml — Blast-radius tier classification for formulas +# +# Each formula maps to a tier: "low", "medium", or "high". +# Unknown formulas default to "high" (default-deny). +# +# This file is a template. `disinto init` copies it to +# $OPS_REPO_ROOT/vault/policy.toml where operators can override tiers +# per-deployment without a disinto PR. + +[tiers] +# Read-only / internal bookkeeping — no external side-effects +groom-backlog = "low" +triage = "low" +reproduce = "low" +review-pr = "low" + +# Create issues, PRs, or internal plans — visible but reversible +dev = "medium" +run-planner = "medium" +run-gardener = "medium" +run-predictor = "medium" +run-supervisor = "medium" +run-architect = "medium" +upgrade-dependency = "medium" + +# External-facing or irreversible operations +run-publish-site = "high" +run-rent-a-human = "high" +add-rpc-method = "high" +release = "high" diff --git a/vault/vault-env.sh b/vault/vault-env.sh index 8e7f7c6..d9a17db 100644 --- a/vault/vault-env.sh +++ b/vault/vault-env.sh @@ -7,16 +7,29 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)/lib/env.sh" # Use vault-bot's own Forgejo identity FORGE_TOKEN="${FORGE_VAULT_TOKEN:-${FORGE_TOKEN}}" +export FORGE_TOKEN + +# Export FORGE_ADMIN_TOKEN for direct commits (low-tier bypass) +# This token is used to commit directly to ops main without PR workflow +export FORGE_ADMIN_TOKEN="${FORGE_ADMIN_TOKEN:-}" # Vault redesign in progress (PR-based approval workflow) # This file is kept for shared env setup; scripts being replaced by #73 +# Blast-radius classification — set VAULT_TIER if a formula is known +# Callers may set VAULT_ACTION_FORMULA before sourcing, or pass it later. +if [ -n "${VAULT_ACTION_FORMULA:-}" ]; then + VAULT_TIER=$("$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/classify.sh" \ + "$VAULT_ACTION_FORMULA" "${VAULT_BLAST_RADIUS_OVERRIDE:-}") + export VAULT_TIER +fi + # ============================================================================= # VAULT ACTION VALIDATION # ============================================================================= # Allowed secret names - must match keys in .env.vault.enc -VAULT_ALLOWED_SECRETS="CLAWHUB_TOKEN GITHUB_TOKEN DEPLOY_KEY NPM_TOKEN DOCKER_HUB_TOKEN" +VAULT_ALLOWED_SECRETS="CLAWHUB_TOKEN GITHUB_TOKEN CODEBERG_TOKEN DEPLOY_KEY NPM_TOKEN DOCKER_HUB_TOKEN" # Validate a vault action TOML file # Usage: validate_vault_action @@ -60,7 +73,7 @@ validate_vault_action() { local unknown_fields unknown_fields=$(echo "$toml_content" | grep -E '^[a-zA-Z_][a-zA-Z0-9_]*\s*=' | sed -E 's/^([a-zA-Z_][a-zA-Z0-9_]*)\s*=.*/\1/' | sort -u | while read -r field; do case "$field" in - id|formula|context|secrets|model|tools|timeout_minutes) ;; + id|formula|context|secrets|model|tools|timeout_minutes|dispatch_mode|blast_radius) ;; *) echo "$field" ;; esac done) @@ -86,9 +99,9 @@ validate_vault_action() { return 1 fi - # Validate formula exists in formulas/ - if [ ! 
-f "$formulas_dir/${formula}.toml" ]; then - echo "ERROR: Formula not found: $formula" >&2 + # Validate formula exists in formulas/ (.toml for Claude reasoning, .sh for mechanical) + if [ ! -f "$formulas_dir/${formula}.toml" ] && [ ! -f "$formulas_dir/${formula}.sh" ]; then + echo "ERROR: Formula not found: $formula (checked .toml and .sh)" >&2 return 1 fi