diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index d9781fe..0000000 --- a/.dockerignore +++ /dev/null @@ -1,20 +0,0 @@ -# Secrets — prevent .env files from being baked into the image -.env -.env.enc -.env.vault -.env.vault.enc - -# Version control — .git is huge and not needed in image -.git - -# Archives — not needed at runtime -*.tar.gz - -# Prometheus data — large, ephemeral data -prometheus-data/ - -# Compose files — only needed at runtime via volume mount -docker-compose.yml - -# Project TOML files — gitignored anyway, won't be in build context -projects/*.toml diff --git a/.env.example b/.env.example index 037abe1..6124671 100644 --- a/.env.example +++ b/.env.example @@ -20,7 +20,6 @@ FORGE_URL=http://localhost:3000 # [CONFIG] local Forgejo instance # Each agent has its own Forgejo account and API token (#747). # Per-agent tokens fall back to FORGE_TOKEN if not set. FORGE_TOKEN= # [SECRET] dev-bot API token (default for all agents) -FORGE_TOKEN_DEVQWEN= # [SECRET] dev-qwen API token (for agents-llama) FORGE_REVIEW_TOKEN= # [SECRET] review-bot API token FORGE_PLANNER_TOKEN= # [SECRET] planner-bot API token FORGE_GARDENER_TOKEN= # [SECRET] gardener-bot API token diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 40fc580..85de2ad 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -6,6 +6,8 @@ # 2. Every custom function called by agent scripts is defined in lib/ or the script itself # # Fast (<10s): no network, no tmux, no Claude needed. +# Would have caught: kill_tmux_session (renamed), create_agent_session (missing), +# read_phase (missing from dev-agent.sh scope) set -euo pipefail @@ -19,16 +21,12 @@ FAILED=0 # Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296). get_fns() { local f="$1" - # Pure-awk implementation: avoids grep/sed cross-platform differences - # (BusyBox grep BRE quirks, sed ; separator issues on Alpine). 
- awk ' - /^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ { - line = $0 - gsub(/^[[:space:]]+/, "", line) - sub(/[[:space:]]*[(].*/, "", line) - print line - } - ' "$f" 2>/dev/null | sort -u || true + # BRE mode (no -E). Use [(][)] for literal parens — unambiguous across + # GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping + # even in BRE). BRE one-or-more via [X][X]* instead of +. + grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \ + | sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \ + | sort -u || true } # Extract call-position identifiers that look like custom function calls: @@ -97,12 +95,13 @@ echo "=== 2/2 Function resolution ===" # # Included — these are inline-sourced by agent scripts: # lib/env.sh — sourced by every agent (log, forge_api, etc.) +# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.) # lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session) # lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.) # lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) -# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets) -# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, check_memory, etc.) +# lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets) +# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.) 
# lib/mirrors.sh — sourced by merge sites (mirror_push) # lib/guard.sh — sourced by all cron entry points (check_active) # lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps @@ -117,7 +116,7 @@ echo "=== 2/2 Function resolution ===" # If a new lib file is added and sourced by agents, add it to LIB_FUNS below # and add a check_script call for it in the lib files section further down. LIB_FUNS=$( - for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do + for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do if [ -f "$f" ]; then get_fns "$f"; fi done | sort -u ) @@ -181,12 +180,13 @@ check_script() { # These are already in LIB_FUNS (their definitions are available to agents), # but this verifies calls *within* each lib file are also resolvable. check_script lib/env.sh lib/mirrors.sh +check_script lib/agent-session.sh check_script lib/agent-sdk.sh check_script lib/ci-helpers.sh check_script lib/secret-scan.sh check_script lib/file-action-issue.sh lib/secret-scan.sh check_script lib/tea-helpers.sh lib/secret-scan.sh -check_script lib/formula-session.sh +check_script lib/formula-session.sh lib/agent-session.sh check_script lib/load-project.sh check_script lib/mirrors.sh lib/env.sh check_script lib/guard.sh @@ -199,13 +199,15 @@ check_script lib/ci-debug.sh check_script lib/parse-deps.sh # Agent scripts — list cross-sourced files where function scope flows across files. +# phase-handler.sh defines default callback stubs; sourcing agents may override. 
check_script dev/dev-agent.sh +check_script dev/phase-handler.sh lib/secret-scan.sh check_script dev/dev-poll.sh check_script dev/phase-test.sh check_script gardener/gardener-run.sh check_script review/review-pr.sh lib/agent-sdk.sh check_script review/review-poll.sh -check_script planner/planner-run.sh lib/formula-session.sh +check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh check_script supervisor/supervisor-poll.sh check_script supervisor/update-prompt.sh check_script supervisor/supervisor-run.sh diff --git a/.woodpecker/ci.yml b/.woodpecker/ci.yml index fc2f12a..08ae24d 100644 --- a/.woodpecker/ci.yml +++ b/.woodpecker/ci.yml @@ -8,19 +8,6 @@ when: event: [push, pull_request] -# Override default clone to authenticate against Forgejo using FORGE_TOKEN. -# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous -# git clones fail with exit code 128. FORGE_TOKEN is injected globally via -# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh). -clone: - git: - image: alpine/git - commands: - - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|") - - git clone --depth 1 "$AUTH_URL" . 
- - git fetch --depth 1 origin "$CI_COMMIT_REF" - - git checkout FETCH_HEAD - steps: - name: shellcheck image: koalaman/shellcheck-alpine:stable diff --git a/.woodpecker/detect-duplicates.py b/.woodpecker/detect-duplicates.py index 35f3aa8..1d2c195 100644 --- a/.woodpecker/detect-duplicates.py +++ b/.woodpecker/detect-duplicates.py @@ -267,31 +267,41 @@ def main() -> int: "2653705045fdf65072cccfd16eb04900": "Standard prompt template (GRAPH_SECTION, SCRATCH_CONTEXT, FORMULA_CONTENT)", "93726a3c799b72ed2898a55552031921": "Standard prompt template continuation (SCRATCH_CONTEXT, FORMULA_CONTENT, SCRATCH_INSTRUCTION)", "c11eaaacab69c9a2d3c38c75215eca84": "Standard prompt template end (FORMULA_CONTENT, SCRATCH_INSTRUCTION)", - # Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh - "29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)", - # Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh - # Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh - "059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)", - # Docker compose environment block for agents service (generators.sh + hire-agent.sh) - # Intentional duplicate - both generate the same docker-compose.yml template - "8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh) - old", - "fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh) - old", - "e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old", - # The hash shown in output is 161a80f7 - need to match exactly what the script finds - "161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh) - old", - # New hash after explicit environment fix 
(#381) - "83fa229b86a7fdcb1d3591ab8e718f9d": "Docker compose explicit environment block (generators.sh + hire-agent.sh) - #381", - # Verification mode helper functions - intentionally duplicated in dispatcher and entrypoint - # These functions check if bug-report parent issues have all sub-issues closed - "b783d403276f78b49ad35840845126a1": "Verification helper: sub_issues variable declaration", - "4b19b9a1bdfbc62f003fc237ed270ed9": "Verification helper: python3 -c invocation", - "cc1d0a9f85dfe0cc32e9ef6361cb8c3a": "Verification helper: Python imports and args", - "768926748b811ebd30f215f57db5de40": "Verification helper: json.load from /dev/stdin", - "4c58586a30bcf6b009c02010ed8f6256": "Verification helper: sub_issues list initialization", - "53ea3d6359f51d622467bd77b079cc88": "Verification helper: iterate issues in data", - "21aec56a99d5252b23fb9a38b895e8e8": "Verification helper: check body for Decomposed from pattern", - "60ea98b3604557d539193b2a6624e232": "Verification helper: append sub-issue number", - "9f6ae8e7811575b964279d8820494eb0": "Verification helper: for loop done pattern", + # install_project_crons function in entrypoint.sh and entrypoint-llama.sh (intentional duplicate) + "007e1390498374c68ab5d66aa6d277b2": "install_project_crons function in entrypoints (window 007e1390)", + "04143957d4c63e8a16ac28bddaff589b": "install_project_crons function in entrypoints (window 04143957)", + "076a19221cde674b2fce20a17292fa78": "install_project_crons function in entrypoints (window 076a1922)", + "0d498287626e105f16b24948aed53584": "install_project_crons function in entrypoints (window 0d498287)", + "137b746928011acd758c7a9c690810b2": "install_project_crons function in entrypoints (window 137b7469)", + "287d33d98d21e3e07e0869e56ad94527": "install_project_crons function in entrypoints (window 287d33d9)", + "325a3d54a15e59d333ec2a20c062cc8c": "install_project_crons function in entrypoints (window 325a3d54)", + "34e1943d5738f540d67c5c6bd3e60b20": 
"install_project_crons function in entrypoints (window 34e1943d)", + "3dabd19698f9705b05376c38042ccce8": "install_project_crons function in entrypoints (window 3dabd196)", + "446b420f7f9821a2553bc4995d1fac25": "install_project_crons function in entrypoints (window 446b420f)", + "4826cf4896b792368c7b4d77573d0f8b": "install_project_crons function in entrypoints (window 4826cf48)", + "4e564d3bbda0ef33962af6042736dc1e": "install_project_crons function in entrypoints (window 4e564d3b)", + "5a3d92b22e5d5bca8cce17d581ac6803": "install_project_crons function in entrypoints (window 5a3d92b2)", + "63c20c5a31cf5e08f3a901ddf6db98af": "install_project_crons function in entrypoints (window 63c20c5a)", + "77547751325562fac397bbfd3a21c88e": "install_project_crons function in entrypoints (window 77547751)", + "80bdff63e54b4a260043d264b83d8eb0": "install_project_crons function in entrypoints (window 80bdff63)", + "84e55706393f731b293890dd6d830316": "install_project_crons function in entrypoints (window 84e55706)", + "85f8a9d029ee9efecca73fd30449ccf4": "install_project_crons function in entrypoints (window 85f8a9d0)", + "86e28dae676c905c5aa0035128e20e46": "install_project_crons function in entrypoints (window 86e28dae)", + "a222b73bcd6a57adb2315726e81ab6cf": "install_project_crons function in entrypoints (window a222b73b)", + "abd6c7efe66f533c48c883c2a6998886": "install_project_crons function in entrypoints (window abd6c7ef)", + "bcfeb67ce4939181330afea4949a95cf": "install_project_crons function in entrypoints (window bcfeb67c)", + "c1248c98f978c48e4a1e5009a1440917": "install_project_crons function in entrypoints (window c1248c98)", + "c40571185b3306345ecf9ac33ab352a6": "install_project_crons function in entrypoints (window c4057118)", + "c566639b237036a7a385982274d3d271": "install_project_crons function in entrypoints (window c566639b)", + "d9cd2f3d874c32366d577ea0d334cd1a": "install_project_crons function in entrypoints (window d9cd2f3d)", + "df4d3e905b12f2c68b206e45dddf9214": 
"install_project_crons function in entrypoints (window df4d3e90)", + "e8e65ccf867fc6cbe49695ecdce2518e": "install_project_crons function in entrypoints (window e8e65ccf)", + "eb8b298f06cda4359cc171206e0014bf": "install_project_crons function in entrypoints (window eb8b298f)", + "ecdf0daa2f2845359a6a4aa12d327246": "install_project_crons function in entrypoints (window ecdf0daa)", + "eeac93b2fba4de4589d36ca20845ec9f": "install_project_crons function in entrypoints (window eeac93b2)", + "f08a7139db9c96cd3526549c499c0332": "install_project_crons function in entrypoints (window f08a7139)", + "f0917809bdf28ff93fff0749e7e7fea0": "install_project_crons function in entrypoints (window f0917809)", + "f0e4101f9b90c2fa921e088057a96db7": "install_project_crons function in entrypoints (window f0e4101f)", } if not sh_files: diff --git a/.woodpecker/smoke-init.yml b/.woodpecker/smoke-init.yml index 3953053..3e1f33a 100644 --- a/.woodpecker/smoke-init.yml +++ b/.woodpecker/smoke-init.yml @@ -4,7 +4,6 @@ when: - "bin/disinto" - "lib/load-project.sh" - "lib/env.sh" - - "lib/generators.sh" - "tests/**" - ".woodpecker/smoke-init.yml" @@ -13,7 +12,6 @@ steps: image: python:3-alpine commands: - apk add --no-cache bash curl jq git coreutils - - python3 tests/mock-forgejo.py & echo $! 
> /tmp/mock-forgejo.pid + - python3 tests/mock-forgejo.py & - sleep 2 - bash tests/smoke-init.sh - - kill $(cat /tmp/mock-forgejo.pid) 2>/dev/null || true diff --git a/AGENTS.md b/AGENTS.md index 78f1c29..7fcca01 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,4 +1,4 @@ - + # Disinto — Agent Instructions ## What this repo is @@ -21,16 +21,17 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set ``` disinto/ (code repo) -├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation +├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation ├── review/ review-poll.sh, review-pr.sh — PR review ├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula ├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula ├── planner/ planner-run.sh — direct cron executor for run-planner formula ├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper) │ preflight.sh — pre-flight data collection for supervisor formula +│ supervisor-poll.sh — legacy bash orchestrator (superseded) ├── architect/ architect-run.sh — strategic decomposition of vision into sprints ├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77) -├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py +├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, profile.sh, build-graph.py ├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored) ├── formulas/ Issue templates (TOML specs for multi-step agent tasks) └── docs/ Protocol docs (PHASE-PROTOCOL.md, 
EVIDENCE-ARCHITECTURE.md) @@ -52,9 +53,35 @@ disinto-ops/ (ops repo — {project}-ops) ## Agent .profile Model -Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/profile.sh`. +Each agent has a `.profile` repository on Forgejo that stores: +- `formula.toml` — agent-specific formula (optional, falls back to `formulas/.toml`) +- `knowledge/lessons-learned.md` — distilled lessons from journal entries +- `journal/` — session reflection entries (archived after digestion) -> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`. +### How it works + +1. **Pre-session:** The agent calls `formula_prepare_profile_context()` which: + - Resolves the agent's Forgejo identity from their token + - Clones/pulls the `.profile` repo to a local cache + - Loads `knowledge/lessons-learned.md` into `LESSONS_CONTEXT` for prompt injection + - Automatically digests journals if >10 undigested entries exist + +2. **Prompt injection:** Lessons are injected into the agent prompt: + ``` + ## Lessons learned (from .profile/knowledge/lessons-learned.md) + + ``` + +3. **Post-session:** The agent calls `profile_write_journal` which: + - Generates a reflection entry about the session + - Writes it to `journal/issue-{N}.md` + - Commits and pushes to the `.profile` repo + - Journals are archived after being digested into lessons-learned.md + +> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that +> orchestrate multi-step agent tasks (e.g., `run-gardener.toml`, `run-planner.toml`). 
This is +> distinct from "processes" described in `docs/EVIDENCE-ARCHITECTURE.md`, which are measurement +> and mutation pipelines that read external platforms and write structured evidence to git. ## Tech stack @@ -119,9 +146,6 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge → | `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) | | `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) | | `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) | -| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) | -| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. | reproduce-agent (when reproduction succeeds but cause unclear) | -| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans | | `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans | | `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh | | `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. 
| Planner (triage-predictions step) | diff --git a/architect/AGENTS.md b/architect/AGENTS.md index 64b325e..c2e99ba 100644 --- a/architect/AGENTS.md +++ b/architect/AGENTS.md @@ -1,4 +1,4 @@ - + # Architect — Agent Instructions ## What this agent is diff --git a/architect/architect-run.sh b/architect/architect-run.sh index 0edeb70..b3d2513 100755 --- a/architect/architect-run.sh +++ b/architect/architect-run.sh @@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log" +LOG_FILE="$SCRIPT_DIR/architect.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -44,40 +44,19 @@ SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-architect-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="architect" - -# Override log() to append to architect-specific log file -# shellcheck disable=SC2034 -log() { - local agent="${LOG_AGENT:-architect}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" -} +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active architect acquire_cron_lock "/tmp/architect-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Architect run start ---" -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - -# ── Resolve agent identity for .profile repo ──────────────────────────── -if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then - AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \ - 
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) -fi - # ── Load formula + context ─────────────────────────────────────────────── -load_formula_or_profile "architect" "$FACTORY_ROOT/formulas/run-architect.toml" || exit 1 +load_formula "$FACTORY_ROOT/formulas/run-architect.toml" build_context_block VISION.md AGENTS.md ops:prerequisites.md -# ── Prepare .profile context (lessons injection) ───────────────────────── -formula_prepare_profile_context - # ── Build structural analysis graph ────────────────────────────────────── build_graph_section @@ -105,7 +84,6 @@ and file sub-issues after design forks are resolved. ${CONTEXT_BLOCK} ${GRAPH_SECTION} ${SCRATCH_CONTEXT} -$(formula_lessons_block) ## Formula ${FORMULA_CONTENT} @@ -126,8 +104,4 @@ agent_run --worktree "$WORKTREE" "$PROMPT" log "agent_run complete" rm -f "$SCRATCH_FILE" - -# Write journal entry post-session -profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true - log "--- Architect run done ---" diff --git a/bin/disinto b/bin/disinto index 7d507a7..c3b454f 100755 --- a/bin/disinto +++ b/bin/disinto @@ -25,13 +25,6 @@ set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)" source "${FACTORY_ROOT}/lib/env.sh" -source "${FACTORY_ROOT}/lib/ops-setup.sh" -source "${FACTORY_ROOT}/lib/hire-agent.sh" -source "${FACTORY_ROOT}/lib/forge-setup.sh" -source "${FACTORY_ROOT}/lib/generators.sh" -source "${FACTORY_ROOT}/lib/forge-push.sh" -source "${FACTORY_ROOT}/lib/ci-setup.sh" -source "${FACTORY_ROOT}/lib/release.sh" # ── Helpers ────────────────────────────────────────────────────────────────── @@ -167,38 +160,397 @@ write_secrets_encrypted() { return 0 } -export FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" +FORGEJO_DATA_DIR="${HOME}/.disinto/forgejo" # Generate docker-compose.yml in the factory root. 
-# (Implementation in lib/generators.sh) generate_compose() { - _generate_compose_impl "$@" + local forge_port="${1:-3000}" + local compose_file="${FACTORY_ROOT}/docker-compose.yml" + + cat > "$compose_file" <<'COMPOSEEOF' +# docker-compose.yml — generated by disinto init +# Brings up Forgejo, Woodpecker, and the agent runtime. + +services: + forgejo: + image: codeberg.org/forgejo/forgejo:11.0 + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - forgejo-data:/data + environment: + FORGEJO__database__DB_TYPE: sqlite3 + FORGEJO__server__ROOT_URL: http://forgejo:3000/ + FORGEJO__server__HTTP_PORT: "3000" + FORGEJO__security__INSTALL_LOCK: "true" + FORGEJO__service__DISABLE_REGISTRATION: "true" + FORGEJO__webhook__ALLOWED_HOST_LIST: "private" + networks: + - disinto-net + + woodpecker: + image: woodpeckerci/woodpecker-server:v3 + restart: unless-stopped + security_opt: + - apparmor=unconfined + ports: + - "8000:8000" + - "9000:9000" + volumes: + - woodpecker-data:/var/lib/woodpecker + environment: + WOODPECKER_FORGEJO: "true" + WOODPECKER_FORGEJO_URL: http://forgejo:3000 + WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} + WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} + WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000} + WOODPECKER_OPEN: "true" + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_DATABASE_DRIVER: sqlite3 + WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite + depends_on: + - forgejo + networks: + - disinto-net + + woodpecker-agent: + image: woodpeckerci/woodpecker-agent:v3 + restart: unless-stopped + network_mode: host + privileged: true + volumes: + - /var/run/docker.sock:/var/run/docker.sock + environment: + WOODPECKER_SERVER: localhost:9000 + WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} + WOODPECKER_GRPC_SECURE: "false" + WOODPECKER_HEALTHCHECK_ADDR: ":3333" + WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net + WOODPECKER_MAX_WORKFLOWS: 1 + depends_on: + - 
woodpecker + + agents: + build: + context: . + dockerfile: docker/agents/Dockerfile + restart: unless-stopped + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + - project-repos:/home/agent/repos + - ${HOME}/.claude:/home/agent/.claude + - ${HOME}/.claude.json:/home/agent/.claude.json:ro + - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro + - ${HOME}/.ssh:/home/agent/.ssh:ro + - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro + - woodpecker-data:/woodpecker-data:ro + environment: + FORGE_URL: http://forgejo:3000 + WOODPECKER_SERVER: http://woodpecker:8000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + WOODPECKER_DATA_DIR: /woodpecker-data + env_file: + - .env + # IMPORTANT: agents get .env only (forge tokens, CI tokens, config). + # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in + # .env.vault.enc and are NEVER injected here — only the runner + # container receives them at fire time (AD-006, #745). + depends_on: + - forgejo + - woodpecker + networks: + - disinto-net + + runner: + build: + context: . 
+ dockerfile: docker/agents/Dockerfile + profiles: ["vault"] + security_opt: + - apparmor=unconfined + volumes: + - agent-data:/home/agent/data + environment: + FORGE_URL: http://forgejo:3000 + DISINTO_CONTAINER: "1" + PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} + # Vault redesign in progress (PR-based approval, see #73-#77) + # This container is being replaced — entrypoint will be updated in follow-up + networks: + - disinto-net + + # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging + # Serves on ports 80/443, routes based on path + edge: + build: ./docker/edge + ports: + - "80:80" + - "443:443" + environment: + - DISINTO_VERSION=${DISINTO_VERSION:-main} + - FORGE_URL=http://forgejo:3000 + - FORGE_REPO=johba/disinto + - FORGE_OPS_REPO=johba/disinto-ops + - FORGE_TOKEN=${FORGE_TOKEN:-} + - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin,johba} + - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-} + - OPS_REPO_ROOT=/opt/disinto-ops + - PROJECT_REPO_ROOT=/opt/disinto + - PRIMARY_BRANCH=main + volumes: + - ./docker/Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + - /var/run/docker.sock:/var/run/docker.sock + depends_on: + - forgejo + - woodpecker + - staging + networks: + - disinto-net + + # Staging container — static file server for staging artifacts + # Edge proxy routes to this container for default requests + staging: + image: caddy:alpine + command: ["caddy", "file-server", "--root", "/srv/site"] + volumes: + - ./docker:/srv/site:ro + networks: + - disinto-net + + # Staging deployment slot — activated by Woodpecker staging pipeline (#755). + # Profile-gated: only starts when explicitly targeted by deploy commands. + # Customize image/ports/volumes for your project after init. 
+ staging-deploy: + image: alpine:3 + profiles: ["staging"] + security_opt: + - apparmor=unconfined + environment: + DEPLOY_ENV: staging + networks: + - disinto-net + command: ["echo", "staging slot — replace with project image"] + +volumes: + forgejo-data: + woodpecker-data: + agent-data: + project-repos: + caddy_data: + +networks: + disinto-net: + driver: bridge +COMPOSEEOF + + # Patch the Claude CLI binary path — resolve from host PATH at init time. + local claude_bin + claude_bin="$(command -v claude 2>/dev/null || true)" + if [ -n "$claude_bin" ]; then + # Resolve symlinks to get the real binary path + claude_bin="$(readlink -f "$claude_bin")" + sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" + else + echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 + sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" + fi + + # Patch the forgejo port mapping into the file if non-default + if [ "$forge_port" != "3000" ]; then + # Add port mapping to forgejo service so it's reachable from host during init + sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" + else + sed -i "/image: codeberg\.org\/forgejo\/forgejo:11\.0/a\\ ports:\\n - \"3000:3000\"" "$compose_file" + fi + + echo "Created: ${compose_file}" } # Generate docker/agents/ files if they don't already exist. -# (Implementation in lib/generators.sh) generate_agent_docker() { - _generate_agent_docker_impl "$@" + local docker_dir="${FACTORY_ROOT}/docker/agents" + mkdir -p "$docker_dir" + + if [ ! -f "${docker_dir}/Dockerfile" ]; then + echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 + fi + if [ ! -f "${docker_dir}/entrypoint.sh" ]; then + echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 + fi } # Generate docker/Caddyfile template for edge proxy. 
-# (Implementation in lib/generators.sh) generate_caddyfile() { - _generate_caddyfile_impl "$@" + local docker_dir="${FACTORY_ROOT}/docker" + local caddyfile="${docker_dir}/Caddyfile" + + if [ -f "$caddyfile" ]; then + echo "Caddyfile: ${caddyfile} (already exists, skipping)" + return + fi + + cat > "$caddyfile" <<'CADDYFILEEOF' +# Caddyfile — edge proxy configuration +# IP-only binding at bootstrap; domain + TLS added later via vault resource request + +:80 { + # Reverse proxy to Forgejo + handle /forgejo/* { + reverse_proxy forgejo:3000 + } + + # Reverse proxy to Woodpecker CI + handle /ci/* { + reverse_proxy woodpecker:8000 + } + + # Default: proxy to staging container + handle { + reverse_proxy staging:80 + } +} +CADDYFILEEOF + + echo "Created: ${caddyfile}" } # Generate docker/index.html default page. -# (Implementation in lib/generators.sh) generate_staging_index() { - _generate_staging_index_impl "$@" + local docker_dir="${FACTORY_ROOT}/docker" + local index_file="${docker_dir}/index.html" + + if [ -f "$index_file" ]; then + echo "Staging: ${index_file} (already exists, skipping)" + return + fi + + cat > "$index_file" <<'INDEXEOF' + + + + + + Nothing shipped yet + + + +
+

Nothing shipped yet

+

CI pipelines will update this page with your staging artifacts.

+
+ + +INDEXEOF + + echo "Created: ${index_file}" } # Generate template .woodpecker/ deployment pipeline configs in a project repo. # Creates staging.yml and production.yml alongside the project's existing CI config. # These pipelines trigger on Woodpecker's deployment event with environment filters. -# (Implementation in lib/generators.sh) generate_deploy_pipelines() { - _generate_deploy_pipelines_impl "$@" + local repo_root="$1" project_name="$2" + local wp_dir="${repo_root}/.woodpecker" + + mkdir -p "$wp_dir" + + # Skip if deploy pipelines already exist + if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then + echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" + return + fi + + if [ ! -f "${wp_dir}/staging.yml" ]; then + cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' +# .woodpecker/staging.yml — Staging deployment pipeline +# Triggered by runner via Woodpecker promote API. +# Human approves promotion in vault → runner calls promote → this runs. + +when: + event: deployment + environment: staging + +steps: + - name: deploy-staging + image: docker:27 + commands: + - echo "Deploying to staging environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" + # Pull the image built by CI and deploy to staging + # Customize these commands for your project: + # - docker compose -f docker-compose.yml --profile staging up -d + - echo "Staging deployment complete" + + - name: verify-staging + image: alpine:3 + commands: + - echo "Verifying staging deployment..." + # Add health checks, smoke tests, or integration tests here: + # - curl -sf http://staging:8080/health || exit 1 + - echo "Staging verification complete" +STAGINGEOF + echo "Created: ${wp_dir}/staging.yml" + fi + + if [ ! -f "${wp_dir}/production.yml" ]; then + cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' +# .woodpecker/production.yml — Production deployment pipeline +# Triggered by runner via Woodpecker promote API. 
+# Human approves promotion in vault → runner calls promote → this runs. + +when: + event: deployment + environment: production + +steps: + - name: deploy-production + image: docker:27 + commands: + - echo "Deploying to production environment..." + - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" + # Pull the verified image and deploy to production + # Customize these commands for your project: + # - docker compose -f docker-compose.yml up -d + - echo "Production deployment complete" + + - name: verify-production + image: alpine:3 + commands: + - echo "Verifying production deployment..." + # Add production health checks here: + # - curl -sf http://production:8080/health || exit 1 + - echo "Production verification complete" +PRODUCTIONEOF + echo "Created: ${wp_dir}/production.yml" + fi } # Check whether compose mode is active (docker-compose.yml exists). @@ -206,11 +558,646 @@ is_compose_mode() { [ -f "${FACTORY_ROOT}/docker-compose.yml" ] } +# Provision or connect to a local Forgejo instance. +# Creates admin + bot users, generates API tokens, stores in .env. +# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. +setup_forge() { + local forge_url="$1" + local repo_slug="$2" + local use_bare="${DISINTO_BARE:-false}" + + echo "" + echo "── Forge setup ────────────────────────────────────────" + + # Helper: run a command inside the Forgejo container + _forgejo_exec() { + if [ "$use_bare" = true ]; then + docker exec -u git disinto-forgejo "$@" + else + docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" + fi + } + + # Check if Forgejo is already running + if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then + echo "Forgejo: ${forge_url} (already running)" + else + echo "Forgejo not reachable at ${forge_url}" + echo "Starting Forgejo via Docker..." + + if ! 
command -v docker &>/dev/null; then + echo "Error: docker not found — needed to provision Forgejo" >&2 + echo " Install Docker or start Forgejo manually at ${forge_url}" >&2 + exit 1 + fi + + # Extract port from forge_url + local forge_port + forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|') + forge_port="${forge_port:-3000}" + + if [ "$use_bare" = true ]; then + # Bare-metal mode: standalone docker run + mkdir -p "${FORGEJO_DATA_DIR}" + + if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then + docker start disinto-forgejo >/dev/null 2>&1 || true + else + docker run -d \ + --name disinto-forgejo \ + --restart unless-stopped \ + -p "${forge_port}:3000" \ + -p 2222:22 \ + -v "${FORGEJO_DATA_DIR}:/data" \ + -e "FORGEJO__database__DB_TYPE=sqlite3" \ + -e "FORGEJO__server__ROOT_URL=${forge_url}/" \ + -e "FORGEJO__server__HTTP_PORT=3000" \ + -e "FORGEJO__service__DISABLE_REGISTRATION=true" \ + codeberg.org/forgejo/forgejo:11.0 + fi + else + # Compose mode: start Forgejo via docker compose + docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo + fi + + # Wait for Forgejo to become healthy + echo -n "Waiting for Forgejo to start" + local retries=0 + while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 60 ]; then + echo "" + echo "Error: Forgejo did not become ready within 60s" >&2 + exit 1 + fi + echo -n "." + sleep 1 + done + echo " ready" + fi + + # Wait for Forgejo database to accept writes (API may be ready before DB is) + echo -n "Waiting for Forgejo database" + local db_ready=false + for _i in $(seq 1 30); do + if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then + db_ready=true + break + fi + echo -n "." 
+ sleep 1 + done + echo "" + if [ "$db_ready" != true ]; then + echo "Error: Forgejo database not ready after 30s" >&2 + exit 1 + fi + + # Create admin user if it doesn't exist + local admin_user="disinto-admin" + local admin_pass + local env_file="${FACTORY_ROOT}/.env" + + # Re-read persisted admin password if available (#158) + if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then + admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) + fi + # Generate a fresh password only when none was persisted + if [ -z "${admin_pass:-}" ]; then + admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + fi + + if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then + echo "Creating admin user: ${admin_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --admin \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --email "admin@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create admin user '${admin_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --must-change-password=false + + # Verify admin user was actually created + if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then + echo "Error: admin user '${admin_user}' not found after creation" >&2 + exit 1 + fi + + # Persist admin password to .env for idempotent re-runs (#158) + if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then + sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file" + else + printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file" + fi + else + echo "Admin user: ${admin_user} (already exists)" + # Reset password to the persisted value so basic-auth works (#158) + _forgejo_exec forgejo admin user change-password \ + --username "${admin_user}" \ + --password "${admin_pass}" \ + --must-change-password=false + fi + # Preserve password for Woodpecker OAuth2 token generation (#779) + _FORGE_ADMIN_PASS="$admin_pass" + + # Create human user (johba) as site admin if it doesn't exist + local human_user="johba" + local human_pass + human_pass="human-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + + if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then + echo "Creating human user: ${human_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --admin \ + --username "${human_user}" \ + --password "${human_pass}" \ + --email "johba@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create human user '${human_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${human_user}" \ + --password "${human_pass}" \ + --must-change-password=false + + # Verify human user was actually created + if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then + echo "Error: human user '${human_user}' not found after creation" >&2 + exit 1 + fi + echo " Human user '${human_user}' created as site admin" + else + echo "Human user: ${human_user} (already exists)" + fi + + # Get or create admin token + local admin_token + admin_token=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" \ + -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ + | jq -r '.sha1 // empty') || admin_token="" + + if [ -z "$admin_token" ]; then + # Token might already exist — try listing + admin_token=$(curl -sf \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || admin_token="" + fi + + if [ -z "$admin_token" ]; then + echo "Error: failed to obtain admin API token" >&2 + exit 1 + fi + + # Get or create human user token + local human_token + if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then + human_token=$(curl -sf -X POST \ + -u "${human_user}:${human_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${human_user}/tokens" \ + -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \ + | jq -r '.sha1 // empty') || human_token="" + + if [ -z "$human_token" ]; then + # Token might already exist — try listing + human_token=$(curl -sf \ + -u "${human_user}:${human_pass}" \ + "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || human_token="" + fi + + if [ -n "$human_token" ]; then + # Store human token in .env + if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then + sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file" + else + printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file" + fi + export HUMAN_TOKEN="$human_token" + echo " Human 
token saved (HUMAN_TOKEN)" + fi + fi + + # Create bot users and tokens + # Each agent gets its own Forgejo account for identity and audit trail (#747). + # Map: bot-username -> env-var-name for the token + local -A bot_token_vars=( + [dev-bot]="FORGE_TOKEN" + [review-bot]="FORGE_REVIEW_TOKEN" + [planner-bot]="FORGE_PLANNER_TOKEN" + [gardener-bot]="FORGE_GARDENER_TOKEN" + [vault-bot]="FORGE_VAULT_TOKEN" + [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" + [predictor-bot]="FORGE_PREDICTOR_TOKEN" + [architect-bot]="FORGE_ARCHITECT_TOKEN" + ) + + local bot_user bot_pass token token_var + + for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do + bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + token_var="${bot_token_vars[$bot_user]}" + + if ! curl -sf --max-time 5 \ + -H "Authorization: token ${admin_token}" \ + "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then + echo "Creating bot user: ${bot_user}" + local create_output + if ! create_output=$(_forgejo_exec forgejo admin user create \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --email "${bot_user}@disinto.local" \ + --must-change-password=false 2>&1); then + echo "Error: failed to create bot user '${bot_user}':" >&2 + echo " ${create_output}" >&2 + exit 1 + fi + # Forgejo 11.x ignores --must-change-password=false on create; + # explicitly clear the flag so basic-auth token creation works. + _forgejo_exec forgejo admin user change-password \ + --username "${bot_user}" \ + --password "${bot_pass}" \ + --must-change-password=false + + # Verify bot user was actually created + if ! 
curl -sf --max-time 5 \ + -H "Authorization: token ${admin_token}" \ + "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then + echo "Error: bot user '${bot_user}' not found after creation" >&2 + exit 1 + fi + fi + + # Generate token via API (basic auth as the bot user — Forgejo requires + # basic auth on POST /users/{username}/tokens, token auth is rejected) + token=$(curl -sf -X POST \ + -u "${bot_user}:${bot_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${bot_user}/tokens" \ + -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || token="" + + if [ -z "$token" ]; then + # Token name collision — create with timestamp suffix + token=$(curl -sf -X POST \ + -u "${bot_user}:${bot_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${bot_user}/tokens" \ + -d "{\"name\":\"disinto-${bot_user}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || token="" + fi + + if [ -z "$token" ]; then + echo "Error: failed to create API token for '${bot_user}'" >&2 + exit 1 + fi + + # Store token in .env under the per-agent variable name + if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then + sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" + else + printf '%s=%s\n' "$token_var" "$token" >> "$env_file" + fi + export "${token_var}=${token}" + echo " ${bot_user} token saved (${token_var})" + + # Backwards-compat aliases for dev-bot and review-bot + if [ "$bot_user" = "dev-bot" ]; then + export CODEBERG_TOKEN="$token" + elif [ "$bot_user" = "review-bot" ]; then + export REVIEW_BOT_TOKEN="$token" + fi + done + + # Store FORGE_URL in .env if not already present + if ! 
grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then + printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" + fi + + # Create the repo on Forgejo if it doesn't exist + local org_name="${repo_slug%%/*}" + local repo_name="${repo_slug##*/}" + + # Check if repo already exists + if ! curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then + + # Try creating org first (ignore if exists) + curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs" \ + -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true + + # Create repo under org + if ! curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + # Fallback: create under the human user namespace using admin endpoint + if [ -n "${admin_token:-}" ]; then + if ! curl -sf -X POST \ + -H "Authorization: token ${admin_token}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${org_name}/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2 + exit 1 + fi + elif [ -n "${HUMAN_TOKEN:-}" ]; then + if ! 
curl -sf -X POST \ + -H "Authorization: token ${HUMAN_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/repos" \ + -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then + echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2 + exit 1 + fi + else + echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2 + exit 1 + fi + fi + + # Add all bot users as collaborators with appropriate permissions + # dev-bot: write (PR creation via lib/vault.sh) + # review-bot: read (PR review) + # planner-bot: write (prerequisites.md, memory) + # gardener-bot: write (backlog grooming) + # vault-bot: write (vault items) + # supervisor-bot: read (health monitoring) + # predictor-bot: read (pattern detection) + # architect-bot: write (sprint PRs) + local bot_user bot_perm + declare -A bot_permissions=( + [dev-bot]="write" + [review-bot]="read" + [planner-bot]="write" + [gardener-bot]="write" + [vault-bot]="write" + [supervisor-bot]="read" + [predictor-bot]="read" + [architect-bot]="write" + ) + for bot_user in "${!bot_permissions[@]}"; do + bot_perm="${bot_permissions[$bot_user]}" + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ + -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true + done + + # Add disinto-admin as admin collaborator + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \ + -d '{"permission":"admin"}' >/dev/null 2>&1 || true + + echo "Repo: ${repo_slug} created on Forgejo" + else + echo "Repo: ${repo_slug} (already exists on Forgejo)" + fi + + echo "Forge: ${forge_url} (ready)" +} + # Create and seed the {project}-ops repo on Forgejo with initial 
directory structure. # The ops repo holds operational data: vault items, journals, evidence, prerequisites. -# ops repo setup is now in lib/ops-setup.sh +setup_ops_repo() { + local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" + local org_name="${ops_slug%%/*}" + local ops_name="${ops_slug##*/}" -# push_to_forge() is sourced from lib/forge-push.sh + echo "" + echo "── Ops repo setup ─────────────────────────────────────" + + # Check if ops repo already exists on Forgejo + if curl -sf --max-time 5 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${ops_slug}" >/dev/null 2>&1; then + echo "Ops repo: ${ops_slug} (already exists on Forgejo)" + else + # Create ops repo under org (or human user if org creation failed) + if ! curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/orgs/${org_name}/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then + # Fallback: create under the human user namespace + curl -sf -X POST \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/johba/repos" \ + -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data\"}" >/dev/null 2>&1 || true + fi + + # Add all bot users as collaborators with appropriate permissions + # vault branch protection (#77) requires: + # - Admin-only merge to main (enforced by admin_enforced: true) + # - Bots can push branches and create PRs, but cannot merge + local bot_user bot_perm + declare -A bot_permissions=( + [dev-bot]="write" + [review-bot]="read" + [planner-bot]="write" + [gardener-bot]="write" + [vault-bot]="write" + [supervisor-bot]="read" + [predictor-bot]="read" + 
[architect-bot]="write" + ) + for bot_user in "${!bot_permissions[@]}"; do + bot_perm="${bot_permissions[$bot_user]}" + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${ops_slug}/collaborators/${bot_user}" \ + -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true + done + + # Add disinto-admin as admin collaborator + curl -sf -X PUT \ + -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/repos/${ops_slug}/collaborators/disinto-admin" \ + -d '{"permission":"admin"}' >/dev/null 2>&1 || true + + echo "Ops repo: ${ops_slug} created on Forgejo" + fi + + # Clone ops repo locally if not present + if [ ! -d "${ops_root}/.git" ]; then + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") + local clone_url="${auth_url}/${ops_slug}.git" + echo "Cloning: ops repo -> ${ops_root}" + git clone --quiet "$clone_url" "$ops_root" 2>/dev/null || { + echo "Initializing: ops repo at ${ops_root}" + mkdir -p "$ops_root" + git -C "$ops_root" init --initial-branch="${primary_branch}" -q + } + else + echo "Ops repo: ${ops_root} (already exists locally)" + fi + + # Seed directory structure + local seeded=false + mkdir -p "${ops_root}/vault/pending" + mkdir -p "${ops_root}/vault/approved" + mkdir -p "${ops_root}/vault/fired" + mkdir -p "${ops_root}/vault/rejected" + mkdir -p "${ops_root}/knowledge" + mkdir -p "${ops_root}/evidence/engagement" + + if [ ! -f "${ops_root}/README.md" ]; then + cat > "${ops_root}/README.md" < **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo. 
+ +## Branch protection + +- \`main\`: 2 reviewers required for vault items +- Journal/evidence commits may use lighter rules +OPSEOF + seeded=true + fi + + # Create stub files if they don't exist + [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; } + [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } + [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } + + # Commit and push seed content + if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then + # Auto-configure repo-local git identity if missing (#778) + if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then + git -C "$ops_root" config user.name "disinto-admin" + fi + if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then + git -C "$ops_root" config user.email "disinto-admin@localhost" + fi + + git -C "$ops_root" add -A + if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then + git -C "$ops_root" commit -m "chore: seed ops repo structure" -q + # Push if remote exists + if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then + git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null || true + fi + fi + echo "Seeded: ops repo with initial structure" + fi +} + +# Push local clone to the Forgejo remote. 
+push_to_forge() { + local repo_root="$1" forge_url="$2" repo_slug="$3" + + # Build authenticated remote URL: http://dev-bot:@host:port/org/repo.git + if [ -z "${FORGE_TOKEN:-}" ]; then + echo "Error: FORGE_TOKEN not set — cannot push to Forgejo" >&2 + return 1 + fi + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") + local remote_url="${auth_url}/${repo_slug}.git" + # Display URL without token + local display_url="${forge_url}/${repo_slug}.git" + + # Always set the remote URL to ensure credentials are current + if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then + git -C "$repo_root" remote set-url forgejo "$remote_url" + else + git -C "$repo_root" remote add forgejo "$remote_url" + fi + echo "Remote: forgejo -> ${display_url}" + + # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) + if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then + echo "Push: skipped (local repo has no commits)" + return 0 + fi + + # Push all branches and tags + echo "Pushing: branches to forgejo" + if ! git -C "$repo_root" push forgejo --all 2>&1; then + echo "Error: failed to push branches to Forgejo" >&2 + return 1 + fi + echo "Pushing: tags to forgejo" + if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then + echo "Error: failed to push tags to Forgejo" >&2 + return 1 + fi + + # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) + local is_empty="true" + local verify_attempt + for verify_attempt in $(seq 1 5); do + local repo_info + repo_info=$(curl -sf --max-time 10 \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" + if [ -z "$repo_info" ]; then + is_empty="skipped" + break # API unreachable, skip verification + fi + is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') + if [ "$is_empty" != "true" ]; then + echo "Verify: repo is not empty (push confirmed)" + break + fi + if [ "$verify_attempt" -lt 5 ]; then + sleep 2 + fi + done + if [ "$is_empty" = "true" ]; then + echo "Warning: Forgejo repo still reports empty after push" >&2 + return 1 + fi +} # Preflight check — verify all factory requirements before proceeding. preflight_check() { @@ -379,15 +1366,6 @@ create_labels() { ["underspecified"]="#fbca04" ["vision"]="#0e8a16" ["action"]="#1d76db" - ["prediction/unreviewed"]="#a2eeef" - ["prediction/dismissed"]="#d73a4a" - ["prediction/actioned"]="#28a745" - ["bug-report"]="#e11d48" - ["needs-triage"]="#f9d0c4" - ["reproduced"]="#0e8a16" - ["cannot-reproduce"]="#cccccc" - ["in-triage"]="#1d76db" - ["rejected"]="#cccccc" ) echo "Creating labels on ${repo}..." @@ -400,11 +1378,9 @@ create_labels() { | grep -o '"name":"[^"]*"' | cut -d'"' -f4) || existing="" local name color - local created=0 skipped=0 failed=0 - for name in backlog in-progress blocked tech-debt underspecified vision action bug-report prediction/unreviewed prediction/dismissed prediction/actioned needs-triage reproduced cannot-reproduce in-triage rejected; do + for name in backlog in-progress blocked tech-debt underspecified vision action; do if echo "$existing" | grep -qx "$name"; then echo " . 
${name} (already exists)" - skipped=$((skipped + 1)) continue fi color="${labels[$name]}" @@ -413,15 +1389,11 @@ create_labels() { -H "Content-Type: application/json" \ "${api}/labels" \ -d "{\"name\":\"${name}\",\"color\":\"${color}\"}" >/dev/null 2>&1; then - echo " + ${name} (created)" - created=$((created + 1)) + echo " + ${name}" else echo " ! ${name} (failed to create)" - failed=$((failed + 1)) fi done - - echo "Labels: ${created} created, ${skipped} skipped, ${failed} failed" } # Generate a minimal VISION.md template in the target project. @@ -461,57 +1433,404 @@ EOF echo " Commit this to your repo when ready" } -# Copy issue templates from templates/ to target project repo. -copy_issue_templates() { - local repo_root="$1" - local template_dir="${FACTORY_ROOT}/templates" - local target_dir="${repo_root}/.forgejo/ISSUE_TEMPLATE" +# Generate and optionally install cron entries for the project agents. +install_cron() { + local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - # Skip if templates directory doesn't exist - if [ ! -d "$template_dir" ]; then + # In compose mode, skip host cron — the agents container runs cron internally + if [ "$bare" = false ]; then + echo "" + echo "Cron: skipped (agents container handles scheduling in compose mode)" return fi - # Create target directory - mkdir -p "$target_dir" + # Bare mode: crontab is required on the host + if ! command -v crontab &>/dev/null; then + echo "Error: crontab not found (required for bare-metal mode)" >&2 + echo " Install: apt install cron / brew install cron" >&2 + exit 1 + fi - # Copy each template file if it doesn't already exist - for template in "$template_dir"/issue/*; do - [ -f "$template" ] || continue - local filename - filename=$(basename "$template") - local target_path="${target_dir}/${filename}" - if [ ! 
-f "$target_path" ]; then - cp "$template" "$target_path" - echo "Copied: ${target_path}" + # Use absolute path for the TOML in cron entries + local abs_toml + abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" + + local cron_block + cron_block="# disinto: ${name} +2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 +4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 +0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" + + echo "" + echo "Cron entries to install:" + echo "$cron_block" + echo "" + + if [ "$auto_yes" = false ] && [ -t 0 ]; then + read -rp "Install these cron entries? [y/N] " confirm + if [[ ! "$confirm" =~ ^[Yy] ]]; then + echo "Skipped cron install. Add manually with: crontab -e" + return + fi + fi + + # Append to existing crontab + { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab - + echo "Cron entries installed" +} + +# Set up Woodpecker CI to use Forgejo as its forge backend. +# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. 
+create_woodpecker_oauth() { + local forge_url="$1" repo_slug="$2" + + echo "" + echo "── Woodpecker OAuth2 setup ────────────────────────────" + + # Create OAuth2 application on Forgejo for Woodpecker + local oauth2_name="woodpecker-ci" + local redirect_uri="http://localhost:8000/authorize" + local existing_app client_id client_secret + + # Check if OAuth2 app already exists + existing_app=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ + | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true + + if [ -n "$existing_app" ]; then + echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" + client_id="$existing_app" + else + local oauth2_resp + oauth2_resp=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/applications/oauth2" \ + -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ + 2>/dev/null) || oauth2_resp="" + + if [ -z "$oauth2_resp" ]; then + echo "Warning: failed to create OAuth2 app on Forgejo" >&2 + return + fi + + client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') + client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') + + if [ -z "$client_id" ]; then + echo "Warning: OAuth2 app creation returned no client_id" >&2 + return + fi + + echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" + fi + + # Store Woodpecker forge config in .env + # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references + # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri + local env_file="${FACTORY_ROOT}/.env" + local wp_vars=( + "WOODPECKER_FORGEJO=true" + "WOODPECKER_FORGEJO_URL=${forge_url}" + "WOODPECKER_HOST=http://localhost:8000" + ) + if [ -n "${client_id:-}" ]; then + 
wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") + fi + if [ -n "${client_secret:-}" ]; then + wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") + fi + + for var_line in "${wp_vars[@]}"; do + local var_name="${var_line%%=*}" + if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then + sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" else - echo "Skipped: ${target_path} (already exists)" + printf '%s\n' "$var_line" >> "$env_file" fi done + echo "Config: Woodpecker forge vars written to .env" } -# Install cron entries for project agents (implementation in lib/ci-setup.sh) -install_cron() { - _load_ci_context - _install_cron_impl "$@" -} - -# Create Woodpecker OAuth2 app on Forgejo (implementation in lib/ci-setup.sh) -create_woodpecker_oauth() { - _load_ci_context - _create_woodpecker_oauth_impl "$@" -} - -# Generate WOODPECKER_TOKEN via Forgejo OAuth2 flow (implementation in lib/ci-setup.sh) +# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. +# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). +# Called after compose stack is up, before activate_woodpecker_repo. generate_woodpecker_token() { - _load_ci_context - _generate_woodpecker_token_impl "$@" + local forge_url="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + local env_file="${FACTORY_ROOT}/.env" + local admin_user="disinto-admin" + local admin_pass="${_FORGE_ADMIN_PASS:-}" + + # Skip if already set + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + echo "Config: WOODPECKER_TOKEN already set in .env" + return 0 + fi + + echo "" + echo "── Woodpecker token generation ────────────────────────" + + if [ -z "$admin_pass" ]; then + echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 + echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 + return 1 + fi + + # Wait for Woodpecker to become ready + echo -n "Waiting for Woodpecker" + local retries=0 + while ! 
curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 30 ]; then + echo "" + echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 + return 1 + fi + echo -n "." + sleep 2 + done + echo " ready" + + # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token + local cookie_jar auth_body_file + cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) + auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) + + # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) + local csrf + csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ + | grep -o 'name="_csrf"[^>]*' | head -1 \ + | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || csrf="" + + if [ -z "$csrf" ]; then + echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ + -o /dev/null \ + "${forge_url}/user/login" \ + --data-urlencode "_csrf=${csrf}" \ + --data-urlencode "user_name=${admin_user}" \ + --data-urlencode "password=${admin_pass}" \ + 2>/dev/null || true + + # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) + local wp_redir + wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ + "${wp_server}/authorize" 2>/dev/null) || wp_redir="" + + if [ -z "$wp_redir" ]; then + echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 + rm -f "$cookie_jar" "$auth_body_file" + return 1 + fi + + # Rewrite internal Docker network URLs to host-accessible URLs. + # Handle both plain and URL-encoded forms of the internal hostnames. 
+ local forge_url_enc wp_server_enc + forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') + wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') + wp_redir=$(printf '%s' "$wp_redir" \ + | sed "s|http://forgejo:3000|${forge_url}|g" \ + | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ + | sed "s|http://woodpecker:8000|${wp_server}|g" \ + | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") + + # Step 3: Hit Forgejo OAuth authorize endpoint with session + # First time: shows consent page. Already approved: redirects with code. + local auth_headers redirect_loc auth_code + auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o "$auth_body_file" \ + "$wp_redir" 2>/dev/null) || auth_headers="" + + redirect_loc=$(printf '%s' "$auth_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + # Auto-approved: extract code from redirect + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + else + # Consent page: extract CSRF and all form fields, POST grant approval + local consent_csrf form_client_id form_state form_redirect_uri + consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ + | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ + | cut -d'"' -f2) || consent_csrf="" + form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" + form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" + form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ + | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" + + if [ -n "$consent_csrf" ]; then + local grant_headers + grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ + -D - -o /dev/null -X POST \ + "${forge_url}/login/oauth/grant" \ 
+ --data-urlencode "_csrf=${consent_csrf}" \ + --data-urlencode "client_id=${form_client_id}" \ + --data-urlencode "state=${form_state}" \ + --data-urlencode "scope=" \ + --data-urlencode "nonce=" \ + --data-urlencode "redirect_uri=${form_redirect_uri}" \ + --data-urlencode "granted=true" \ + 2>/dev/null) || grant_headers="" + + redirect_loc=$(printf '%s' "$grant_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then + auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') + fi + fi + fi + + rm -f "$auth_body_file" + + if [ -z "${auth_code:-}" ]; then + echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 + rm -f "$cookie_jar" + return 1 + fi + + # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) + local state + state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') + + local wp_headers wp_token + wp_headers=$(curl -sf -c "$cookie_jar" \ + -D - -o /dev/null \ + "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ + 2>/dev/null) || wp_headers="" + + # Extract token from redirect URL (Woodpecker returns ?access_token=...) + redirect_loc=$(printf '%s' "$wp_headers" \ + | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') + + wp_token="" + if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then + wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') + fi + + # Fallback: check for user_sess cookie + if [ -z "$wp_token" ]; then + wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" + fi + + rm -f "$cookie_jar" + + if [ -z "$wp_token" ]; then + echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 + return 1 + fi + + # Step 5: Create persistent personal access token via Woodpecker API + # WP v3 requires CSRF header for POST operations with session tokens. 
+ local wp_csrf + wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ + "${wp_server}/web-config.js" 2>/dev/null \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" + + local pat_resp final_token + pat_resp=$(curl -sf -X POST \ + -b "user_sess=${wp_token}" \ + ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ + "${wp_server}/api/user/token" \ + 2>/dev/null) || pat_resp="" + + final_token="" + if [ -n "$pat_resp" ]; then + final_token=$(printf '%s' "$pat_resp" \ + | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ + 2>/dev/null) || final_token="" + fi + + # Use persistent token if available, otherwise use session token + final_token="${final_token:-$wp_token}" + + # Save to .env + if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then + sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" + else + printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" + fi + export WOODPECKER_TOKEN="$final_token" + echo "Config: WOODPECKER_TOKEN generated and saved to .env" } -# Activate repo in Woodpecker CI (implementation in lib/ci-setup.sh) activate_woodpecker_repo() { - _load_ci_context - _activate_woodpecker_repo_impl "$@" + local forge_repo="$1" + local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" + + # Wait for Woodpecker to become ready after stack start + local retries=0 + while [ $retries -lt 10 ]; do + if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then + break + fi + retries=$((retries + 1)) + sleep 2 + done + + if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then + echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 + return + fi + + echo "" + echo "── Woodpecker repo activation ─────────────────────────" + + local wp_token="${WOODPECKER_TOKEN:-}" + if [ -z "$wp_token" ]; then + echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + return + fi + + local wp_repo_id + wp_repo_id=$(curl -sf \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" + else + # Get Forgejo repo numeric ID for WP activation + local forge_repo_id + forge_repo_id=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" + + local activate_resp + activate_resp=$(curl -sf -X POST \ + -H "Authorization: Bearer ${wp_token}" \ + "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ + 2>/dev/null) || activate_resp="" + + wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true + + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" + + # Set pipeline timeout to 5 minutes (default is 60) + curl -sf -X PATCH -H "Authorization: Bearer ${wp_token}" -H "Content-Type: application/json" "${wp_server}/api/repos/${wp_repo_id}" -d '{"timeout": 5}' >/dev/null 2>&1 && echo "Config: pipeline timeout set to 5 minutes" || true + else + echo "Warning: could not activate repo in Woodpecker" >&2 + echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 + fi + fi + + # Store repo ID 
for later TOML generation + if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then + _WP_REPO_ID="$wp_repo_id" + fi } # ── init command ───────────────────────────────────────────────────────────── @@ -656,15 +1975,13 @@ p.write_text(text) echo "Branch: ${branch}" # Set up {project}-ops repo (#757) - # Always use disinto-admin as the ops repo owner — forge_repo owner may be - # the calling user (e.g. johba) but the ops repo belongs to disinto-admin. - local ops_slug="disinto-admin/${project_name}-ops" + local ops_slug="${forge_repo}-ops" local ops_root="/home/${USER}/${project_name}-ops" setup_ops_repo "$forge_url" "$ops_slug" "$ops_root" "$branch" # Set up vault branch protection on ops repo (#77) # This ensures admin-only merge to main, blocking bots from merging vault PRs - # Use HUMAN_TOKEN (disinto-admin) or FORGE_TOKEN (dev-bot) for admin operations + # Use HUMAN_TOKEN (johba) or FORGE_TOKEN (dev-bot) for admin operations export FORGE_OPS_REPO="$ops_slug" # Source env.sh to ensure FORGE_TOKEN is available source "${FACTORY_ROOT}/lib/env.sh" @@ -688,24 +2005,6 @@ p.write_text(text) echo "Created: ${toml_path}" fi - # Update ops_repo in TOML with the resolved actual ops slug. - # Uses in-place substitution to prevent duplicate keys on repeated init runs. - # If the key is missing (manually created TOML), it is inserted after the repo line. 
- if [ -n "${_ACTUAL_OPS_SLUG:-}" ] && [ -f "$toml_path" ]; then - python3 -c " -import sys, re, pathlib -p = pathlib.Path(sys.argv[1]) -text = p.read_text() -new_val = 'ops_repo = \"' + sys.argv[2] + '\"' -if re.search(r'^ops_repo\s*=', text, re.MULTILINE): - text = re.sub(r'^ops_repo\s*=\s*.*\$', new_val, text, flags=re.MULTILINE) -else: - text = re.sub(r'^(repo\s*=\s*\"[^\"]*\")', r'\1\n' + new_val, text, flags=re.MULTILINE) -p.write_text(text) -" "$toml_path" "${_ACTUAL_OPS_SLUG}" - echo "Updated: ops_repo in ${toml_path}" - fi - # Create OAuth2 app on Forgejo for Woodpecker (before compose up) _WP_REPO_ID="" create_woodpecker_oauth "$forge_url" "$forge_repo" @@ -742,9 +2041,6 @@ p.write_text(text) # Generate template deployment pipeline configs in project repo generate_deploy_pipelines "$repo_root" "$project_name" - # Copy issue templates to target project - copy_issue_templates "$repo_root" - # Install cron jobs install_cron "$project_name" "$toml_path" "$auto_yes" "$bare" @@ -753,36 +2049,17 @@ p.write_text(text) if [ -n "${MIRROR_NAMES:-}" ]; then echo "Mirrors: setting up remotes" local mname murl - local mirrors_ok=true for mname in $MIRROR_NAMES; do murl=$(eval "echo \"\$MIRROR_$(echo "$mname" | tr '[:lower:]' '[:upper:]')\"") || true [ -z "$murl" ] && continue - if git -C "$repo_root" remote get-url "$mname" >/dev/null 2>&1; then - if git -C "$repo_root" remote set-url "$mname" "$murl"; then - echo " + ${mname} -> ${murl} (updated)" - else - echo " ! ${mname} -> ${murl} (failed to update URL)" - mirrors_ok=false - fi - else - if git -C "$repo_root" remote add "$mname" "$murl"; then - echo " + ${mname} -> ${murl} (added)" - else - echo " ! 
${mname} -> ${murl} (failed to add remote)" - mirrors_ok=false - fi - fi + git -C "$repo_root" remote add "$mname" "$murl" 2>/dev/null \ + || git -C "$repo_root" remote set-url "$mname" "$murl" 2>/dev/null || true + echo " + ${mname} -> ${murl}" done # Initial sync: push current primary branch to mirrors - if [ "$mirrors_ok" = true ]; then - source "${FACTORY_ROOT}/lib/mirrors.sh" - export PROJECT_REPO_ROOT="$repo_root" - if mirror_push; then - echo "Mirrors: initial sync complete" - else - echo "Warning: mirror push failed" >&2 - fi - fi + source "${FACTORY_ROOT}/lib/mirrors.sh" + export PROJECT_REPO_ROOT="$repo_root" + mirror_push fi # Encrypt secrets if SOPS + age are available @@ -821,16 +2098,9 @@ p.write_text(text) # Activate default agents (zero-cost when idle — they only invoke Claude # when there is actual work, so an empty project burns no LLM tokens) mkdir -p "${FACTORY_ROOT}/state" - - # State files are idempotent — create if missing, skip if present - for state_file in ".dev-active" ".reviewer-active" ".gardener-active"; do - if [ -f "${FACTORY_ROOT}/state/${state_file}" ]; then - echo "State: ${state_file} (already active)" - else - touch "${FACTORY_ROOT}/state/${state_file}" - echo "State: ${state_file} (created)" - fi - done + touch "${FACTORY_ROOT}/state/.dev-active" + touch "${FACTORY_ROOT}/state/.reviewer-active" + touch "${FACTORY_ROOT}/state/.gardener-active" echo "" echo "Done. Project ${project_name} is ready." @@ -1295,10 +2565,594 @@ disinto_shell() { # Creates a Forgejo user and .profile repo for an agent. 
# Usage: disinto hire-an-agent [--formula ] -# disinto_hire_an_agent() is sourced from lib/hire-agent.sh +disinto_hire_an_agent() { + local agent_name="${1:-}" + local role="${2:-}" + local formula_path="" + local local_model="" + local poll_interval="" + + if [ -z "$agent_name" ] || [ -z "$role" ]; then + echo "Error: agent-name and role required" >&2 + echo "Usage: disinto hire-an-agent [--formula ] [--local-model ] [--poll-interval ]" >&2 + exit 1 + fi + shift 2 + + # Parse flags + while [ $# -gt 0 ]; do + case "$1" in + --formula) + formula_path="$2" + shift 2 + ;; + --local-model) + local_model="$2" + shift 2 + ;; + --poll-interval) + poll_interval="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" >&2 + exit 1 + ;; + esac + done + + # Default formula path + if [ -z "$formula_path" ]; then + formula_path="${FACTORY_ROOT}/formulas/${role}.toml" + fi + + # Validate formula exists + if [ ! -f "$formula_path" ]; then + echo "Error: formula not found at ${formula_path}" >&2 + exit 1 + fi + + echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────" + echo "Formula: ${formula_path}" + if [ -n "$local_model" ]; then + echo "Local model: ${local_model}" + echo "Poll interval: ${poll_interval:-300}s" + fi + + # Ensure FORGE_TOKEN is set + if [ -z "${FORGE_TOKEN:-}" ]; then + echo "Error: FORGE_TOKEN not set" >&2 + exit 1 + fi + + # Get Forge URL + local forge_url="${FORGE_URL:-http://localhost:3000}" + echo "Forge: ${forge_url}" + + # Step 1: Create user via API (skip if exists) + echo "" + echo "Step 1: Creating user '${agent_name}' (if not exists)..." 
+ + local user_exists=false + local user_pass="" + local admin_pass="" + + # Read admin password from .env for standalone runs (#184) + local env_file="${FACTORY_ROOT}/.env" + if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then + admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) + fi + + # Get admin token early (needed for both user creation and password reset) + local admin_user="disinto-admin" + admin_pass="${admin_pass:-admin}" + local admin_token="" + local admin_token_name="temp-token-$(date +%s)" + admin_token=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" \ + -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || admin_token="" + if [ -z "$admin_token" ]; then + # Token might already exist — try listing + admin_token=$(curl -sf \ + -u "${admin_user}:${admin_pass}" \ + "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || admin_token="" + fi + if [ -z "$admin_token" ]; then + echo "Error: failed to obtain admin API token" >&2 + echo " Cannot proceed without admin privileges" >&2 + exit 1 + fi + + if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then + user_exists=true + echo " User '${agent_name}' already exists" + # Reset user password so we can get a token (#184) + user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + # Reset password using basic auth (admin token fallback would poison subsequent calls) + if curl -sf -X PATCH \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users/${agent_name}" \ + -d "{\"password\":\"${user_pass}\"}" >/dev/null 2>&1; then + echo " Reset password for existing user '${agent_name}'" + else + echo " Warning: could not reset password for existing 
user" >&2 + fi + else + # Create user using basic auth (admin token fallback would poison subsequent calls) + # Create the user + user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" + if curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/admin/users" \ + -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then + echo " Created user '${agent_name}'" + else + echo " Warning: failed to create user via admin API" >&2 + # Try alternative: user might already exist + if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then + user_exists=true + echo " User '${agent_name}' exists (confirmed)" + else + echo " Error: failed to create user '${agent_name}'" >&2 + exit 1 + fi + fi + fi + + # Step 2: Create .profile repo on Forgejo + echo "" + echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." 
+ + local repo_exists=false + if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then + repo_exists=true + echo " Repo '${agent_name}/.profile' already exists" + else + # Get user token for creating repo + # Always try to get token using user_pass (set in Step 1 for new users, reset for existing) + local user_token="" + user_token=$(curl -sf -X POST \ + -u "${agent_name}:${user_pass}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/users/${agent_name}/tokens" \ + -d "{\"name\":\".profile-repo-token\",\"scopes\":[\"repository\"]}" 2>/dev/null \ + | jq -r '.sha1 // empty') || user_token="" + + if [ -z "$user_token" ]; then + # Try listing existing tokens + user_token=$(curl -sf \ + -u "${agent_name}:${user_pass}" \ + "${forge_url}/api/v1/users/${agent_name}/tokens" 2>/dev/null \ + | jq -r '.[0].sha1 // empty') || user_token="" + fi + + # Create the repo using the user's namespace (user/repos with user_token creates in that user's namespace) + # or use admin API to create in specific user's namespace + local repo_created=false + local create_output + + if [ -n "$user_token" ]; then + # Try creating as the agent user (user token creates in that user's namespace) + create_output=$(curl -sf -X POST \ + -H "Authorization: token ${user_token}" \ + -H "Content-Type: application/json" \ + "${forge_url}/api/v1/user/repos" \ + -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true + + if echo "$create_output" | grep -q '"id":\|[0-9]'; then + repo_created=true + echo " Created repo '${agent_name}/.profile'" + fi + fi + + # If user token failed or wasn't available, use admin API to create in agent's namespace + if [ "$repo_created" = false ]; then + echo " Using admin API to create repo in ${agent_name}'s namespace" + create_output=$(curl -sf -X POST \ + -u "${admin_user}:${admin_pass}" \ + -H "Content-Type: application/json" \ + 
"${forge_url}/api/v1/admin/users/${agent_name}/repos" \ + -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true + + if echo "$create_output" | grep -q '"id":\|[0-9]'; then + repo_created=true + echo " Created repo '${agent_name}/.profile' (via admin API)" + fi + fi + + if [ "$repo_created" = false ]; then + echo " Error: failed to create repo '${agent_name}/.profile'" >&2 + echo " Response: ${create_output}" >&2 + exit 1 + fi + fi + + # Step 3: Clone repo and create initial commit + echo "" + echo "Step 3: Cloning repo and creating initial commit..." + + local clone_dir="/tmp/.profile-clone-${agent_name}" + rm -rf "$clone_dir" + mkdir -p "$clone_dir" + + # Build authenticated clone URL using basic auth (user_pass is always set in Step 1) + if [ -z "${user_pass:-}" ]; then + echo " Error: no user password available for cloning" >&2 + exit 1 + fi + + local clone_url="${forge_url}/${agent_name}/.profile.git" + local auth_url + auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|") + auth_url="${auth_url}/.profile.git" + + # Display unauthenticated URL (auth token only in actual git clone command) + echo " Cloning: ${forge_url}/${agent_name}/.profile.git" + + # Try authenticated clone first (required for private repos) + if ! git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then + echo " Error: failed to clone repo with authentication" >&2 + echo " Note: Ensure the user has a valid API token with repository access" >&2 + rm -rf "$clone_dir" + exit 1 + fi + + # Configure git + git -C "$clone_dir" config user.name "disinto-admin" + git -C "$clone_dir" config user.email "disinto-admin@localhost" + + # Create directory structure + echo " Creating directory structure..." 
+ mkdir -p "${clone_dir}/journal" + mkdir -p "${clone_dir}/knowledge" + touch "${clone_dir}/journal/.gitkeep" + touch "${clone_dir}/knowledge/.gitkeep" + + # Copy formula + echo " Copying formula..." + cp "$formula_path" "${clone_dir}/formula.toml" + + # Create README + if [ ! -f "${clone_dir}/README.md" ]; then + cat > "${clone_dir}/README.md" </dev/null; then + git -C "$clone_dir" commit -m "chore: initial .profile setup" -q + git -C "$clone_dir" push origin main 2>&1 >/dev/null || \ + git -C "$clone_dir" push origin master 2>&1 >/dev/null || true + echo " Committed: initial .profile setup" + else + echo " No changes to commit" + fi + + rm -rf "$clone_dir" + + # Step 4: Set up branch protection + echo "" + echo "Step 4: Setting up branch protection..." + + # Source branch-protection.sh helper + local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh" + if [ -f "$bp_script" ]; then + # Source required environment + if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then + source "${FACTORY_ROOT}/lib/env.sh" + fi + + # Set up branch protection for .profile repo + if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then + echo " Branch protection configured for main branch" + echo " - Requires 1 approval before merge" + echo " - Admin-only merge enforcement" + echo " - Journal branch created for direct agent pushes" + else + echo " Warning: could not configure branch protection (Forgejo API may not be available)" + echo " Note: Branch protection can be set up manually later" + fi + else + echo " Warning: branch-protection.sh not found at ${bp_script}" + fi + + # Step 5: Create state marker + echo "" + echo "Step 5: Creating state marker..." + + local state_dir="${FACTORY_ROOT}/state" + mkdir -p "$state_dir" + local state_file="${state_dir}/.${role}-active" + + if [ ! 
-f "$state_file" ]; then + touch "$state_file" + echo " Created: ${state_file}" + else + echo " State marker already exists: ${state_file}" + fi + + # Step 6: Set up local model agent (if --local-model specified) + if [ -n "$local_model" ]; then + echo "" + echo "Step 6: Configuring local model agent..." + + local compose_file="${FACTORY_ROOT}/docker-compose.yml" + local override_file="${FACTORY_ROOT}/docker-compose.override.yml" + local override_dir + override_dir=$(dirname "$override_file") + mkdir -p "$override_dir" + + # Validate model endpoint is reachable + echo " Validating model endpoint: ${local_model}" + if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then + # Try /v1/chat/completions as fallback endpoint check + if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then + echo " Warning: model endpoint may not be reachable at ${local_model}" + echo " Continuing with configuration..." + fi + else + echo " Model endpoint is reachable" + fi + + # Generate service name from agent name (lowercase) + local service_name="agents-${agent_name}" + service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]') + + # Set default poll interval + local interval="${poll_interval:-300}" + + # Generate the override compose file + # Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time + # \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion + cat > "$override_file" < +# Example: disinto release v1.2.0 + +disinto_release() { + local version="${1:-}" + local formula_path="${FACTORY_ROOT}/formulas/release.toml" + + if [ -z "$version" ]; then + echo "Error: version required" >&2 + echo "Usage: disinto release " >&2 + echo "Example: disinto release v1.2.0" >&2 + exit 1 + fi + + # Validate version format (must start with 'v' followed by semver) + if ! 
echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2 + exit 1 + fi + + # Load project config to get FORGE_OPS_REPO + if [ -z "${PROJECT_NAME:-}" ]; then + # PROJECT_NAME is unset - detect project TOML from projects/ directory + local found_toml + found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1) + if [ -n "$found_toml" ]; then + source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml" + fi + else + local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml" + if [ -f "$project_toml" ]; then + source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml" + fi + fi + + # Check formula exists + if [ ! -f "$formula_path" ]; then + echo "Error: release formula not found at ${formula_path}" >&2 + exit 1 + fi + + # Get the ops repo root + local ops_root="${FACTORY_ROOT}/../disinto-ops" + if [ ! -d "${ops_root}/.git" ]; then + echo "Error: ops repo not found at ${ops_root}" >&2 + echo " Run 'disinto init' to set up the ops repo first" >&2 + exit 1 + fi + + # Generate a unique ID for the vault item + local id="release-${version//./}" + local vault_toml="${ops_root}/vault/actions/${id}.toml" + + # Create vault TOML with the specific version + cat > "$vault_toml" </dev/null || true + + # Push branch + git push -u origin "$branch_name" 2>/dev/null || { + echo "Error: failed to push branch" >&2 + exit 1 + } + + # Create PR + local pr_response + pr_response=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \ + -d "{\"title\":\"${pr_title}\",\"head\":\"${branch_name}\",\"base\":\"main\",\"body\":\"$(echo "$pr_body" | sed ':a;N;$!ba;s/\n/\\n/g')\"}" 2>/dev/null) || { + echo "Error: failed to create PR" >&2 + echo "Response: ${pr_response}" >&2 + exit 1 + } + + local pr_number + pr_number=$(echo "$pr_response" | jq -r 
'.number') + + local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}" + + # Enable auto-merge on the PR — Forgejo will auto-merge after approval + _vault_log "Enabling auto-merge for PR #${pr_number}" + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \ + -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { + echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2 + } + + echo "" + echo "Release PR created: ${pr_url}" + echo "" + echo "Next steps:" + echo " 1. Review the PR" + echo " 2. Approve the PR (auto-merge will trigger after approval)" + echo " 3. The vault runner will execute the release formula" + echo "" + echo "After merge, the release will:" + echo " 1. Tag Forgejo main with ${version}" + echo " 2. Push tag to mirrors (Codeberg, GitHub)" + echo " 3. Build and tag the agents Docker image" + echo " 4. Restart agent containers" +} # ── ci-logs command ────────────────────────────────────────────────────────── # Reads CI logs from the Woodpecker SQLite database. diff --git a/dev/AGENTS.md b/dev/AGENTS.md index e8a0ead..2b787f1 100644 --- a/dev/AGENTS.md +++ b/dev/AGENTS.md @@ -1,4 +1,4 @@ - + # Dev Agent **Role**: Implement issues autonomously — write code, push branches, address @@ -14,8 +14,9 @@ in-progress issues are also picked up. The direct-merge scan runs before the loc check so approved PRs get merged even while a dev-agent session is active. **Key files**: -- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming — skips if assigned to a different bot user. 
**Stale branch abandonment**: closes PRs and deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start). **Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`. If the issue is assigned to `$BOT_USER` (this agent), sets `BLOCKED_BY_INPROGRESS=true` — my thread is busy. If assigned to another agent, logs and falls through (does not block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds `blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work, filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents' PRs do not block this agent's pipeline (#358, #369). +- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed` (replaced `prediction/backlog` — that label no longer exists) - `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval +- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge. - `dev/phase-test.sh` — Integration test for the phase protocol **Environment variables consumed** (via `lib/env.sh` + project TOML): @@ -32,7 +33,7 @@ check so approved PRs get merged even while a dev-agent session is active. **Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h. 
-**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux session → phase file +**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after `PHASE:escalate`, the stale phase file is cleared first so the session starts clean; the reinject prompt tells Claude not to re-escalate for the same reason. diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index 484da76..7d852df 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -42,11 +42,6 @@ log() { printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" } -# Resolve current agent identity once at startup — cache for all assignee checks -BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API%%/repos*}/user" | jq -r '.login') || BOT_USER="" -log "running as agent: ${BOT_USER}" - # ============================================================================= # CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3) # ============================================================================= @@ -99,68 +94,6 @@ is_blocked() { | jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1 } -# ============================================================================= -# STALENESS DETECTION FOR IN-PROGRESS ISSUES -# ============================================================================= - -# Check if there's an open PR for a specific issue -# Args: issue_number -# Returns: 0 if open PR exists, 1 if not -open_pr_exists() { - local issue="$1" - local branch="fix/issue-${issue}" - local pr_num - - pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "$branch" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - - [ -n "$pr_num" ] -} - -# Relabel a stale in-progress issue to blocked with diagnostic comment -# Args: issue_number reason -# Uses 
shared helpers from lib/issue-lifecycle.sh -relabel_stale_issue() { - local issue="$1" reason="$2" - - log "relabeling stale in-progress issue #${issue} to blocked: ${reason}" - - # Remove in-progress label - local ip_id - ip_id=$(_ilc_in_progress_id) - if [ -n "$ip_id" ]; then - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue}/labels/${ip_id}" >/dev/null 2>&1 || true - fi - - # Add blocked label - local bk_id - bk_id=$(_ilc_blocked_id) - if [ -n "$bk_id" ]; then - curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/issues/${issue}/labels" \ - -d "{\"labels\":[${bk_id}]}" >/dev/null 2>&1 || true - fi - - # Post diagnostic comment using shared helper - local comment_body - comment_body=$( - printf '%s\n\n' '### Stale in-progress issue detected' - printf '%s\n' '| Field | Value |' - printf '%s\n' '|---|---|' - printf '| Detection reason | `%s` |\n' "$reason" - printf '| Timestamp | `%s` |\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" - printf '%s\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.' - printf '%s\n' '**Action required:** A maintainer should triage this issue.' - ) - _ilc_post_comment "$issue" "$comment_body" - - _ilc_log "stale issue #${issue} relabeled to blocked: ${reason}" -} - # ============================================================================= # HELPER: handle CI-exhaustion check/block (DRY for 3 call sites) # Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not. 
@@ -345,16 +278,6 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then - # Check if issue is assigned to this agent — only merge own PRs - if [ "$PL_ISSUE" -gt 0 ]; then - PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${PL_ISSUE}") || true - PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then - log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)" - continue - fi - fi if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then PL_MERGED_ANY=true fi @@ -378,9 +301,6 @@ if [ -f "$LOCKFILE" ]; then rm -f "$LOCKFILE" fi -# --- Fetch origin refs before any stale branch checks --- -git fetch origin --prune 2>/dev/null || true - # --- Memory guard --- memory_guard 2000 @@ -389,210 +309,129 @@ memory_guard 2000 # ============================================================================= log "checking for in-progress issues" +# Get current bot identity for assignee checks +BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API%%/repos*}/user" | jq -r '.login') || BOT_USER="" + ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/issues?state=open&labels=in-progress&limit=10&type=issues") ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length') -BLOCKED_BY_INPROGRESS=false if [ "$ORPHAN_COUNT" -gt 0 ]; then ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number') - # Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale - OPEN_PR=false - if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + # Formula guard: formula-labeled issues should not be worked on by dev-agent. + # Remove in-progress label and skip to prevent infinite respawn cycle (#115). 
+ ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true + SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true + if [ -n "$SKIP_LABEL" ]; then + log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" + IP_ID=$(_ilc_in_progress_id) + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true + exit 0 + fi + + # Check if there's already an open PR for this issue + HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${API}/pulls?state=open&limit=20" | \ - jq -e --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch)' >/dev/null 2>&1; then - OPEN_PR=true - fi + jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true - # Check if issue has an assignee — only block on issues assigned to this agent - assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""') - if [ -n "$assignee" ]; then - if [ "$assignee" = "$BOT_USER" ]; then - # Check if my PR has review feedback to address before exiting - HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$HAS_PR" ]; then + # Check if branch is stale (behind primary branch) + BRANCH="fix/issue-${ISSUE_NUM}" + AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999") + if [ "$AHEAD" -gt 0 ]; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" + # Close the PR via API + curl -sf -X PATCH \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls/${HAS_PR}" \ + -d 
'{"state":"closed"}' >/dev/null 2>&1 || true + # Delete the branch via git push + git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true + # Reset to fresh start on primary branch + git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true + git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true + # Exit to restart poll cycle (issue will be picked up fresh) + exit 0 + fi - if [ -n "$HAS_PR" ]; then - # Check for REQUEST_CHANGES review feedback - REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}/reviews") || true - HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true + CI_STATE=$(ci_commit_status "$PR_SHA") || true - if [ "${HAS_CHANGES:-0}" -gt 0 ]; then - log "issue #${ISSUE_NUM} has review feedback — spawning agent" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" - BLOCKED_BY_INPROGRESS=true - else - log "issue #${ISSUE_NUM} assigned to me — my thread is busy" - BLOCKED_BY_INPROGRESS=true - fi - else - log "issue #${ISSUE_NUM} assigned to me — my thread is busy" - BLOCKED_BY_INPROGRESS=true + # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed + if ! ci_passed "$CI_STATE" && ! 
ci_required_for_pr "$HAS_PR"; then + CI_STATE="success" + log "PR #${HAS_PR} has no code files — treating CI as passed" + fi + + # Check formal reviews (single fetch to avoid race window) + REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${HAS_PR}/reviews") || true + HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true + HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ + jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true + + if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then + if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then + exit 0 fi + # Direct merge failed (conflicts?) — fall back to dev-agent + log "falling back to dev-agent for PR #${HAS_PR} merge" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" + exit 0 + + # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is + # pending/unknown. Definitive CI failure is handled by the elif below. + elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then + log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (review fix)" + exit 0 + + elif ci_failed "$CI_STATE"; then + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then + # Fall through to backlog scan instead of exit + : + else + # Increment at actual launch time (not on guard-hit paths) + if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then + exit 0 # exhausted between check and launch + fi + log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)" + exit 0 + fi + else - log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking" - # Issue assigned to another agent — don't block, fall through to backlog + log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" + exit 0 fi - fi + else + # Check assignee before adopting orphaned issue + ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}") || true + ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - # Only proceed with in-progress checks if not blocked by another agent - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - # Check for dev-agent lock file (agent may be running in another container) - LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt" - if [ -f "$LOCK_FILE" ]; then - log "issue #${ISSUE_NUM} has agent lock file — trusting active work" - BLOCKED_BY_INPROGRESS=true + if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then + log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)" + # Remove in-progress label since this agent isn't working on it + IP_ID=$(_ilc_in_progress_id) + curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true + exit 0 fi - if [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then - log 
"issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked" - relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock" - BLOCKED_BY_INPROGRESS=true - fi - - # Formula guard: formula-labeled issues should not be worked on by dev-agent. - # Remove in-progress label and skip to prevent infinite respawn cycle (#115). - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true - SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true - if [ -n "$SKIP_LABEL" ]; then - log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping" - IP_ID=$(_ilc_in_progress_id) - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true - BLOCKED_BY_INPROGRESS=true - fi - fi - - # Check if there's already an open PR for this issue - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls?state=open&limit=20" | \ - jq -r --arg branch "fix/issue-${ISSUE_NUM}" \ - '.[] | select(.head.ref == $branch) | .number' | head -1) || true - - if [ -n "$HAS_PR" ]; then - # Check if branch is stale (behind primary branch) - BRANCH="fix/issue-${ISSUE_NUM}" - AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") - if [ "$AHEAD" -gt 0 ]; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" - # Close the PR via API - curl -sf -X PATCH \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${API}/pulls/${HAS_PR}" \ - -d '{"state":"closed"}' >/dev/null 2>&1 || true - # Delete the branch via git push - git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true - # Reset to fresh start on primary 
branch - git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true - git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true - BLOCKED_BY_INPROGRESS=true - fi - - # Only process PR if not abandoned (stale branch check above) - if [ "$BLOCKED_BY_INPROGRESS" = false ]; then - PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true - CI_STATE=$(ci_commit_status "$PR_SHA") || true - - # Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed - if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then - CI_STATE="success" - log "PR #${HAS_PR} has no code files — treating CI as passed" - fi - - # Check formal reviews (single fetch to avoid race window) - REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${HAS_PR}/reviews") || true - HAS_APPROVE=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true - HAS_CHANGES=$(echo "$REVIEWS_JSON" | \ - jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true - - if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then - if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then - BLOCKED_BY_INPROGRESS=true - else - # Direct merge failed (conflicts?) — fall back to dev-agent - log "falling back to dev-agent for PR #${HAS_PR} merge" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)" - BLOCKED_BY_INPROGRESS=true - fi - - # Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is - # pending/unknown. Definitive CI failure is handled by the elif below. 
- elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then - # Check if issue is assigned to this agent — skip if assigned to another bot - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}") || true - assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then - log "issue #${ISSUE_NUM} PR #${HAS_PR} REQUEST_CHANGES but assigned to ${assignee} — skipping" - # Don't block — fall through to backlog - BLOCKED_BY_INPROGRESS=false - else - log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)" - BLOCKED_BY_INPROGRESS=true - fi - - elif ci_failed "$CI_STATE"; then - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then - # Fall through to backlog scan instead of exit - : - else - # Increment at actual launch time (not on guard-hit paths) - if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then - BLOCKED_BY_INPROGRESS=true # exhausted between check and launch - else - log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (CI fix)" - BLOCKED_BY_INPROGRESS=true - fi - fi - - else - log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)" - BLOCKED_BY_INPROGRESS=true - fi - fi - else - # Check assignee before adopting orphaned issue - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}") || true - ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - - if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then - log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)" - # Remove in-progress label since this agent isn't working on it - IP_ID=$(_ilc_in_progress_id) - curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true - # Don't block — fall through to backlog - else - log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})" - nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & - log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)" - BLOCKED_BY_INPROGRESS=true - fi - fi - fi - fi - - # If blocked by in-progress work, exit now - if [ "$BLOCKED_BY_INPROGRESS" = true ]; then + log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})" + nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 & + log "started dev-agent PID $! 
for issue #${ISSUE_NUM} (recovery)" exit 0 fi fi @@ -662,14 +501,6 @@ for i in $(seq 0 $(($(echo "$OPEN_PRS" | jq 'length') - 1))); do # Stuck: REQUEST_CHANGES or CI failure -> spawn agent if [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then - # Check if issue is assigned to this agent — skip if assigned to another bot - ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${STUCK_ISSUE}") || true - assignee=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true - if [ -n "$assignee" ] && [ "$assignee" != "$BOT_USER" ]; then - log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) REQUEST_CHANGES but assigned to ${assignee} — skipping" - continue # skip this PR, check next stuck PR or fall through to backlog - fi log "PR #${PR_NUM} (issue #${STUCK_ISSUE}) has REQUEST_CHANGES — fixing first" nohup "${SCRIPT_DIR}/dev-agent.sh" "$STUCK_ISSUE" >> "$LOGFILE" 2>&1 & log "started dev-agent PID $! 
for stuck PR #${PR_NUM}" @@ -762,7 +593,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do if [ -n "$EXISTING_PR" ]; then # Check if branch is stale (behind primary branch) BRANCH="fix/issue-${ISSUE_NUM}" - AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0") + AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999") if [ "$AHEAD" -gt 0 ]; then log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR" # Close the PR via API @@ -837,32 +668,9 @@ done # Single-threaded per project: if any issue has an open PR waiting for review/CI, # don't start new work — let the pipeline drain first -# But only block on PRs assigned to this agent (per-agent logic from #358) if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then - # Filter to only this agent's waiting PRs - MY_WAITING_PRS="" - for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do - pr_num="${pr_num#\#}" # Remove leading # - # Check if this PR's issue is assigned to this agent - pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/pulls/${pr_num}" 2>/dev/null) || true - pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true - issue_num=$(echo "$pr_branch" | grep -oP '(?<=fix/issue-)\d+' || true) - if [ -z "$issue_num" ]; then - continue - fi - issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ - "${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true - if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then - MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}" - fi - done - - if [ -n "$MY_WAITING_PRS" ]; then - log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}" - exit 0 - fi - log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with #${READY_ISSUE}" + log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: 
${WAITING_PRS}" + exit 0 fi if [ -z "$READY_ISSUE" ]; then diff --git a/dev/phase-handler.sh b/dev/phase-handler.sh new file mode 100644 index 0000000..8f3b3b4 --- /dev/null +++ b/dev/phase-handler.sh @@ -0,0 +1,820 @@ +#!/usr/bin/env bash +# dev/phase-handler.sh — Phase callback functions for dev-agent.sh +# +# Source this file from agent orchestrators after lib/agent-session.sh is loaded. +# Defines: post_refusal_comment(), _on_phase_change(), build_phase_protocol_prompt() +# +# Required globals (set by calling agent before or after sourcing): +# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT +# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE +# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE +# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER +# +# Globals with defaults (agents can override after sourcing): +# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS, +# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND, +# CLAIMED, PHASE_POLL_INTERVAL +# +# Calls back to agent-defined helpers: +# cleanup_worktree(), cleanup_labels(), status(), log() +# +# shellcheck shell=bash +# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling +# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh + +# Load secret scanner for redacting tmux output before posting to issues +# shellcheck source=../lib/secret-scan.sh +source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh" + +# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.) +# shellcheck source=../lib/ci-helpers.sh +source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh" + +# Load mirror push helper +# shellcheck source=../lib/mirrors.sh +source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh" + +# --- Default callback stubs (agents can override after sourcing) --- +# cleanup_worktree and cleanup_labels are called during phase transitions. 
+# Provide no-op defaults so phase-handler.sh is self-contained; sourcing +# agents override these with real implementations. +if ! declare -f cleanup_worktree >/dev/null 2>&1; then + cleanup_worktree() { :; } +fi +if ! declare -f cleanup_labels >/dev/null 2>&1; then + cleanup_labels() { :; } +fi + +# --- Default globals (agents can override after sourcing) --- +: "${CI_POLL_TIMEOUT:=1800}" +: "${REVIEW_POLL_TIMEOUT:=10800}" +: "${MAX_CI_FIXES:=3}" +: "${MAX_REVIEW_ROUNDS:=5}" +: "${CI_RETRY_COUNT:=0}" +: "${CI_FIX_COUNT:=0}" +: "${REVIEW_ROUND:=0}" +: "${PR_NUMBER:=}" +: "${CLAIMED:=false}" +: "${PHASE_POLL_INTERVAL:=30}" + +# --- Post diagnostic comment + label issue as blocked --- +# Captures tmux pane output, posts a structured comment on the issue, removes +# in-progress label, and adds the "blocked" label. +# +# Args: reason [session_name] +# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API +post_blocked_diagnostic() { + local reason="$1" + local session="${2:-${SESSION_NAME:-}}" + + # Capture last 50 lines from tmux pane (before kill) + local tmux_output="" + if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then + tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true) + fi + + # Redact any secrets from tmux output before posting to issue + if [ -n "$tmux_output" ]; then + tmux_output=$(redact_secrets "$tmux_output") + fi + + # Build diagnostic comment body + local comment + comment="### Session failure diagnostic + +| Field | Value | +|---|---| +| Exit reason | \`${reason}\` | +| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |" + [ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ] && \ + comment="${comment} +| PR | #${PR_NUMBER} |" + + if [ -n "$tmux_output" ]; then + comment="${comment} + +
<details><summary>Last 50 lines from tmux pane</summary> + +\`\`\` +${tmux_output} +\`\`\` +</details>
" + fi + + # Post comment to issue + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + -d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true + + # Remove in-progress, add blocked + cleanup_labels + local blocked_id + blocked_id=$(ensure_blocked_label_id) + if [ -n "$blocked_id" ]; then + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/labels" \ + -d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true + fi + CLAIMED=false + _BLOCKED_POSTED=true +} + +# --- Build phase protocol prompt (shared across agents) --- +# Generates the phase-signaling instructions for Claude prompts. +# Args: phase_file summary_file branch [remote] +# Output: The protocol text (stdout) +build_phase_protocol_prompt() { + local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}" + cat <<_PHASE_PROTOCOL_EOF_ +## Phase-Signaling Protocol (REQUIRED) + +You are running in a persistent tmux session managed by an orchestrator. +Communicate progress by writing to the phase file. The orchestrator watches +this file and injects events (CI results, review feedback) back into this session. + +### Key files +\`\`\` +PHASE_FILE="${_pf}" +SUMMARY_FILE="${_sf}" +\`\`\` + +### Phase transitions — write these exactly: + +**After committing and pushing your branch:** +\`\`\`bash +# Rebase on target branch before push to avoid merge conflicts +git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} +git push ${_remote} ${_br} +# Write a short summary of what you implemented: +printf '%s' "" > "\${SUMMARY_FILE}" +# Signal the orchestrator to create the PR and watch for CI: +echo "PHASE:awaiting_ci" > "${_pf}" +\`\`\` +Then STOP and wait. The orchestrator will inject CI results. 
+ +**When you receive a "CI passed" injection:** +\`\`\`bash +echo "PHASE:awaiting_review" > "${_pf}" +\`\`\` +Then STOP and wait. The orchestrator will inject review feedback. + +**When you receive a "CI failed:" injection:** +Fix the CI issue, then rebase on target branch and push: +\`\`\`bash +git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} +git push --force-with-lease ${_remote} ${_br} +echo "PHASE:awaiting_ci" > "${_pf}" +\`\`\` +Then STOP and wait. + +**When you receive a "Review: REQUEST_CHANGES" injection:** +Address ALL review feedback, then rebase on target branch and push: +\`\`\`bash +git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH} +git push --force-with-lease ${_remote} ${_br} +echo "PHASE:awaiting_ci" > "${_pf}" +\`\`\` +(CI runs again after each push — always write awaiting_ci, not awaiting_review) + +**When you need human help (CI exhausted, merge blocked, stuck on a decision):** +\`\`\`bash +printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}" +\`\`\` +Then STOP and wait. A human will review and respond via the forge. + +**On unrecoverable failure:** +\`\`\`bash +printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}" +\`\`\` +_PHASE_PROTOCOL_EOF_ +} + +# --- Merge helper --- +# do_merge — attempt to merge PR via forge API. +# Args: pr_num +# Returns: +# 0 = merged successfully +# 1 = other failure (conflict, network error, etc.) 
+# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written +do_merge() { + local pr_num="$1" + local merge_response merge_http_code merge_body + merge_response=$(curl -s -w "\n%{http_code}" -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H 'Content-Type: application/json' \ + "${API}/pulls/${pr_num}/merge" \ + -d '{"Do":"merge","delete_branch_after_merge":true}') || true + merge_http_code=$(echo "$merge_response" | tail -1) + merge_body=$(echo "$merge_response" | sed '$d') + + if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then + log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})" + return 0 + fi + + # HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll). + # Before escalating, check whether the PR was already merged by another agent. + if [ "$merge_http_code" = "405" ]; then + local pr_state + pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false" + if [ "$pr_state" = "true" ]; then + log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success" + return 0 + fi + log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" + printf 'PHASE:escalate\nReason: %s\n' \ + "PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \ + > "$PHASE_FILE" + return 2 + fi + + log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}" + return 1 +} + +# --- Refusal comment helper --- +post_refusal_comment() { + local emoji="$1" title="$2" body="$3" + local last_has_title + last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/issues/${ISSUE}/comments?limit=5" | \ + jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true + if [ "$last_has_title" = "true" ]; then + log "skipping duplicate refusal comment: 
${title}" + return 0 + fi + local comment + comment="${emoji} **Dev-agent: ${title}** + +${body} + +--- +*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*" + printf '%s' "$comment" > "/tmp/refusal-comment.txt" + jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/issues/${ISSUE}/comments" \ + --data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \ + log "WARNING: failed to post refusal comment" + rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json" +} + +# ============================================================================= +# PHASE DISPATCH CALLBACK +# ============================================================================= + +# _on_phase_change — Phase dispatch callback for monitor_phase_loop +# Receives the current phase as $1. +# Returns 0 to continue the loop, 1 to break (terminal phase reached). +_on_phase_change() { + local phase="$1" + + # ── PHASE: awaiting_ci ────────────────────────────────────────────────────── + if [ "$phase" = "PHASE:awaiting_ci" ]; then + # Release session lock — Claude is idle during CI polling (#724) + session_lock_release + + # Create PR if not yet created + if [ -z "${PR_NUMBER:-}" ]; then + status "creating PR for issue #${ISSUE}" + IMPL_SUMMARY="" + if [ -f "$IMPL_SUMMARY_FILE" ]; then + # Don't treat refusal JSON as a PR summary + if ! 
jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then + IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE") + fi + fi + + printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt" + jq -n \ + --arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \ + --rawfile body "/tmp/pr-body-${ISSUE}.txt" \ + --arg head "$BRANCH" \ + --arg base "${PRIMARY_BRANCH}" \ + '{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json" + + PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${API}/pulls" \ + --data-binary @"/tmp/pr-request-${ISSUE}.json") + + PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1) + PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d') + rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json" + + if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then + PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number') + log "created PR #${PR_NUMBER}" + elif [ "$PR_HTTP_CODE" = "409" ]; then + # PR already exists (race condition) — find it + FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$FOUND_PR" ]; then + PR_NUMBER="$FOUND_PR" + log "PR already exists: #${PR_NUMBER}" + else + log "ERROR: PR creation got 409 but no existing PR found" + agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed." + return 0 + fi + else + log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})" + agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again." 
+ return 0 + fi + fi + + # No CI configured? Treat as success immediately + if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then + log "no CI configured — treating as passed" + agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project). +Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback." + return 0 + fi + + # Poll CI until done or timeout + status "waiting for CI on PR #${PR_NUMBER}" + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \ + curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') + + CI_DONE=false + CI_STATE="unknown" + CI_POLL_ELAPSED=0 + while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do + sleep 30 + CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 )) + + # Check session still alive during CI wait (exit_marker + tmux fallback) + if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "session died during CI wait" + break + fi + + # Re-fetch HEAD — Claude may have pushed new commits since loop started + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA") + + CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA") + if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + CI_DONE=true + [ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0 + break + fi + done + + if ! $CI_DONE; then + log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s" + agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed." + return 0 + fi + + log "CI: ${CI_STATE}" + + if [ "$CI_STATE" = "success" ]; then + agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}. 
+Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback: + echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\"" + else + # Fetch CI error details + PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA") + + FAILED_STEP="" + FAILED_EXIT="" + IS_INFRA=false + if [ -n "$PIPELINE_NUM" ]; then + FAILED_INFO=$(curl -sf \ + -H "Authorization: Bearer ${WOODPECKER_TOKEN}" \ + "${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \ + jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true) + FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1) + FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2) + fi + + log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}" + + if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then + IS_INFRA=true + fi + + if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then + CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 )) + log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})" + (cd "$WORKTREE" && git commit --allow-empty \ + -m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1) + # Rebase on target branch before push to avoid merge conflicts + if ! (cd "$WORKTREE" && \ + git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \ + git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then + log "rebase conflict detected — aborting, agent must resolve" + (cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true + agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically. + +Please resolve merge conflicts manually: +1. Check conflict status: git status +2. Resolve conflicts in the conflicted files +3. Stage resolved files: git add +4. 
Continue rebase: git rebase --continue + +If you cannot resolve conflicts, abort: git rebase --abort +Then write PHASE:escalate with a reason." + return 0 + fi + # Rebase succeeded — push the result + (cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3) + # Touch phase file so we recheck CI on the new SHA + # Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime + touch "$PHASE_FILE" + CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true) + return 0 + fi + + CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 )) + _ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}" + if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then + log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating" + printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE" + # Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate + return 0 + fi + + CI_ERROR_LOG="" + if [ -n "$PIPELINE_NUM" ]; then + CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "") + fi + + # Save CI result for crash recovery + printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \ + "$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \ + > "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true + + agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}). + +Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?}) + +CI debug tool: + bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0} + bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} + +Error snippet: +${CI_ERROR_LOG:-No logs available. 
Use ci-debug.sh to query the pipeline.} + +Instructions: +1. Run ci-debug.sh failures to get the full error output. +2. Read the failing test file(s) — understand what the tests EXPECT. +3. Fix the root cause — do NOT weaken tests. +4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH} + git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH} +5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\" +6. Stop and wait." + fi + + # ── PHASE: awaiting_review ────────────────────────────────────────────────── + elif [ "$phase" = "PHASE:awaiting_review" ]; then + # Release session lock — Claude is idle during review wait (#724) + session_lock_release + status "waiting for review on PR #${PR_NUMBER:-?}" + CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle + + if [ -z "${PR_NUMBER:-}" ]; then + log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR" + FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls?state=open&limit=20" | \ + jq -r --arg branch "$BRANCH" \ + '.[] | select(.head.ref == $branch) | .number' | head -1) || true + if [ -n "$FOUND_PR" ]; then + PR_NUMBER="$FOUND_PR" + log "found PR #${PR_NUMBER}" + else + agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci." + return 0 + fi + fi + + REVIEW_POLL_ELAPSED=0 + REVIEW_FOUND=false + while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do + sleep 300 # 5 min between review checks + REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 )) + + # Check session still alive (exit_marker + tmux fallback) + if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! 
tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then + log "session died during review wait" + REVIEW_FOUND=false + break + fi + + # Check if phase was updated while we wait (e.g., Claude reacted to something) + NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0) + if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then + log "phase file updated during review wait — re-entering main loop" + # Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer + # loop detects the change on its next tick and dispatches the new phase. + REVIEW_FOUND=true # Prevent timeout injection + # Clean up review-poll sentinel if it exists (session already advanced) + rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}" + break + fi + + REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true + REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \ + jq -r --arg sha "$REVIEW_SHA" \ + '[.[] | select(.body | contains(" + # Gardener Agent **Role**: Backlog grooming — detect duplicate issues, missing acceptance diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index b524b62..62e9eb1 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -45,7 +45,7 @@ source "$FACTORY_ROOT/lib/agent-sdk.sh" # shellcheck source=../lib/pr-lifecycle.sh source "$FACTORY_ROOT/lib/pr-lifecycle.sh" -LOG_FILE="${DISINTO_LOG_DIR}/gardener/gardener.log" +LOG_FILE="$SCRIPT_DIR/gardener.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -55,22 +55,20 @@ RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt" GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt" WORKTREE="/tmp/${PROJECT_NAME}-gardener-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="gardener" +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" 
>> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active gardener acquire_cron_lock "/tmp/gardener-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Gardener run start ---" -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - # ── Resolve agent identity for .profile repo ──────────────────────────── -resolve_agent_identity || true +if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_GARDENER_TOKEN:-}" ]; then + AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_GARDENER_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +fi # ── Load formula + context ─────────────────────────────────────────────── load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1 @@ -129,7 +127,16 @@ ${SCRATCH_INSTRUCTION} ${PROMPT_FOOTER}" # ── Create worktree ────────────────────────────────────────────────────── -formula_worktree_setup "$WORKTREE" +cd "$PROJECT_REPO_ROOT" +git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true +worktree_cleanup "$WORKTREE" +git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null + +cleanup() { + worktree_cleanup "$WORKTREE" + rm -f "$GARDENER_PR_FILE" +} +trap cleanup EXIT # ── Post-merge manifest execution ──────────────────────────────────────── # Reads gardener/pending-actions.json and executes each action via API. 
@@ -158,21 +165,19 @@ _gardener_execute_manifest() { case "$action" in add_label) - local label label_id http_code resp + local label label_id label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/labels" \ - -d "{\"labels\":[${label_id}]}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + -d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then log "manifest: add_label '${label}' to #${issue}" else - log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}" + log "manifest: FAILED add_label '${label}' to #${issue}" fi else log "manifest: FAILED add_label — label '${label}' not found" @@ -180,19 +185,17 @@ _gardener_execute_manifest() { ;; remove_label) - local label label_id http_code resp + local label label_id label=$(jq -r ".[$i].label" "$manifest_file") label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/labels" | jq -r --arg n "$label" \ '.[] | select(.name == $n) | .id') || true if [ -n "$label_id" ]; then - resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then log "manifest: remove_label '${label}' from #${issue}" else - log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}" + log 
"manifest: FAILED remove_label '${label}' from #${issue}" fi else log "manifest: FAILED remove_label — label '${label}' not found" @@ -200,38 +203,34 @@ _gardener_execute_manifest() { ;; close) - local reason http_code resp + local reason reason=$(jq -r ".[$i].reason // empty" "$manifest_file") - resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d '{"state":"closed"}' 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + -d '{"state":"closed"}' >/dev/null 2>&1; then log "manifest: closed #${issue} (${reason})" else - log "manifest: FAILED close #${issue}: HTTP ${http_code}" + log "manifest: FAILED close #${issue}" fi ;; comment) - local body escaped_body http_code resp + local body escaped_body body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}/comments" \ - -d "{\"body\":${escaped_body}}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then log "manifest: commented on #${issue}" else - log "manifest: FAILED comment on #${issue}: HTTP ${http_code}" + log "manifest: FAILED comment on #${issue}" fi ;; create_issue) - local title body labels escaped_title escaped_body label_ids http_code resp + local title body labels escaped_title escaped_body label_ids title=$(jq -r ".[$i].title" "$manifest_file") body=$(jq -r ".[$i].body" "$manifest_file") labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file") @@ -251,46 +250,40 @@ 
_gardener_execute_manifest() { done <<< "$labels" [ -n "$ids_json" ] && label_ids="[${ids_json}]" fi - resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues" \ - -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + -d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then log "manifest: created issue '${title}'" else - log "manifest: FAILED create_issue '${title}': HTTP ${http_code}" + log "manifest: FAILED create_issue '${title}'" fi ;; edit_body) - local body escaped_body http_code resp + local body escaped_body body=$(jq -r ".[$i].body" "$manifest_file") escaped_body=$(printf '%s' "$body" | jq -Rs '.') - resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/issues/${issue}" \ - -d "{\"body\":${escaped_body}}" 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then + -d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then log "manifest: edited body of #${issue}" else - log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}" + log "manifest: FAILED edit_body #${issue}" fi ;; close_pr) - local pr http_code resp + local pr pr=$(jq -r ".[$i].pr" "$manifest_file") - resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ + if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \ -H 'Content-Type: application/json' \ "${FORGE_API}/pulls/${pr}" \ - -d '{"state":"closed"}' 2>/dev/null) || true - http_code=$(echo "$resp" | tail -1) - if [ "$http_code" = "200" ] 
|| [ "$http_code" = "204" ]; then + -d '{"state":"closed"}' >/dev/null 2>&1; then log "manifest: closed PR #${pr}" else - log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}" + log "manifest: FAILED close_pr #${pr}" fi ;; @@ -335,9 +328,9 @@ if [ -n "$PR_NUMBER" ]; then if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then # Post-merge: pull primary, mirror push, execute manifest - git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true - git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true + git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true mirror_push _gardener_execute_manifest rm -f "$SCRATCH_FILE" diff --git a/gardener/pending-actions.json b/gardener/pending-actions.json index a148369..747973c 100644 --- a/gardener/pending-actions.json +++ b/gardener/pending-actions.json @@ -1,7 +1,32 @@ [ { "action": "edit_body", - "issue": 356, - "body": "## Problem\n\nThe entrypoint hardcodes `REPRODUCE_FORMULA` to `formulas/reproduce.toml` (line 26) and never checks the `DISINTO_FORMULA` environment variable passed by the dispatcher for triage runs.\n\nThe dispatcher sets `-e DISINTO_FORMULA=triage` for triage dispatch, but the entrypoint ignores it — always running the reproduce formula.\n\n## Fix\n\nAt line 26, select the formula based on `DISINTO_FORMULA`:\n\n```bash\ncase \"${DISINTO_FORMULA:-reproduce}\" in\n triage)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/triage.toml\"\n ;;\n *)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/reproduce.toml\"\n ;;\nesac\n```\n\nThen use `ACTIVE_FORMULA` everywhere `REPRODUCE_FORMULA` is currently used.\n\nAlso update log messages to reflect which formula is running (\"Starting triage-agent\" vs \"Starting reproduce-agent\").\n\n## Affected files\n\n- 
`docker/reproduce/entrypoint-reproduce.sh` — line 26 and all references to REPRODUCE_FORMULA\n\n## Acceptance criteria\n\n- [ ] `DISINTO_FORMULA=triage` selects `formulas/triage.toml` in the entrypoint\n- [ ] `DISINTO_FORMULA=reproduce` (or unset) still runs `formulas/reproduce.toml`\n- [ ] Log messages reflect which formula is active (\"Starting triage-agent\" / \"Starting reproduce-agent\")\n- [ ] All `REPRODUCE_FORMULA` references replaced with `ACTIVE_FORMULA`\n" + "issue": 765, + "body": "Depends on: none\n\n## Goal\n\nThe disinto website becomes a versioned artifact: built by CI, published to Codeberg's generic package registry, deployed to staging automatically. Version visible in footer.\n\n## Files to add/change\n\n### `site/VERSION`\n```\n0.1.0\n```\n\n### `site/build.sh`\n```bash\n#!/bin/bash\nVERSION=$(cat VERSION)\nmkdir -p dist\ncp *.html *.jpg *.webp *.png *.ico *.xml robots.txt dist/\nsed -i \"s|Built from scrap, powered by a single battery.|v${VERSION} · Built from scrap, powered by a single battery.|\" dist/index.html\necho \"$VERSION\" > dist/VERSION\n```\n\n### `site/index.html`\nNo template placeholder needed — `build.sh` does the sed replacement on the existing footer text.\n\n### `.woodpecker/site.yml`\n```yaml\nwhen:\n path: \"site/**\"\n event: push\n branch: main\n\nsteps:\n - name: build\n image: alpine\n commands:\n - cd site && sh build.sh\n - VERSION=$(cat site/VERSION)\n - tar czf site-${VERSION}.tar.gz -C site/dist .\n\n - name: publish\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n --upload-file site-${VERSION}.tar.gz\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n\n - name: deploy-staging\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n 
\"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n -o site.tar.gz\n - rm -rf /srv/staging/*\n - tar xzf site.tar.gz -C /srv/staging/\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n volumes:\n - /home/debian/staging-site:/srv/staging\n```\n\n## Infra setup (manual, before first run)\n- `mkdir -p /home/debian/staging-site`\n- Add to Caddyfile: `staging.disinto.ai { root * /home/debian/staging-site; file_server }`\n- DNS: `staging.disinto.ai` A record → same IP as `disinto.ai`\n- Reload Caddy: `sudo systemctl reload caddy`\n- Add `forge_token` as Woodpecker repo secret for johba/disinto (if not already set)\n- Add `/home/debian/staging-site` to `WOODPECKER_BACKEND_DOCKER_VOLUMES`\n\n## Verification\n- [ ] Merge PR that touches `site/` → CI runs site pipeline\n- [ ] Package appears at `codeberg.org/johba/-/packages/generic/disinto-site/0.1.0`\n- [ ] `staging.disinto.ai` serves the site with `v0.1.0` in footer\n- [ ] `disinto.ai` (production) unchanged\n\n## Related\n- #764 — docker stack edge proxy + staging (future: this moves inside the stack)\n- #755 — vault-gated production promotion (production deploy comes later)\n\n## Affected files\n- `site/VERSION` — new, holds current version string\n- `site/build.sh` — new, builds dist/ with version injected into footer\n- `.woodpecker/site.yml` — new, CI pipeline for build/publish/deploy-staging" + }, + { + "action": "edit_body", + "issue": 764, + "body": "Depends on: none (builds on existing docker-compose generation in `bin/disinto`)\n\n## Design\n\n`disinto init` + `disinto up` starts two additional containers as base factory infrastructure:\n\n### Edge proxy (Caddy)\n- Reverse proxies to Forgejo and Woodpecker\n- Serves staging site\n- Runs on ports 80/443\n- At bootstrap: IP-only, self-signed TLS or HTTP\n- Domain + Let's Encrypt added later via vault resource request\n\n### Staging container (Caddy)\n- Static file server for the project's staging 
artifacts\n- Starts with a default \"Nothing shipped yet\" page\n- CI pipelines write to a shared volume to update staging content\n- No vault approval needed — staging is the factory's sandbox\n\n### docker-compose addition\n```yaml\nservices:\n edge:\n image: caddy:alpine\n ports:\n - \"80:80\"\n - \"443:443\"\n volumes:\n - ./Caddyfile:/etc/caddy/Caddyfile\n - caddy_data:/data\n depends_on:\n - forgejo\n - woodpecker-server\n - staging\n\n staging:\n image: caddy:alpine\n volumes:\n - staging-site:/srv/site\n # Not exposed directly — edge proxies to it\n\nvolumes:\n caddy_data:\n staging-site:\n```\n\n### Caddyfile (generated by `disinto init`)\n```\n# IP-only at bootstrap, domain added later\n:80 {\n handle /forgejo/* {\n reverse_proxy forgejo:3000\n }\n handle /ci/* {\n reverse_proxy woodpecker-server:8000\n }\n handle {\n reverse_proxy staging:80\n }\n}\n```\n\n### Staging update flow\n1. CI builds artifact (site tarball, etc.)\n2. CI step writes to `staging-site` volume\n3. Staging container serves updated content immediately\n4. 
No restart needed — Caddy serves files directly\n\n### Domain lifecycle\n- Bootstrap: no domain, edge serves on IP\n- Later: factory files vault resource request for domain\n- Human buys domain, sets DNS\n- Caddyfile updated with domain, Let's Encrypt auto-provisions TLS\n\n## Affected files\n- `bin/disinto` — `generate_compose()` adds edge + staging services\n- New: default staging page (\"Nothing shipped yet\")\n- New: Caddyfile template in `docker/`\n\n## Related\n- #755 — vault-gated deployment promotion (production comes later)\n- #757 — ops repo (domain is a resource requested through vault)\n\n## Acceptance criteria\n- [ ] `disinto init` generates a `docker-compose.yml` that includes `edge` (Caddy) and `staging` containers\n- [ ] Edge proxy routes `/forgejo/*` → Forgejo, `/ci/*` → Woodpecker, default → staging container\n- [ ] Staging container serves a default \"Nothing shipped yet\" page on first boot\n- [ ] `docker/` directory contains a Caddyfile template generated by `disinto init`\n- [ ] `disinto up` starts all containers including edge and staging without manual steps" + }, + { + "action": "edit_body", + "issue": 761, + "body": "Depends on: #747\n\n## Design\n\nEach agent account on the bundled Forgejo gets a `.profile` repo. This repo holds the agent's formula (copied from disinto at creation time) and its journal.\n\n### Structure\n```\n{agent-bot}/.profile/\n├── formula.toml # snapshot of the formula at agent creation time\n├── journal/ # daily logs of what the agent did\n│ ├── 2026-03-26.md\n│ └── ...\n└── knowledge/ # learned patterns, best-practices (optional, agent can evolve)\n```\n\n### Lifecycle\n1. **Create agent** — `disinto init` or `disinto spawn-agent` creates Forgejo account + `.profile` repo\n2. **Copy formula** — current `formulas/{role}.toml` from disinto repo is copied to `.profile/formula.toml`\n3. **Agent reads its own formula** — at session start, agent reads from its `.profile`, not from the disinto repo\n4. 
**Agent writes journal** — daily entries pushed to `.profile/journal/`\n5. **Agent can evolve knowledge** — best-practices, heuristics, patterns written to `.profile/knowledge/`\n\n### What this enables\n\n**A/B testing formulas:** Create two agents from different formula versions, run both against the same backlog, compare results (cycle time, CI pass rate, review rejection rate).\n\n**Rollback:** New formula worse? Kill agent, spawn from older formula version.\n\n**Audit:** What formula was this agent running when it produced that PR? Check its `.profile` at that git commit.\n\n**Drift tracking:** Diff what an agent learned (`.profile/knowledge/`) vs what it started with. Measure formula evolution over time.\n\n**Portability:** Move agent to different box — `git clone` its `.profile`.\n\n### Disinto repo becomes the template\n\n```\ndisinto repo:\n formulas/dev-agent.toml ← canonical template, evolves\n formulas/review-agent.toml\n formulas/planner.toml\n ...\n\nRunning agents:\n dev-bot-v2/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v2\n dev-bot-v3/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v3\n review-bot/.profile/formula.toml ← snapshot from formulas/review-agent.toml\n```\n\nThe formula in the disinto repo is the template. The `.profile` copy is the instance. 
They can diverge — that's a feature, not a bug.\n\n## Affected files\n- `bin/disinto` — agent creation copies formula to .profile\n- Agent session scripts — read formula from .profile instead of local formulas/ dir\n- Planner/supervisor — can read other agents' journals from their .profile repos\n\n## Related\n- #747 — per-agent Forgejo accounts (prerequisite)\n- #757 — ops repo (shared concerns stay there: vault, portfolio, resources)\n\n## Acceptance criteria\n- [ ] `disinto spawn-agent` (or `disinto init`) creates a Forgejo account + `.profile` repo for each agent bot\n- [ ] Current `formulas/{role}.toml` is copied to `.profile/formula.toml` at agent creation time\n- [ ] Agent session script reads its formula from `.profile/formula.toml`, not from the repo's `formulas/` directory\n- [ ] Agent writes daily journal entries to `.profile/journal/YYYY-MM-DD.md`" + }, + { + "action": "edit_body", + "issue": 742, + "body": "## Problem\n\n`gardener/recipes/*.toml` (4 files: cascade-rebase, chicken-egg-ci, flaky-test, shellcheck-violations) are an older pattern predating `formulas/*.toml`. Two systems for the same thing.\n\n## Fix\n\nMigrate any unique content from recipes to the gardener formula or to new formulas. 
Delete the recipes directory.\n\n## Affected files\n- `gardener/recipes/*.toml` — delete after migration\n- `formulas/run-gardener.toml` — absorb relevant content\n- Gardener scripts that reference recipes/\n\n## Acceptance criteria\n- [ ] Contents of `gardener/recipes/*.toml` are diff'd against `formulas/run-gardener.toml` — any unique content is migrated\n- [ ] `gardener/recipes/` directory is deleted\n- [ ] No scripts in `gardener/` reference the `recipes/` path after migration\n- [ ] ShellCheck passes on all modified scripts" + }, + { + "action": "add_label", + "issue": 742, + "label": "backlog" + }, + { + "action": "add_label", + "issue": 741, + "label": "backlog" } ] diff --git a/lib/AGENTS.md b/lib/AGENTS.md index a70e9a7..c0119fa 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -1,4 +1,4 @@ - + # Shared Helpers (`lib/`) All agents source `lib/env.sh` as their first action. Additional helpers are @@ -6,29 +6,20 @@ sourced as needed. | File | What it provides | Sourced by | |---|---|---| -| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. 
**Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent | -| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs [--step ]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. 
| dev-poll, review-poll, review-pr | +| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent | +| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status ` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number ` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote ` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). 
`ci_get_logs [--step ]` — reads CI logs from Woodpecker SQLite database; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr, supervisor-poll | | `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) | -| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. ` [--step ]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh | -| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). | env.sh (when `PROJECT_TOML` is set) | -| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. 
| dev-poll | -| `lib/formula-session.sh` | `acquire_cron_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven cron agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | +| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) | +| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. 
Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll | +| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | | `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points | -| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). 
Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh | +| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh | | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh | -| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh | -| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/-stack.lock`. 
| docker/edge/dispatcher.sh, reproduce formula | +| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh | +| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) | | `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) | | `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). 
| dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh | | `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) | | `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) | +| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. 
Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). **OAuth flock**: when `DISINTO_CONTAINER=1`, Claude CLI is wrapped in `flock -w 300 ~/.claude/session.lock` to queue concurrent token refresh attempts and prevent rotation races across agents sharing the same credentials. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh | | `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). 
`vault_request ` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/`, writes `vault/actions/.toml`, creates PR targeting `main` with title `vault: ` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher | -| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. | bin/disinto (hire-an-agent) | -| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) | -| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. 
Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE__PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) | -| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) | -| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence). Exports `_ACTUAL_OPS_SLUG`. | bin/disinto (init) | -| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for project agents. `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) | -| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml, `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) | -| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. 
| bin/disinto (hire) | -| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) | diff --git a/lib/agent-sdk.sh b/lib/agent-sdk.sh index 1c1a69c..82ad9a9 100644 --- a/lib/agent-sdk.sh +++ b/lib/agent-sdk.sh @@ -46,23 +46,9 @@ agent_run() { [ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL") local run_dir="${worktree_dir:-$(pwd)}" - local lock_file="${HOME}/.claude/session.lock" - mkdir -p "$(dirname "$lock_file")" - local output rc + local output log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})" - output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$? - if [ "$rc" -eq 124 ]; then - log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)" - elif [ "$rc" -ne 0 ]; then - log "agent_run: claude exited with code $rc" - # Log last 3 lines of output for diagnostics - if [ -n "$output" ]; then - log "agent_run: last output lines: $(echo "$output" | tail -3)" - fi - fi - if [ -z "$output" ]; then - log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)" - fi + output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true # Extract and persist session_id local new_sid @@ -80,37 +66,27 @@ agent_run() { # Nudge: if the model stopped without pushing, resume with encouragement. # Some models emit end_turn prematurely when confused. A nudge often unsticks them. 
- if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then + if [ -n "$_AGENT_SESSION_ID" ]; then local has_changes has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true local has_pushed has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true if [ -z "$has_pushed" ]; then + local nudge="You stopped but did not push any code. " if [ -n "$has_changes" ]; then - # Nudge: there are uncommitted changes - local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push." - log "agent_run: nudging (uncommitted changes)" - local nudge_rc - output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$? - if [ "$nudge_rc" -eq 124 ]; then - log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)" - elif [ "$nudge_rc" -ne 0 ]; then - log "agent_run: nudge claude exited with code $nudge_rc" - # Log last 3 lines of output for diagnostics - if [ -n "$output" ]; then - log "agent_run: nudge last output lines: $(echo "$output" | tail -3)" - fi - fi - new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true - if [ -n "$new_sid" ]; then - _AGENT_SESSION_ID="$new_sid" - printf '%s' "$new_sid" > "$SID_FILE" - fi - printf '%s' "$output" > "$diag_file" 2>/dev/null || true - _AGENT_LAST_OUTPUT="$output" + nudge+="You have uncommitted changes. Commit them and push." else - log "agent_run: no push and no changes — skipping nudge" + nudge+="Complete the implementation, commit, and push your branch." 
fi + log "agent_run: nudging (no push detected)" + output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true + new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true + if [ -n "$new_sid" ]; then + _AGENT_SESSION_ID="$new_sid" + printf '%s' "$new_sid" > "$SID_FILE" + fi + printf '%s' "$output" > "$diag_file" 2>/dev/null || true + _AGENT_LAST_OUTPUT="$output" fi fi } diff --git a/lib/agent-session.sh b/lib/agent-session.sh new file mode 100644 index 0000000..dbb1e2a --- /dev/null +++ b/lib/agent-session.sh @@ -0,0 +1,486 @@ +#!/usr/bin/env bash +# agent-session.sh — Shared tmux + Claude interactive session helpers +# +# Source this into agent orchestrator scripts for reusable session management. +# +# Functions: +# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS] +# agent_inject_into_session SESSION_NAME TEXT +# agent_kill_session SESSION_NAME +# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME] +# session_lock_acquire [TIMEOUT_SECS] +# session_lock_release + +# --- Cooperative session lock (fd-based) --- +# File descriptor for the session lock. Set by create_agent_session(). +# Callers can release/re-acquire via session_lock_release/session_lock_acquire +# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci). +SESSION_LOCK_FD="" + +# Release the session lock without closing the file descriptor. +# The fd stays open so it can be re-acquired later. +session_lock_release() { + if [ -n "${SESSION_LOCK_FD:-}" ]; then + flock -u "$SESSION_LOCK_FD" + fi +} + +# Re-acquire the session lock. Blocks until available or timeout. +# Opens the lock fd if not already open (for use by external callers). +# Args: [timeout_secs] (default 300) +# Returns 0 on success, 1 on timeout/error. 
# shellcheck disable=SC2120 # timeout arg is used by external callers
session_lock_acquire() {
  local timeout="${1:-300}"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    local lock_dir="${HOME}/.claude"
    # Bail out with a plain failure status rather than handing flock an
    # empty file descriptor when the lock file cannot be created/opened.
    mkdir -p "$lock_dir" || return 1
    exec {SESSION_LOCK_FD}>>"${lock_dir}/session.lock" || {
      SESSION_LOCK_FD=""
      return 1
    }
  fi
  flock -w "$timeout" "$SESSION_LOCK_FD"
}

# Wait for the Claude ❯ ready prompt in a tmux pane.
# Args: session [timeout_secs] (default 120)
# Returns 0 as soon as the captured pane contains the prompt character,
# 1 once timeout_secs worth of 2-second polls have passed without it.
agent_wait_for_claude_ready() {
  local session="$1"
  local timeout="${2:-120}"
  local elapsed=0
  local pane
  while [ "$elapsed" -lt "$timeout" ]; do
    # Capture first, match second: a `tmux | grep -q` pipeline can report
    # failure under `set -o pipefail` when grep exits before tmux is done
    # writing. A failed capture (e.g. no such session) counts as "not ready".
    pane=$(tmux capture-pane -t "$session" -p 2>/dev/null) || pane=""
    case "$pane" in
      *'❯'*) return 0 ;;
    esac
    sleep 2
    elapsed=$((elapsed + 2))
  done
  return 1
}

# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter.
# Args: session text
# Returns 0 when the text was pasted and Enter was sent; 1 when the temp file
# could not be created or any tmux step failed (in which case Enter is NOT
# sent, so an empty prompt is never submitted). The temp file and the tmux
# buffer are removed on every path.
agent_inject_into_session() {
  local session="$1"
  local text="$2"
  local tmpfile
  local buffer="agent-inject-$$"
  local rc=0
  # Re-acquire session lock before injecting — Claude will resume working.
  # Best-effort: a lock timeout does not stop the injection.
  # shellcheck disable=SC2119 # using default timeout
  session_lock_acquire || true
  # Best-effort as well: inject even if the prompt was not seen in time.
  agent_wait_for_claude_ready "$session" 120 || true
  # Clear idle marker — new work incoming
  rm -f "/tmp/claude-idle-${session}.ts"
  tmpfile=$(mktemp /tmp/agent-inject-XXXXXX) || return 1
  printf '%s' "$text" > "$tmpfile"
  # Go through a named tmux buffer so arbitrary text is pasted verbatim.
  if tmux load-buffer -b "$buffer" "$tmpfile" \
    && tmux paste-buffer -t "$session" -b "$buffer"; then
    sleep 0.5
    tmux send-keys -t "$session" "" Enter || rc=1
  else
    rc=1
  fi
  tmux delete-buffer -b "$buffer" 2>/dev/null || true
  rm -f "$tmpfile"
  return "$rc"
}

# Create a tmux session running Claude in the given workdir.
# Installs a Stop hook for idle detection (see monitor_phase_loop).
# Installs a PreToolUse hook to guard destructive Bash operations.
# Optionally installs a PostToolUse hook for phase file write detection.
# Optionally installs a StopFailure hook for immediate phase file update on API error.
# Args: session workdir [phase_file]
# Returns 0 if session is ready, 1 otherwise.
create_agent_session() {
  local session="$1"
  local workdir="${2:-.}"
  local phase_file="${3:-}"
  local hooks_dir="${FACTORY_ROOT}/lib/hooks"

  # Prepare settings directory for hooks. Every hook below is merged into
  # this one settings file; a hook is only installed when its script exists
  # and is executable, and installation is best-effort (a jq failure must
  # not prevent the session from starting).
  mkdir -p "${workdir}/.claude"
  local settings="${workdir}/.claude/settings.json"

  # Stop hook for idle detection: when Claude finishes a response, the hook
  # writes a timestamp to a marker file. monitor_phase_loop checks this
  # marker instead of fragile tmux pane scraping.
  local idle_marker="/tmp/claude-idle-${session}.ts"
  local hook_script="${hooks_dir}/on-idle-stop.sh"
  if [ -x "$hook_script" ]; then
    local hook_cmd="${hook_script} ${idle_marker}"
    # When a phase file is available, pass it and the session name so the
    # hook can nudge Claude if it returns to the prompt without signalling.
    if [ -n "$phase_file" ]; then
      hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}"
    fi
    _agent_session_install_hook "$settings" Stop "" "$hook_cmd" || true
  fi

  # PostToolUse hook for phase file write detection: when Claude writes to
  # the phase file via Bash or Write, the hook writes a marker so
  # monitor_phase_loop can react immediately instead of waiting for the
  # next mtime-based poll cycle.
  if [ -n "$phase_file" ]; then
    local phase_marker="/tmp/phase-changed-${session}.marker"
    local phase_hook_script="${hooks_dir}/on-phase-change.sh"
    if [ -x "$phase_hook_script" ]; then
      _agent_session_install_hook "$settings" PostToolUse "Bash|Write" \
        "${phase_hook_script} ${phase_file} ${phase_marker}" || true
      # Drop any marker left over from an earlier session of the same name.
      rm -f "$phase_marker"
    fi
  fi

  # StopFailure hook for immediate phase file update on API error: when
  # Claude hits a rate limit, server error, billing error, or auth failure,
  # the hook writes PHASE:failed to the phase file and touches the
  # phase-changed marker so monitor_phase_loop picks it up within one poll
  # cycle instead of waiting for idle timeout (up to 2 hours).
  if [ -n "$phase_file" ]; then
    local stop_failure_hook_script="${hooks_dir}/on-stop-failure.sh"
    if [ -x "$stop_failure_hook_script" ]; then
      # Same path as phase_marker above; kept as its own variable so this
      # stanza stays self-contained if the PostToolUse stanza is removed.
      local sf_phase_marker="/tmp/phase-changed-${session}.marker"
      _agent_session_install_hook "$settings" StopFailure \
        "rate_limit|server_error|authentication_failed|billing_error" \
        "${stop_failure_hook_script} ${phase_file} ${sf_phase_marker}" || true
    fi
  fi

  # PreToolUse hook for destructive operation guard: blocks force push to
  # primary branch, rm -rf outside worktree, direct API merge calls, and
  # checkout/switch to primary branch. Claude sees the denial reason on
  # exit 2 and can self-correct.
  local guard_hook_script="${hooks_dir}/on-pretooluse-guard.sh"
  if [ -x "$guard_hook_script" ]; then
    local abs_workdir
    # Fall back to the path as given when it cannot be resolved.
    abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir"
    _agent_session_install_hook "$settings" PreToolUse "Bash" \
      "${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}" || true
  fi

  # SessionEnd hook for guaranteed cleanup: when the Claude session exits
  # (clean or crash), write a termination marker so monitor_phase_loop
  # detects the exit faster than tmux has-session polling alone.
  local exit_marker="/tmp/claude-exited-${session}.ts"
  local session_end_hook_script="${hooks_dir}/on-session-end.sh"
  if [ -x "$session_end_hook_script" ]; then
    _agent_session_install_hook "$settings" SessionEnd "" \
      "${session_end_hook_script} ${exit_marker}" || true
  fi
  rm -f "$exit_marker"

  # SessionStart hook for context re-injection after compaction: when
  # Claude Code compacts context during long sessions, the phase protocol
  # instructions are lost. This hook fires after each compaction and
  # outputs the content of a context file so Claude retains critical
  # instructions. The context file is written by callers via
  # write_compact_context().
  if [ -n "$phase_file" ]; then
    local compact_hook_script="${hooks_dir}/on-compact-reinject.sh"
    if [ -x "$compact_hook_script" ]; then
      local context_file="${phase_file%.phase}.context"
      _agent_session_install_hook "$settings" SessionStart "compact" \
        "${compact_hook_script} ${context_file}" || true
    fi
  fi

  rm -f "$idle_marker"
  local model_flag=""
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    model_flag="--model ${CLAUDE_MODEL}"
  fi

  # Acquire a session-level mutex via fd-based flock to prevent concurrent
  # Claude sessions from racing on OAuth token refresh. Unlike the previous
  # command-wrapper flock, the fd approach allows callers to release the lock
  # during idle phases (awaiting_review/awaiting_ci) and re-acquire before
  # injecting the next prompt. See #724.
  # Use ~/.claude/session.lock so the lock is shared across containers when
  # the host ~/.claude directory is bind-mounted.
  local lock_dir="${HOME}/.claude"
  mkdir -p "$lock_dir"
  local claude_lock="${lock_dir}/session.lock"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    exec {SESSION_LOCK_FD}>>"${claude_lock}"
  fi
  if ! flock -w 300 "$SESSION_LOCK_FD"; then
    return 1
  fi
  local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"

  tmux new-session -d -s "$session" -c "$workdir" \
    "$claude_cmd" 2>/dev/null
  sleep 1
  if ! tmux has-session -t "$session" 2>/dev/null; then
    # The session never came up, so no Claude process needs the mutex.
    # Release it instead of keeping other agents queued behind a lock that
    # protects nothing (the fd stays open for a later re-acquire).
    flock -u "$SESSION_LOCK_FD" 2>/dev/null || true
    return 1
  fi
  agent_wait_for_claude_ready "$session" 120 || return 1
  return 0
}

# Private helper for create_agent_session: register one command hook in a
# Claude settings JSON file.
# Args: settings_file event matcher command
#   settings_file — path to settings.json; created if it does not exist
#   event         — key under .hooks (Stop, PreToolUse, PostToolUse, ...)
#   matcher       — matcher string stored with the entry ("" for none)
#   command       — hook command line
# Idempotent: when an entry for the same event already carries exactly this
# command, the file content is left unchanged.
# Returns 0 on success, non-zero if jq (or the final rename) fails; a failed
# merge removes its temporary file and leaves the existing settings alone.
_agent_session_install_hook() {
  local settings="$1"
  local event="$2"
  local matcher="$3"
  local cmd="$4"
  if [ -f "$settings" ]; then
    # Append to existing project settings via temp file + rename so a jq
    # failure can never truncate the settings file.
    if jq --arg ev "$event" --arg m "$matcher" --arg cmd "$cmd" '
      if (.hooks[$ev] // [] | any(.[]; .hooks[]?.command == $cmd))
      then .
      else .hooks[$ev] = (.hooks[$ev] // []) + [{
        matcher: $m,
        hooks: [{type: "command", command: $cmd}]
      }]
      end
    ' "$settings" > "${settings}.tmp"; then
      mv "${settings}.tmp" "$settings"
    else
      rm -f "${settings}.tmp"
      return 1
    fi
  else
    jq -n --arg ev "$event" --arg m "$matcher" --arg cmd "$cmd" '{
      hooks: {
        ($ev): [{
          matcher: $m,
          hooks: [{type: "command", command: $cmd}]
        }]
      }
    }' > "$settings"
  fi
}

# Inject a prompt/formula into a session (alias for agent_inject_into_session).
# All arguments are forwarded unchanged.
inject_formula() {
  agent_inject_into_session "$@"
}

# Monitor a phase file, calling a callback on changes and handling idle timeout.
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate).
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
+# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly. +# Args: phase_file idle_timeout_secs callback_fn [session_name] +# session_name — tmux session to health-check; falls back to $SESSION_NAME global +# +# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh) +# to detect when Claude finishes responding without writing a phase signal. +# If the marker exists for 3 consecutive polls with no phase written, the session +# is killed and the callback invoked with "PHASE:failed". +monitor_phase_loop() { + local phase_file="$1" + local idle_timeout="$2" + local callback="$3" + local _session="${4:-${SESSION_NAME:-}}" + # Export resolved session name so callbacks can reference it regardless of + # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT). + export _MONITOR_SESSION="$_session" + local poll_interval="${PHASE_POLL_INTERVAL:-10}" + local last_mtime=0 + local idle_elapsed=0 + local idle_pane_count=0 + + while true; do + sleep "$poll_interval" + idle_elapsed=$(( idle_elapsed + poll_interval )) + + # Session health check: SessionEnd hook marker provides fast detection, + # tmux has-session is the fallback for unclean exits (e.g. tmux crash). + local exit_marker="/tmp/claude-exited-${_session}.ts" + if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then + local current_phase + current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) + case "$current_phase" in + PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate) + ;; # terminal — fall through to phase handler + *) + # Call callback with "crashed" — let agent-specific code handle recovery + if type "${callback}" &>/dev/null; then + "$callback" "PHASE:crashed" + fi + # If callback didn't restart session, break + if ! 
tmux has-session -t "${_session}" 2>/dev/null; then + _MONITOR_LOOP_EXIT="crashed" + return 1 + fi + idle_elapsed=0 + idle_pane_count=0 + continue + ;; + esac + fi + + # Check phase-changed marker from PostToolUse hook — if present, the hook + # detected a phase file write so we reset last_mtime to force processing + # this cycle instead of waiting for the next mtime change. + local phase_marker="/tmp/phase-changed-${_session}.marker" + if [ -f "$phase_marker" ]; then + rm -f "$phase_marker" + last_mtime=0 + fi + + # Check phase file for changes + local phase_mtime + phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0) + local current_phase + current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true) + + if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then + # No phase change — check idle timeout + if [ "$idle_elapsed" -ge "$idle_timeout" ]; then + _MONITOR_LOOP_EXIT="idle_timeout" + agent_kill_session "${_session}" + return 0 + fi + # Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker + # file when Claude finishes a response. If the marker exists and no phase + # has been written, Claude returned to the prompt without following the + # phase protocol. 3 consecutive polls = confirmed idle (not mid-turn). + local idle_marker="/tmp/claude-idle-${_session}.ts" + if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then + idle_pane_count=$(( idle_pane_count + 1 )) + if [ "$idle_pane_count" -ge 3 ]; then + _MONITOR_LOOP_EXIT="idle_prompt" + # Session is killed before the callback is invoked. + # Callbacks that handle PHASE:failed must not assume the session is alive. 
+ agent_kill_session "${_session}" + if type "${callback}" &>/dev/null; then + "$callback" "PHASE:failed" + fi + return 0 + fi + else + idle_pane_count=0 + fi + continue + fi + + # Phase changed + last_mtime="$phase_mtime" + # shellcheck disable=SC2034 # read by phase-handler.sh callback + LAST_PHASE_MTIME="$phase_mtime" + idle_elapsed=0 + idle_pane_count=0 + + # Terminal phases + case "$current_phase" in + PHASE:done|PHASE:merged) + _MONITOR_LOOP_EXIT="done" + if type "${callback}" &>/dev/null; then + "$callback" "$current_phase" + fi + return 0 + ;; + PHASE:failed|PHASE:escalate) + _MONITOR_LOOP_EXIT="$current_phase" + if type "${callback}" &>/dev/null; then + "$callback" "$current_phase" + fi + return 0 + ;; + esac + + # Non-terminal phase — call callback + if type "${callback}" &>/dev/null; then + "$callback" "$current_phase" + fi + done +} + +# Write context to a file for re-injection after context compaction. +# The SessionStart compact hook reads this file and outputs it to stdout. +# Args: phase_file content +write_compact_context() { + local phase_file="$1" + local content="$2" + local context_file="${phase_file%.phase}.context" + printf '%s\n' "$content" > "$context_file" +} + +# Kill a tmux session gracefully (no-op if not found). +agent_kill_session() { + local session="${1:-}" + [ -n "$session" ] && tmux kill-session -t "$session" 2>/dev/null || true + rm -f "/tmp/claude-idle-${session}.ts" + rm -f "/tmp/phase-changed-${session}.marker" + rm -f "/tmp/claude-exited-${session}.ts" + rm -f "/tmp/claude-nudge-${session}.count" +} + +# Read the current phase from a phase file, stripped of whitespace. 
+# Usage: read_phase [file] — defaults to $PHASE_FILE +read_phase() { + local file="${1:-${PHASE_FILE:-}}" + { cat "$file" 2>/dev/null || true; } | head -1 | tr -d '[:space:]' +} diff --git a/lib/branch-protection.sh b/lib/branch-protection.sh index e972977..81a2be1 100644 --- a/lib/branch-protection.sh +++ b/lib/branch-protection.sh @@ -51,30 +51,14 @@ setup_vault_branch_protection() { _bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}" - # Check if branch exists with retry loop (handles race condition after initial push) - local branch_exists="0" - local max_attempts=3 - local attempt=1 - - while [ "$attempt" -le "$max_attempts" ]; do - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" = "200" ]; then - _bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}" - break - fi - - if [ "$attempt" -lt "$max_attempts" ]; then - _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
- sleep 2 - fi - attempt=$((attempt + 1)) - done + # Check if branch exists + local branch_exists + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts" + _bp_log "ERROR: Branch ${branch} does not exist" return 1 fi @@ -244,30 +228,14 @@ setup_profile_branch_protection() { local api_url api_url="${FORGE_URL}/api/v1/repos/${repo}" - # Check if branch exists with retry loop (handles race condition after initial push) - local branch_exists="0" - local max_attempts=3 - local attempt=1 - - while [ "$attempt" -le "$max_attempts" ]; do - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" = "200" ]; then - _bp_log "Branch ${branch} exists on ${repo}" - break - fi - - if [ "$attempt" -lt "$max_attempts" ]; then - _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
- sleep 2 - fi - attempt=$((attempt + 1)) - done + # Check if branch exists + local branch_exists + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" + _bp_log "ERROR: Branch ${branch} does not exist on ${repo}" return 1 fi @@ -411,7 +379,7 @@ remove_branch_protection() { # - Allow review-bot to approve PRs # # Args: -# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto') +# $1 - Repo path in format 'owner/repo' (e.g., 'johba/disinto') # $2 - Branch to protect (default: main) # # Returns: 0 on success, 1 on failure @@ -430,30 +398,14 @@ setup_project_branch_protection() { local api_url api_url="${FORGE_URL}/api/v1/repos/${repo}" - # Check if branch exists with retry loop (handles race condition after initial push) - local branch_exists="0" - local max_attempts=3 - local attempt=1 - - while [ "$attempt" -le "$max_attempts" ]; do - branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") - - if [ "$branch_exists" = "200" ]; then - _bp_log "Branch ${branch} exists on ${repo}" - break - fi - - if [ "$attempt" -lt "$max_attempts" ]; then - _bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..." 
- sleep 2 - fi - attempt=$((attempt + 1)) - done + # Check if branch exists + local branch_exists + branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/${branch}" 2>/dev/null || echo "0") if [ "$branch_exists" != "200" ]; then - _bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts" + _bp_log "ERROR: Branch ${branch} does not exist on ${repo}" return 1 fi @@ -584,7 +536,7 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then echo "Required environment variables:" echo " FORGE_TOKEN Forgejo API token (admin user recommended)" echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)" - echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)" + echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., johba/disinto-ops)" exit 0 ;; esac diff --git a/lib/ci-helpers.sh b/lib/ci-helpers.sh index 11c668e..42f306e 100644 --- a/lib/ci-helpers.sh +++ b/lib/ci-helpers.sh @@ -7,6 +7,27 @@ set -euo pipefail # ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh) # classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh) +# ensure_blocked_label_id — look up (or create) the "blocked" label, print its ID. +# Caches the result in _BLOCKED_LABEL_ID to avoid repeated API calls. 
+# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() +ensure_blocked_label_id() { + if [ -n "${_BLOCKED_LABEL_ID:-}" ]; then + printf '%s' "$_BLOCKED_LABEL_ID" + return 0 + fi + _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \ + | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true) + if [ -z "$_BLOCKED_LABEL_ID" ]; then + _BLOCKED_LABEL_ID=$(curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/labels" \ + -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \ + | jq -r '.id // empty' 2>/dev/null || true) + fi + printf '%s' "$_BLOCKED_LABEL_ID" +} + # ensure_priority_label — look up (or create) the "priority" label, print its ID. # Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls. # Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api() diff --git a/lib/ci-setup.sh b/lib/ci-setup.sh deleted file mode 100644 index 7c4c5dd..0000000 --- a/lib/ci-setup.sh +++ /dev/null @@ -1,455 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# ci-setup.sh — CI setup functions for Woodpecker and cron configuration -# -# Internal functions (called via _load_ci_context + _*_impl): -# _install_cron_impl() - Install crontab entries for project agents -# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker -# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow -# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker -# -# Globals expected (asserted by _load_ci_context): -# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) -# FORGE_TOKEN - Forge API token -# FACTORY_ROOT - Root of the disinto factory -# -# Usage: -# source "${FACTORY_ROOT}/lib/ci-setup.sh" -# ============================================================================= -set -euo pipefail - -# Assert required globals are set before using this module. 
-_load_ci_context() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -# Generate and optionally install cron entries for the project agents. -# Usage: install_cron -_install_cron_impl() { - local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - - # In compose mode, skip host cron — the agents container runs cron internally - if [ "$bare" = false ]; then - echo "" - echo "Cron: skipped (agents container handles scheduling in compose mode)" - return - fi - - # Bare mode: crontab is required on the host - if ! command -v crontab &>/dev/null; then - echo "Error: crontab not found (required for bare-metal mode)" >&2 - echo " Install: apt install cron / brew install cron" >&2 - exit 1 - fi - - # Use absolute path for the TOML in cron entries - local abs_toml - abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")" - - local cron_block - cron_block="# disinto: ${name} -2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1 -4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1 -0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1" - - echo "" - echo "Cron entries to install:" - echo "$cron_block" - echo "" - - # Check if cron entries already exist - local current_crontab - current_crontab=$(crontab -l 2>/dev/null || true) - if echo "$current_crontab" | grep -q "# disinto: ${name}"; then - echo "Cron: skipped (entries for ${name} already installed)" - return - fi - - if [ "$auto_yes" = false ] && [ -t 0 ]; then - read -rp "Install these cron entries? [y/N] " confirm - if [[ ! "$confirm" =~ ^[Yy] ]]; then - echo "Skipped cron install. 
Add manually with: crontab -e" - return - fi - fi - - # Append to existing crontab - if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then - echo "Cron entries installed for ${name}" - else - echo "Error: failed to install cron entries" >&2 - return 1 - fi -} - -# Set up Woodpecker CI to use Forgejo as its forge backend. -# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo. -# Usage: create_woodpecker_oauth -_create_woodpecker_oauth_impl() { - local forge_url="$1" - local _repo_slug="$2" # unused but required for signature compatibility - - echo "" - echo "── Woodpecker OAuth2 setup ────────────────────────────" - - # Create OAuth2 application on Forgejo for Woodpecker - local oauth2_name="woodpecker-ci" - local redirect_uri="http://localhost:8000/authorize" - local existing_app client_id client_secret - - # Check if OAuth2 app already exists - existing_app=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \ - | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true - - if [ -n "$existing_app" ]; then - echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})" - client_id="$existing_app" - else - local oauth2_resp - oauth2_resp=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/applications/oauth2" \ - -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \ - 2>/dev/null) || oauth2_resp="" - - if [ -z "$oauth2_resp" ]; then - echo "Warning: failed to create OAuth2 app on Forgejo" >&2 - return - fi - - client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty') - client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty') - - if [ -z "$client_id" ]; then - echo "Warning: OAuth2 app creation returned no client_id" >&2 - return 
- fi - - echo "OAuth2: ${oauth2_name} created (client_id=${client_id})" - fi - - # Store Woodpecker forge config in .env - # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references - # WOODPECKER_HOST must be host-accessible URL to match OAuth2 redirect_uri - local env_file="${FACTORY_ROOT}/.env" - local wp_vars=( - "WOODPECKER_FORGEJO=true" - "WOODPECKER_FORGEJO_URL=${forge_url}" - "WOODPECKER_HOST=http://localhost:8000" - ) - if [ -n "${client_id:-}" ]; then - wp_vars+=("WP_FORGEJO_CLIENT=${client_id}") - fi - if [ -n "${client_secret:-}" ]; then - wp_vars+=("WP_FORGEJO_SECRET=${client_secret}") - fi - - for var_line in "${wp_vars[@]}"; do - local var_name="${var_line%%=*}" - if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then - sed -i "s|^${var_name}=.*|${var_line}|" "$env_file" - else - printf '%s\n' "$var_line" >> "$env_file" - fi - done - echo "Config: Woodpecker forge vars written to .env" -} - -# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow. -# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created). -# Called after compose stack is up, before activate_woodpecker_repo. 
-# Usage: generate_woodpecker_token -_generate_woodpecker_token_impl() { - local forge_url="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - local env_file="${FACTORY_ROOT}/.env" - local admin_user="disinto-admin" - local admin_pass="${_FORGE_ADMIN_PASS:-}" - - # Skip if already set - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - echo "Config: WOODPECKER_TOKEN already set in .env" - return 0 - fi - - echo "" - echo "── Woodpecker token generation ────────────────────────" - - if [ -z "$admin_pass" ]; then - echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2 - echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2 - return 1 - fi - - # Wait for Woodpecker to become ready - echo -n "Waiting for Woodpecker" - local retries=0 - while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 30 ]; then - echo "" - echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2 - return 1 - fi - echo -n "." 
- sleep 2 - done - echo " ready" - - # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token - local cookie_jar auth_body_file - cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX) - auth_body_file=$(mktemp /tmp/wp-body-XXXXXX) - - # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent) - local csrf - csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \ - | grep -o 'name="_csrf"[^>]*' | head -1 \ - | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || csrf="" - - if [ -z "$csrf" ]; then - echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \ - -o /dev/null \ - "${forge_url}/user/login" \ - --data-urlencode "_csrf=${csrf}" \ - --data-urlencode "user_name=${admin_user}" \ - --data-urlencode "password=${admin_pass}" \ - 2>/dev/null || true - - # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param) - local wp_redir - wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \ - "${wp_server}/authorize" 2>/dev/null) || wp_redir="" - - if [ -z "$wp_redir" ]; then - echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2 - rm -f "$cookie_jar" "$auth_body_file" - return 1 - fi - - # Rewrite internal Docker network URLs to host-accessible URLs. - # Handle both plain and URL-encoded forms of the internal hostnames. 
- local forge_url_enc wp_server_enc - forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g') - wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g') - wp_redir=$(printf '%s' "$wp_redir" \ - | sed "s|http://forgejo:3000|${forge_url}|g" \ - | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \ - | sed "s|http://woodpecker:8000|${wp_server}|g" \ - | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g") - - # Step 3: Hit Forgejo OAuth authorize endpoint with session - # First time: shows consent page. Already approved: redirects with code. - local auth_headers redirect_loc auth_code - auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o "$auth_body_file" \ - "$wp_redir" 2>/dev/null) || auth_headers="" - - redirect_loc=$(printf '%s' "$auth_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - # Auto-approved: extract code from redirect - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - else - # Consent page: extract CSRF and all form fields, POST grant approval - local consent_csrf form_client_id form_state form_redirect_uri - consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \ - | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \ - | cut -d'"' -f2) || consent_csrf="" - form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id="" - form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state="" - form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \ - | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri="" - - if [ -n "$consent_csrf" ]; then - local grant_headers - grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \ - -D - -o /dev/null -X POST \ - "${forge_url}/login/oauth/grant" \ 
- --data-urlencode "_csrf=${consent_csrf}" \ - --data-urlencode "client_id=${form_client_id}" \ - --data-urlencode "state=${form_state}" \ - --data-urlencode "scope=" \ - --data-urlencode "nonce=" \ - --data-urlencode "redirect_uri=${form_redirect_uri}" \ - --data-urlencode "granted=true" \ - 2>/dev/null) || grant_headers="" - - redirect_loc=$(printf '%s' "$grant_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then - auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/') - fi - fi - fi - - rm -f "$auth_body_file" - - if [ -z "${auth_code:-}" ]; then - echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2 - rm -f "$cookie_jar" - return 1 - fi - - # Step 4: Complete Woodpecker OAuth callback (exchanges code for session) - local state - state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p') - - local wp_headers wp_token - wp_headers=$(curl -sf -c "$cookie_jar" \ - -D - -o /dev/null \ - "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \ - 2>/dev/null) || wp_headers="" - - # Extract token from redirect URL (Woodpecker returns ?access_token=...) - redirect_loc=$(printf '%s' "$wp_headers" \ - | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}') - - wp_token="" - if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then - wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/') - fi - - # Fallback: check for user_sess cookie - if [ -z "$wp_token" ]; then - wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token="" - fi - - rm -f "$cookie_jar" - - if [ -z "$wp_token" ]; then - echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2 - return 1 - fi - - # Step 5: Create persistent personal access token via Woodpecker API - # WP v3 requires CSRF header for POST operations with session tokens. 
- local wp_csrf - wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \ - "${wp_server}/web-config.js" 2>/dev/null \ - | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf="" - - local pat_resp final_token - pat_resp=$(curl -sf -X POST \ - -b "user_sess=${wp_token}" \ - ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \ - "${wp_server}/api/user/token" \ - 2>/dev/null) || pat_resp="" - - final_token="" - if [ -n "$pat_resp" ]; then - final_token=$(printf '%s' "$pat_resp" \ - | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \ - 2>/dev/null) || final_token="" - fi - - # Use persistent token if available, otherwise use session token - final_token="${final_token:-$wp_token}" - - # Save to .env - if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file" - else - printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file" - fi - export WOODPECKER_TOKEN="$final_token" - echo "Config: WOODPECKER_TOKEN generated and saved to .env" -} - -# Activate a repo in Woodpecker CI. -# Usage: activate_woodpecker_repo -_activate_woodpecker_repo_impl() { - local forge_repo="$1" - local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}" - - # Wait for Woodpecker to become ready after stack start - local retries=0 - while [ $retries -lt 10 ]; do - if curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; then - break - fi - retries=$((retries + 1)) - sleep 2 - done - - if ! 
curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then - echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2 - return - fi - - echo "" - echo "── Woodpecker repo activation ─────────────────────────" - - local wp_token="${WOODPECKER_TOKEN:-}" - if [ -z "$wp_token" ]; then - echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - return - fi - - local wp_repo_id - wp_repo_id=$(curl -sf \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})" - else - # Get Forgejo repo numeric ID for WP activation - local forge_repo_id - forge_repo_id=$(curl -sf \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \ - | jq -r '.id // empty' 2>/dev/null) || forge_repo_id="" - - local activate_resp - activate_resp=$(curl -sf -X POST \ - -H "Authorization: Bearer ${wp_token}" \ - "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \ - 2>/dev/null) || activate_resp="" - - wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true - - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})" - - # Set pipeline timeout to 5 minutes (default is 60) - if curl -sf -X PATCH \ - -H "Authorization: Bearer ${wp_token}" \ - -H "Content-Type: application/json" \ - "${wp_server}/api/repos/${wp_repo_id}" \ - -d '{"timeout": 5}' >/dev/null 2>&1; then - echo "Config: pipeline timeout set to 5 minutes" - fi - else - echo "Warning: could not activate repo in Woodpecker" >&2 - echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2 - fi - 
fi - - # Store repo ID for later TOML generation - if [ -n "$wp_repo_id" ] && [ "$wp_repo_id" != "0" ]; then - _WP_REPO_ID="$wp_repo_id" - fi -} diff --git a/lib/env.sh b/lib/env.sh index 20e64d0..bf7a239 100755 --- a/lib/env.sh +++ b/lib/env.sh @@ -13,61 +13,59 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" if [ "${DISINTO_CONTAINER:-}" = "1" ]; then DISINTO_DATA_DIR="${HOME}/data" DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs" - mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher} + mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics} else DISINTO_LOG_DIR="${FACTORY_ROOT}" fi export DISINTO_LOG_DIR # Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env. -# Inside containers (DISINTO_CONTAINER=1), compose environment is the source of truth. -# On bare metal, .env/.env.enc is sourced to provide default values. -if [ "${DISINTO_CONTAINER:-}" != "1" ]; then - if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then - set -a - _saved_forge_url="${FORGE_URL:-}" - # Use temp file + validate dotenv format before sourcing (avoids eval injection) - # SOPS -d automatically verifies MAC/GCM authentication tag during decryption - _tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; } - if ! 
sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then - echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2 - rm -f "$_tmpenv" - exit 1 - fi - # Validate: non-empty, non-comment lines must match KEY=value pattern - # Filter out blank lines and comments before validation - _validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true) - if [ -n "$_validated" ]; then - # Write validated content to a second temp file and source it - _validated_env=$(mktemp) - printf '%s\n' "$_validated" > "$_validated_env" - # shellcheck source=/dev/null - source "$_validated_env" - rm -f "$_validated_env" - else - echo "Error: .env.enc decryption output failed format validation" >&2 - rm -f "$_tmpenv" - exit 1 - fi +# Always source .env — cron jobs inside the container do NOT inherit compose +# env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are +# already set and won't be clobbered since env.sh uses ${VAR:-default} patterns +# for derived values. FORGE_URL from .env (localhost:3000) is overridden below +# by the compose-injected value when running via docker exec. +if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then + set -a + _saved_forge_url="${FORGE_URL:-}" + _saved_forge_token="${FORGE_TOKEN:-}" + # Use temp file + validate dotenv format before sourcing (avoids eval injection) + # SOPS -d automatically verifies MAC/GCM authentication tag during decryption + _tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; } + if ! 
sops -d --output-type dotenv "$FACTORY_ROOT/.env.enc" > "$_tmpenv" 2>/dev/null; then + echo "Error: failed to decrypt .env.enc — decryption failed, possible corruption" >&2 rm -f "$_tmpenv" - set +a - [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" - elif [ -f "$FACTORY_ROOT/.env" ]; then - # Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker) - _saved_forge_url="${FORGE_URL:-}" - set -a - # shellcheck source=/dev/null - source "$FACTORY_ROOT/.env" - set +a - [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" + exit 1 fi -fi - -# Allow per-container token override (#375): .env sets the default FORGE_TOKEN -# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a -# different Forgejo identity (e.g. dev-qwen). -if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then - export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE" + # Validate: non-empty, non-comment lines must match KEY=value pattern + # Filter out blank lines and comments before validation + _validated=$(grep -E '^[A-Za-z_][A-Za-z0-9_]*=' "$_tmpenv" 2>/dev/null || true) + if [ -n "$_validated" ]; then + # Write validated content to a second temp file and source it + _validated_env=$(mktemp) + printf '%s\n' "$_validated" > "$_validated_env" + # shellcheck source=/dev/null + source "$_validated_env" + rm -f "$_validated_env" + else + echo "Error: .env.enc decryption output failed format validation" >&2 + rm -f "$_tmpenv" + exit 1 + fi + rm -f "$_tmpenv" + set +a + [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" + [ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token" +elif [ -f "$FACTORY_ROOT/.env" ]; then + # Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker) + _saved_forge_url="${FORGE_URL:-}" + _saved_forge_token="${FORGE_TOKEN:-}" + set -a + # shellcheck source=/dev/null + source "$FACTORY_ROOT/.env" + set +a + [ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url" + [ -n 
"$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token" fi # PATH: foundry, node, system @@ -79,11 +77,16 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML" fi -# Forge token -export FORGE_TOKEN="${FORGE_TOKEN:-}" +# Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN +if [ -z "${FORGE_TOKEN:-}" ]; then + FORGE_TOKEN="${CODEBERG_TOKEN:-}" +fi +export FORGE_TOKEN +export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat -# Review bot token +# Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}" +export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat # Per-agent tokens (#747): each agent gets its own Forgejo identity. # Falls back to FORGE_TOKEN for backwards compat with single-token setups. @@ -94,14 +97,18 @@ export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}" export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}" -# Bot usernames filter -export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}" +# Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES +export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}}" +export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat -# Project config -export FORGE_REPO="${FORGE_REPO:-}" +# Project config (FORGE_* preferred, CODEBERG_* fallback) +export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}" +export CODEBERG_REPO="${FORGE_REPO}" # backwards compat export FORGE_URL="${FORGE_URL:-http://localhost:3000}" export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}" export 
FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}" +export CODEBERG_API="${FORGE_API}" # backwards compat +export CODEBERG_WEB="${FORGE_WEB}" # backwards compat # tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo) if [ -z "${TEA_LOGIN:-}" ]; then case "${FORGE_URL}" in @@ -137,12 +144,8 @@ unset CLAWHUB_TOKEN 2>/dev/null || true export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1 # Shared log helper -# Usage: log "message" -# Output: [2026-04-03T14:00:00Z] agent: message -# Where agent is set via LOG_AGENT variable (defaults to caller's context) log() { - local agent="${LOG_AGENT:-agent}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" + printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" } # ============================================================================= @@ -206,6 +209,8 @@ forge_api() { -H "Content-Type: application/json" \ "${FORGE_API}${path}" "$@" } +# Backwards-compat alias +codeberg_api() { forge_api "$@"; } # Paginate a Forge API GET endpoint and return all items as a merged JSON array. # Usage: forge_api_all /path (no existing query params) diff --git a/lib/file-action-issue.sh b/lib/file-action-issue.sh new file mode 100644 index 0000000..abba4c8 --- /dev/null +++ b/lib/file-action-issue.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# file-action-issue.sh — File an action issue for a formula run +# +# Usage: source this file, then call file_action_issue. +# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh +# +# file_action_issue <body> +# Sets FILED_ISSUE_NUM on success. +# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected + +# Load secret scanner +# shellcheck source=secret-scan.sh +source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh" + +file_action_issue() { + local formula_name="$1" title="$2" body="$3" + FILED_ISSUE_NUM="" + + # Secret scan: reject issue bodies containing embedded secrets + if ! 
scan_for_secrets "$body"; then + echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2 + return 4 + fi + + # Dedup: skip if an open action issue for this formula already exists + local open_actions + open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true) + if [ -n "$open_actions" ] && [ "$open_actions" != "null" ]; then + local existing + existing=$(printf '%s' "$open_actions" | \ + jq --arg f "$formula_name" '[.[] | select(.title | test($f))] | length' 2>/dev/null || echo 0) + if [ "${existing:-0}" -gt 0 ]; then + return 1 + fi + fi + + # Fetch 'action' label ID + local action_label_id + action_label_id=$(forge_api GET "/labels" 2>/dev/null | \ + jq -r '.[] | select(.name == "action") | .id' 2>/dev/null || true) + if [ -z "$action_label_id" ]; then + return 2 + fi + + # Create the issue + local payload result + payload=$(jq -nc \ + --arg title "$title" \ + --arg body "$body" \ + --argjson labels "[$action_label_id]" \ + '{title: $title, body: $body, labels: $labels}') + + result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true) + FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true) + + if [ -z "$FILED_ISSUE_NUM" ]; then + return 3 + fi +} diff --git a/lib/forge-push.sh b/lib/forge-push.sh deleted file mode 100644 index 1da61f7..0000000 --- a/lib/forge-push.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# forge-push.sh — push_to_forge() function -# -# Handles pushing a local clone to the Forgejo remote and verifying the push. -# -# Globals expected: -# FORGE_URL - Forge instance URL (e.g. 
http://localhost:3000) -# FORGE_TOKEN - API token for Forge operations (used for API verification) -# FORGE_PASS - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x) -# FACTORY_ROOT - Root of the disinto factory -# PRIMARY_BRANCH - Primary branch name (e.g. main) -# -# Usage: -# source "${FACTORY_ROOT}/lib/forge-push.sh" -# push_to_forge <repo_root> <forge_url> <repo_slug> -# ============================================================================= -set -euo pipefail - -# Assert required globals are set before using this module. -_assert_forge_push_globals() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FORGE_PASS:-}" ] && missing+=("FORGE_PASS") - [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: forge-push.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -# Push local clone to the Forgejo remote. -push_to_forge() { - local repo_root="$1" forge_url="$2" repo_slug="$3" - - # Build authenticated remote URL: http://dev-bot:<password>@host:port/org/repo.git - # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works. 
- if [ -z "${FORGE_PASS:-}" ]; then - echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2 - return 1 - fi - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_PASS}@|") - local remote_url="${auth_url}/${repo_slug}.git" - # Display URL without token - local display_url="${forge_url}/${repo_slug}.git" - - # Always set the remote URL to ensure credentials are current - if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then - git -C "$repo_root" remote set-url forgejo "$remote_url" - else - git -C "$repo_root" remote add forgejo "$remote_url" - fi - echo "Remote: forgejo -> ${display_url}" - - # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo) - if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then - echo "Push: skipped (local repo has no commits)" - return 0 - fi - - # Push all branches and tags - echo "Pushing: branches to forgejo" - if ! git -C "$repo_root" push forgejo --all 2>&1; then - echo "Error: failed to push branches to Forgejo" >&2 - return 1 - fi - echo "Pushing: tags to forgejo" - if ! 
git -C "$repo_root" push forgejo --tags 2>&1; then - echo "Error: failed to push tags to Forgejo" >&2 - return 1 - fi - - # Verify the repo is no longer empty (Forgejo may need a moment to index pushed refs) - local is_empty="true" - local verify_attempt - for verify_attempt in $(seq 1 5); do - local repo_info - repo_info=$(curl -sf --max-time 10 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info="" - if [ -z "$repo_info" ]; then - is_empty="skipped" - break # API unreachable, skip verification - fi - is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"') - if [ "$is_empty" != "true" ]; then - echo "Verify: repo is not empty (push confirmed)" - break - fi - if [ "$verify_attempt" -lt 5 ]; then - sleep 2 - fi - done - if [ "$is_empty" = "true" ]; then - echo "Warning: Forgejo repo still reports empty after push" >&2 - return 1 - fi -} diff --git a/lib/forge-setup.sh b/lib/forge-setup.sh deleted file mode 100644 index d640755..0000000 --- a/lib/forge-setup.sh +++ /dev/null @@ -1,550 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# forge-setup.sh — setup_forge() and helpers for Forgejo provisioning -# -# Handles admin user creation, bot user creation, token generation, -# password resets, repo creation, and collaborator setup. -# -# Globals expected (asserted by _load_init_context): -# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) -# FACTORY_ROOT - Root of the disinto factory -# PRIMARY_BRANCH - Primary branch name (e.g. main) -# -# Usage: -# source "${FACTORY_ROOT}/lib/forge-setup.sh" -# setup_forge <forge_url> <repo_slug> -# ============================================================================= -set -euo pipefail - -# Assert required globals are set before using this module. 
-_load_init_context() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: forge-setup.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -# Execute a command in the Forgejo container (for admin operations) -_forgejo_exec() { - local use_bare="${DISINTO_BARE:-false}" - if [ "$use_bare" = true ]; then - docker exec -u git disinto-forgejo "$@" - else - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@" - fi -} - -# Provision or connect to a local Forgejo instance. -# Creates admin + bot users, generates API tokens, stores in .env. -# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose. -setup_forge() { - local forge_url="$1" - local repo_slug="$2" - local use_bare="${DISINTO_BARE:-false}" - - echo "" - echo "── Forge setup ────────────────────────────────────────" - - # Check if Forgejo is already running - if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then - echo "Forgejo: ${forge_url} (already running)" - else - echo "Forgejo not reachable at ${forge_url}" - echo "Starting Forgejo via Docker..." - - if ! 
command -v docker &>/dev/null; then - echo "Error: docker not found — needed to provision Forgejo" >&2 - echo " Install Docker or start Forgejo manually at ${forge_url}" >&2 - exit 1 - fi - - # Extract port from forge_url - local forge_port - forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|') - forge_port="${forge_port:-3000}" - - if [ "$use_bare" = true ]; then - # Bare-metal mode: standalone docker run - mkdir -p "${FORGEJO_DATA_DIR}" - - if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then - docker start disinto-forgejo >/dev/null 2>&1 || true - else - docker run -d \ - --name disinto-forgejo \ - --restart unless-stopped \ - -p "${forge_port}:3000" \ - -p 2222:22 \ - -v "${FORGEJO_DATA_DIR}:/data" \ - -e "FORGEJO__database__DB_TYPE=sqlite3" \ - -e "FORGEJO__server__ROOT_URL=${forge_url}/" \ - -e "FORGEJO__server__HTTP_PORT=3000" \ - -e "FORGEJO__service__DISABLE_REGISTRATION=true" \ - codeberg.org/forgejo/forgejo:11.0 - fi - else - # Compose mode: start Forgejo via docker compose - docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo - fi - - # Wait for Forgejo to become healthy - echo -n "Waiting for Forgejo to start" - local retries=0 - while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do - retries=$((retries + 1)) - if [ "$retries" -gt 60 ]; then - echo "" - echo "Error: Forgejo did not become ready within 60s" >&2 - exit 1 - fi - echo -n "." - sleep 1 - done - echo " ready" - fi - - # Wait for Forgejo database to accept writes (API may be ready before DB is) - echo -n "Waiting for Forgejo database" - local db_ready=false - for _i in $(seq 1 30); do - if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then - db_ready=true - break - fi - echo -n "." 
- sleep 1 - done - echo "" - if [ "$db_ready" != true ]; then - echo "Error: Forgejo database not ready after 30s" >&2 - exit 1 - fi - - # Create admin user if it doesn't exist - local admin_user="disinto-admin" - local admin_pass - local env_file="${FACTORY_ROOT}/.env" - - # Re-read persisted admin password if available (#158) - if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) - fi - # Generate a fresh password only when none was persisted - if [ -z "${admin_pass:-}" ]; then - admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - fi - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Creating admin user: ${admin_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --email "admin@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create admin user '${admin_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - - # Verify admin user was actually created - if ! 
curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then - echo "Error: admin user '${admin_user}' not found after creation" >&2 - exit 1 - fi - - # Persist admin password to .env for idempotent re-runs (#158) - if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file" - else - printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file" - fi - else - echo "Admin user: ${admin_user} (already exists)" - # Only reset password if basic auth fails (#158, #267) - # Forgejo 11.x may ignore --must-change-password=false, blocking token creation - if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/user" >/dev/null 2>&1; then - _forgejo_exec forgejo admin user change-password \ - --username "${admin_user}" \ - --password "${admin_pass}" \ - --must-change-password=false - fi - fi - # Preserve password for Woodpecker OAuth2 token generation (#779) - _FORGE_ADMIN_PASS="$admin_pass" - - # Create human user (disinto-admin) as site admin if it doesn't exist - local human_user="disinto-admin" - local human_pass - human_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - echo "Creating human user: ${human_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --admin \ - --username "${human_user}" \ - --password "${human_pass}" \ - --email "admin@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create human user '${human_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. 
- _forgejo_exec forgejo admin user change-password \ - --username "${human_user}" \ - --password "${human_pass}" \ - --must-change-password=false - - # Verify human user was actually created - if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - echo "Error: human user '${human_user}' not found after creation" >&2 - exit 1 - fi - echo " Human user '${human_user}' created as site admin" - else - echo "Human user: ${human_user} (already exists)" - fi - - # Delete existing admin token if present (token sha1 is only returned at creation time) - local existing_token_id - existing_token_id=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id="" - if [ -n "$existing_token_id" ]; then - curl -sf -X DELETE \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true - fi - - # Create admin token (fresh, so sha1 is returned) - local admin_token - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - exit 1 - fi - - # Get or create human user token - local human_token - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then - # Delete existing human token if present (token sha1 is only returned at creation time) - local existing_human_token_id - existing_human_token_id=$(curl -sf \ - -u "${human_user}:${human_pass}" \ - "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \ - | jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id="" - if 
[ -n "$existing_human_token_id" ]; then - curl -sf -X DELETE \ - -u "${human_user}:${human_pass}" \ - "${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true - fi - - # Create human token (fresh, so sha1 is returned) - human_token=$(curl -sf -X POST \ - -u "${human_user}:${human_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${human_user}/tokens" \ - -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \ - | jq -r '.sha1 // empty') || human_token="" - - if [ -n "$human_token" ]; then - # Store human token in .env - if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then - sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file" - else - printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file" - fi - export HUMAN_TOKEN="$human_token" - echo " Human token saved (HUMAN_TOKEN)" - fi - fi - - # Create bot users and tokens - # Each agent gets its own Forgejo account for identity and audit trail (#747). - # Map: bot-username -> env-var-name for the token - local -A bot_token_vars=( - [dev-bot]="FORGE_TOKEN" - [review-bot]="FORGE_REVIEW_TOKEN" - [planner-bot]="FORGE_PLANNER_TOKEN" - [gardener-bot]="FORGE_GARDENER_TOKEN" - [vault-bot]="FORGE_VAULT_TOKEN" - [supervisor-bot]="FORGE_SUPERVISOR_TOKEN" - [predictor-bot]="FORGE_PREDICTOR_TOKEN" - [architect-bot]="FORGE_ARCHITECT_TOKEN" - ) - # Map: bot-username -> env-var-name for the password - # Forgejo 11.x API tokens don't work for git HTTP push (#361). - # Store passwords so agents can use password auth for git operations. 
- local -A bot_pass_vars=( - [dev-bot]="FORGE_PASS" - [review-bot]="FORGE_REVIEW_PASS" - [planner-bot]="FORGE_PLANNER_PASS" - [gardener-bot]="FORGE_GARDENER_PASS" - [vault-bot]="FORGE_VAULT_PASS" - [supervisor-bot]="FORGE_SUPERVISOR_PASS" - [predictor-bot]="FORGE_PREDICTOR_PASS" - [architect-bot]="FORGE_ARCHITECT_PASS" - ) - - local bot_user bot_pass token token_var pass_var - - for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do - bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - token_var="${bot_token_vars[$bot_user]}" - - # Check if bot user exists - local user_exists=false - if curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - user_exists=true - fi - - if [ "$user_exists" = false ]; then - echo "Creating bot user: ${bot_user}" - local create_output - if ! create_output=$(_forgejo_exec forgejo admin user create \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --email "${bot_user}@disinto.local" \ - --must-change-password=false 2>&1); then - echo "Error: failed to create bot user '${bot_user}':" >&2 - echo " ${create_output}" >&2 - exit 1 - fi - # Forgejo 11.x ignores --must-change-password=false on create; - # explicitly clear the flag so basic-auth token creation works. - _forgejo_exec forgejo admin user change-password \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --must-change-password=false - - # Verify bot user was actually created - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${admin_token}" \ - "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then - echo "Error: bot user '${bot_user}' not found after creation" >&2 - exit 1 - fi - echo " ${bot_user} user created" - else - echo " ${bot_user} user exists (resetting password for token generation)" - # User exists but may not have a known password. 
- # Use admin API to reset the password so we can generate a new token. - _forgejo_exec forgejo admin user change-password \ - --username "${bot_user}" \ - --password "${bot_pass}" \ - --must-change-password=false || { - echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2 - exit 1 - } - fi - - # Generate token via API (basic auth as the bot user — Forgejo requires - # basic auth on POST /users/{username}/tokens, token auth is rejected) - # First, try to delete existing tokens to avoid name collision - # Use bot user's own Basic Auth (we just set the password above) - local existing_token_ids - existing_token_ids=$(curl -sf \ - -u "${bot_user}:${bot_pass}" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" 2>/dev/null \ - | jq -r '.[].id // empty' 2>/dev/null) || existing_token_ids="" - - # Delete any existing tokens for this user - if [ -n "$existing_token_ids" ]; then - while IFS= read -r tid; do - [ -n "$tid" ] && curl -sf -X DELETE \ - -u "${bot_user}:${bot_pass}" \ - "${forge_url}/api/v1/users/${bot_user}/tokens/${tid}" >/dev/null 2>&1 || true - done <<< "$existing_token_ids" - fi - - token=$(curl -sf -X POST \ - -u "${bot_user}:${bot_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${bot_user}/tokens" \ - -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || token="" - - if [ -z "$token" ]; then - echo "Error: failed to create API token for '${bot_user}'" >&2 - exit 1 - fi - - # Store token in .env under the per-agent variable name - if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then - sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file" - else - printf '%s=%s\n' "$token_var" "$token" >> "$env_file" - fi - export "${token_var}=${token}" - echo " ${bot_user} token generated and saved (${token_var})" - - # Store password in .env for git HTTP push (#361) - # Forgejo 11.x API tokens don't work for git push; password auth does. 
- pass_var="${bot_pass_vars[$bot_user]}" - if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then - sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file" - else - printf '%s=%s\n' "$pass_var" "$bot_pass" >> "$env_file" - fi - export "${pass_var}=${bot_pass}" - echo " ${bot_user} password saved (${pass_var})" - - # Backwards-compat aliases for dev-bot and review-bot - if [ "$bot_user" = "dev-bot" ]; then - export CODEBERG_TOKEN="$token" - elif [ "$bot_user" = "review-bot" ]; then - export REVIEW_BOT_TOKEN="$token" - fi - done - - # Create .profile repos for all bot users (if they don't already exist) - # This runs the same logic as hire-an-agent Step 2-3 for idempotent setup - echo "" - echo "── Setting up .profile repos ────────────────────────────" - - local -a bot_users=(dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot) - local bot_user - - for bot_user in "${bot_users[@]}"; do - # Check if .profile repo already exists - if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${bot_user}/.profile" >/dev/null 2>&1; then - echo " ${bot_user}/.profile already exists" - continue - fi - - echo "Creating ${bot_user}/.profile repo..." - - # Create the repo using the admin API to ensure it's created in the bot user's namespace - local create_output - create_output=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${bot_user}/repos" \ - -d "{\"name\":\".profile\",\"description\":\"${bot_user}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true - - if echo "$create_output" | grep -q '"id":\|[0-9]'; then - echo " Created ${bot_user}/.profile (via admin API)" - else - echo " Warning: failed to create ${bot_user}/.profile: ${create_output}" >&2 - fi - done - - # Store FORGE_URL in .env if not already present - if ! 
grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then - printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file" - fi - - # Create the repo on Forgejo if it doesn't exist - local org_name="${repo_slug%%/*}" - local repo_name="${repo_slug##*/}" - - # Check if repo already exists - if ! curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then - - # Try creating org first (ignore if exists) - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs" \ - -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true - - # Create repo under org - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - # Fallback: create under the human user namespace using admin endpoint - if [ -n "${admin_token:-}" ]; then - if ! curl -sf -X POST \ - -H "Authorization: token ${admin_token}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${org_name}/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2 - exit 1 - fi - elif [ -n "${HUMAN_TOKEN:-}" ]; then - if ! 
curl -sf -X POST \ - -H "Authorization: token ${HUMAN_TOKEN}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/user/repos" \ - -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then - echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2 - exit 1 - fi - else - echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2 - exit 1 - fi - fi - - # Add all bot users as collaborators with appropriate permissions - # dev-bot: write (PR creation via lib/vault.sh) - # review-bot: read (PR review) - # planner-bot: write (prerequisites.md, memory) - # gardener-bot: write (backlog grooming) - # vault-bot: write (vault items) - # supervisor-bot: read (health monitoring) - # predictor-bot: read (pattern detection) - # architect-bot: write (sprint PRs) - local bot_perm - declare -A bot_permissions=( - [dev-bot]="write" - [review-bot]="read" - [planner-bot]="write" - [gardener-bot]="write" - [vault-bot]="write" - [supervisor-bot]="read" - [predictor-bot]="read" - [architect-bot]="write" - ) - for bot_user in "${!bot_permissions[@]}"; do - bot_perm="${bot_permissions[$bot_user]}" - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \ - -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true - done - - # Add disinto-admin as admin collaborator - curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \ - -d '{"permission":"admin"}' >/dev/null 2>&1 || true - - echo "Repo: ${repo_slug} created on Forgejo" - else - echo "Repo: ${repo_slug} (already exists on Forgejo)" - fi - - echo "Forge: ${forge_url} (ready)" -} diff --git a/lib/formula-session.sh b/lib/formula-session.sh index 
1b2b884..e6c6aae 100644 --- a/lib/formula-session.sh +++ b/lib/formula-session.sh @@ -1,34 +1,24 @@ #!/usr/bin/env bash # formula-session.sh — Shared helpers for formula-driven cron agents # -# Provides reusable utility functions for the common cron-wrapper pattern -# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. +# Provides reusable functions for the common cron-wrapper + tmux-session +# pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. # # Functions: # acquire_cron_lock LOCK_FILE — PID lock with stale cleanup +# check_memory [MIN_MB] — skip if available RAM too low # load_formula FORMULA_FILE — sets FORMULA_CONTENT # build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK -# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env) -# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode) -# formula_worktree_setup WORKTREE — isolated worktree for formula execution +# start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude +# build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase) +# run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log +# formula_phase_callback PHASE — standard crash-recovery callback # formula_prepare_profile_context — load lessons from .profile repo (pre-session) -# formula_lessons_block — return lessons block for prompt -# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal -# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT -# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo -# _profile_has_repo — check if agent has .profile repo -# _count_undigested_journals — count journal entries to digest -# _profile_digest_journals — digest journals into lessons -# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo -# resolve_agent_identity — resolve agent user login from FORGE_TOKEN -# 
build_graph_section — run build-graph.py and set GRAPH_SECTION -# build_scratch_instruction SCRATCH_FILE — return context scratch instruction -# read_scratch_context SCRATCH_FILE — return scratch file content block -# ensure_ops_repo — clone/pull ops repo -# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo -# cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale # -# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers. +# Requires: lib/agent-session.sh sourced first (for create_agent_session, +# agent_kill_session, agent_inject_into_session). +# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE, +# PROJECT_REPO_ROOT, PROMPT (set by the calling script). # ── Cron guards ────────────────────────────────────────────────────────── @@ -50,6 +40,18 @@ acquire_cron_lock() { trap 'rm -f "$_CRON_LOCK_FILE"' EXIT } +# check_memory [MIN_MB] +# Exits 0 (skip) if available memory is below MIN_MB (default 2000). +check_memory() { + local min_mb="${1:-2000}" + local avail_mb + avail_mb=$(free -m | awk '/Mem:/{print $7}') + if [ "${avail_mb:-0}" -lt "$min_mb" ]; then + log "run: skipping — only ${avail_mb}MB available (need ${min_mb})" + exit 0 + fi +} + # ── Agent identity resolution ──────────────────────────────────────────── # resolve_agent_identity @@ -73,24 +75,6 @@ resolve_agent_identity() { return 0 } -# ── Forge remote resolution ────────────────────────────────────────────── - -# resolve_forge_remote -# Resolves FORGE_REMOTE by matching FORGE_URL hostname against git remotes. -# Falls back to "origin" if no match found. -# Requires: FORGE_URL, git repo with remotes configured. -# Exports: FORGE_REMOTE (always set). 
-resolve_forge_remote() { - # Extract hostname from FORGE_URL (e.g., https://codeberg.org/user/repo -> codeberg.org) - _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||; s|:.*||') - # Find git remote whose push URL matches the forge host - FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}') - # Fallback to origin if no match found - FORGE_REMOTE="${FORGE_REMOTE:-origin}" - export FORGE_REMOTE - log "forge remote: ${FORGE_REMOTE}" -} - # ── .profile repo management ────────────────────────────────────────────── # ensure_profile_repo [AGENT_IDENTITY] @@ -150,7 +134,7 @@ ensure_profile_repo() { # Checks if the agent has a .profile repo by querying Forgejo API. # Returns 0 if repo exists, 1 otherwise. _profile_has_repo() { - local agent_identity="${AGENT_IDENTITY:-}" + local agent_identity="${1:-${AGENT_IDENTITY:-}}" if [ -z "$agent_identity" ]; then if ! resolve_agent_identity; then @@ -186,8 +170,8 @@ _count_undigested_journals() { # Runs a claude -p one-shot to digest undigested journals into lessons-learned.md # Returns 0 on success, 1 on failure. _profile_digest_journals() { - local agent_identity="${AGENT_IDENTITY:-}" - local model="${CLAUDE_MODEL:-opus}" + local agent_identity="${1:-${AGENT_IDENTITY:-}}" + local model="${2:-${CLAUDE_MODEL:-opus}}" if [ -z "$agent_identity" ]; then if ! resolve_agent_identity; then @@ -253,6 +237,7 @@ Write the complete, rewritten lessons-learned.md content below. No preamble, no output=$(claude -p "$digest_prompt" \ --output-format json \ --dangerously-skip-permissions \ + --max-tokens 1000 \ ${model:+--model "$model"} \ 2>>"$LOGFILE" || echo '{"result":"error"}') @@ -447,6 +432,7 @@ Write the journal entry below. Use markdown format." 
output=$(claude -p "$reflection_prompt" \ --output-format json \ --dangerously-skip-permissions \ + --max-tokens 500 \ ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \ 2>>"$LOGFILE" || echo '{"result":"error"}') @@ -571,7 +557,7 @@ $(cat "$ctx_path") done } -# ── Ops repo helpers ──────────────────────────────────────────────────── +# ── Ops repo helpers ───────────────────────────────────────────────── # ensure_ops_repo # Clones or pulls the ops repo so agents can read/write operational data. @@ -634,6 +620,90 @@ ops_commit_and_push() { ) } +# ── Session management ─────────────────────────────────────────────────── + +# start_formula_session SESSION WORKDIR PHASE_FILE +# Kills stale session, resets phase file, creates a per-agent git worktree +# for session isolation, and creates a new tmux + claude session in it. +# Sets _FORMULA_SESSION_WORKDIR to the worktree path (or original workdir +# on fallback). Callers must clean up via remove_formula_worktree after +# the session ends. +# Returns 0 on success, 1 on failure. +start_formula_session() { + local session="$1" workdir="$2" phase_file="$3" + agent_kill_session "$session" + rm -f "$phase_file" + + # Create per-agent git worktree for session isolation. + # Each agent gets its own CWD so Claude Code treats them as separate + # projects — no resume collisions between sequential formula runs. + _FORMULA_SESSION_WORKDIR="/tmp/disinto-${session}" + # Clean up any stale worktree from a previous run + git -C "$workdir" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true + if git -C "$workdir" worktree add "$_FORMULA_SESSION_WORKDIR" HEAD --detach 2>/dev/null; then + log "Created worktree: ${_FORMULA_SESSION_WORKDIR}" + else + log "WARNING: worktree creation failed — falling back to ${workdir}" + _FORMULA_SESSION_WORKDIR="$workdir" + fi + + log "Creating tmux session: ${session}" + if ! 
create_agent_session "$session" "$_FORMULA_SESSION_WORKDIR" "$phase_file"; then + log "ERROR: failed to create tmux session ${session}" + return 1 + fi +} + +# remove_formula_worktree +# Removes the worktree created by start_formula_session if it differs from +# PROJECT_REPO_ROOT. Safe to call multiple times. No-op if no worktree was created. +remove_formula_worktree() { + if [ -n "${_FORMULA_SESSION_WORKDIR:-}" ] \ + && [ "$_FORMULA_SESSION_WORKDIR" != "${PROJECT_REPO_ROOT:-}" ]; then + git -C "$PROJECT_REPO_ROOT" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true + log "Removed worktree: ${_FORMULA_SESSION_WORKDIR}" + fi +} + +# formula_phase_callback PHASE +# Standard crash-recovery phase callback for formula sessions. +# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT. +# Uses _FORMULA_CRASH_COUNT (auto-initialized) for single-retry limit. +# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller +formula_phase_callback() { + local phase="$1" + log "phase: ${phase}" + case "$phase" in + PHASE:crashed) + if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then + log "ERROR: session crashed again after recovery — giving up" + return 0 + fi + _FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 )) + log "WARNING: tmux session died unexpectedly — attempting recovery" + if create_agent_session "${_MONITOR_SESSION:-$SESSION_NAME}" "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then + agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" "$PROMPT" + log "Recovery session started" + else + log "ERROR: could not restart session after crash" + fi + ;; + PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged) + agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}" + ;; + esac +} + +# ── Stale crashed worktree cleanup ───────────────────────────────────────── + +# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] +# Thin wrapper around worktree_cleanup_stale() 
from lib/worktree.sh. +# Kept for backwards compatibility with existing callers. +# Requires: lib/worktree.sh sourced. +cleanup_stale_crashed_worktrees() { + worktree_cleanup_stale "${1:-24}" +} + # ── Scratch file helpers (compaction survival) ──────────────────────────── # build_scratch_instruction SCRATCH_FILE @@ -709,26 +779,25 @@ build_sdk_prompt_footer() { # Creates an isolated worktree for synchronous formula execution. # Fetches primary branch, cleans stale worktree, creates new one, and # sets an EXIT trap for cleanup. -# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE. -# Ensure resolve_forge_remote() is called before this function. +# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH. formula_worktree_setup() { local worktree="$1" cd "$PROJECT_REPO_ROOT" || return - git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true + git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true worktree_cleanup "$worktree" - git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null + git worktree add "$worktree" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null # shellcheck disable=SC2064 # expand worktree now, not at trap time trap "worktree_cleanup '$worktree'" EXIT } -# ── Prompt helpers ────────────────────────────────────────────────────── +# ── Prompt + monitor helpers ────────────────────────────────────────────── # build_prompt_footer [EXTRA_API_LINES] -# Assembles the common forge API reference + environment block for formula prompts. -# Sets PROMPT_FOOTER. +# Assembles the common forge API reference + environment + phase protocol +# block for formula prompts. Sets PROMPT_FOOTER. # Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1. # Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT, -# PRIMARY_BRANCH. +# PRIMARY_BRANCH, PHASE_FILE. 
build_prompt_footer() { local extra_api="${1:-}" # shellcheck disable=SC2034 # consumed by the calling script's PROMPT @@ -744,15 +813,66 @@ NEVER echo or include the actual token value in output — always reference \${F FACTORY_ROOT=${FACTORY_ROOT} PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT} OPS_REPO_ROOT=${OPS_REPO_ROOT} -PRIMARY_BRANCH=${PRIMARY_BRANCH}" +PRIMARY_BRANCH=${PRIMARY_BRANCH} +PHASE_FILE=${PHASE_FILE} + +## Phase protocol (REQUIRED) +When all work is done: + echo 'PHASE:done' > '${PHASE_FILE}' +On unrecoverable error: + printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'" } -# ── Stale crashed worktree cleanup ──────────────────────────────────────── +# run_formula_and_monitor AGENT_NAME [TIMEOUT] +# Starts the formula session, injects PROMPT, monitors phase, and logs result. +# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT, +# FORGE_REPO, CLAUDE_MODEL (exported). +# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller +run_formula_and_monitor() { + local agent_name="$1" + local timeout="${2:-7200}" + local callback="${3:-formula_phase_callback}" -# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS] -# Thin wrapper around worktree_cleanup_stale() from lib/worktree.sh. -# Kept for backwards compatibility with existing callers. -# Requires: lib/worktree.sh sourced. -cleanup_stale_crashed_worktrees() { - worktree_cleanup_stale "${1:-24}" + if ! 
start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then + exit 1 + fi + + # Write phase protocol to context file for compaction survival + if [ -n "${PROMPT_FOOTER:-}" ]; then + write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER" + fi + + agent_inject_into_session "$SESSION_NAME" "$PROMPT" + log "Prompt sent to tmux session" + + log "Monitoring phase file: ${PHASE_FILE}" + _FORMULA_CRASH_COUNT=0 + + monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback" + + FINAL_PHASE=$(read_phase "$PHASE_FILE") + log "Final phase: ${FINAL_PHASE:-none}" + + if [ "$FINAL_PHASE" != "PHASE:done" ]; then + case "${_MONITOR_LOOP_EXIT:-}" in + idle_prompt) + log "${agent_name}: Claude returned to prompt without writing phase signal" + ;; + idle_timeout) + log "${agent_name}: timed out with no phase signal" + ;; + *) + log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})" + ;; + esac + fi + + # Preserve worktree on crash for debugging; clean up on success + if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then + worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})" + else + remove_formula_worktree + fi + + log "--- ${agent_name^} run done ---" } diff --git a/lib/generators.sh b/lib/generators.sh deleted file mode 100644 index 80386d2..0000000 --- a/lib/generators.sh +++ /dev/null @@ -1,443 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# generators — template generation functions for disinto init -# -# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and -# deployment pipeline configs. 
-# -# Globals expected (must be set before sourcing): -# FACTORY_ROOT - Root of the disinto factory -# PROJECT_NAME - Project name for the project repo (defaults to 'project') -# PRIMARY_BRANCH - Primary branch name (defaults to 'main') -# -# Usage: -# source "${FACTORY_ROOT}/lib/generators.sh" -# generate_compose "$forge_port" -# generate_caddyfile -# generate_staging_index -# generate_deploy_pipelines "$repo_root" "$project_name" -# ============================================================================= -set -euo pipefail - -# Assert required globals are set -: "${FACTORY_ROOT:?FACTORY_ROOT must be set}" -# PROJECT_NAME defaults to 'project' if not set (env.sh may have set it from FORGE_REPO) -PROJECT_NAME="${PROJECT_NAME:-project}" -# PRIMARY_BRANCH defaults to main (env.sh may have set it to 'master') -PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}" - -# Generate docker-compose.yml in the factory root. -_generate_compose_impl() { - local forge_port="${1:-3000}" - local compose_file="${FACTORY_ROOT}/docker-compose.yml" - - # Check if compose file already exists - if [ -f "$compose_file" ]; then - echo "Compose: ${compose_file} (already exists, skipping)" - return 0 - fi - - cat > "$compose_file" <<'COMPOSEEOF' -# docker-compose.yml — generated by disinto init -# Brings up Forgejo, Woodpecker, and the agent runtime. 
- -services: - forgejo: - image: codeberg.org/forgejo/forgejo:1 - container_name: disinto-forgejo - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - forgejo-data:/data - environment: - FORGEJO__database__DB_TYPE: sqlite3 - FORGEJO__server__ROOT_URL: http://forgejo:3000/ - FORGEJO__server__HTTP_PORT: "3000" - FORGEJO__security__INSTALL_LOCK: "true" - FORGEJO__service__DISABLE_REGISTRATION: "true" - FORGEJO__webhook__ALLOWED_HOST_LIST: "private" - networks: - - disinto-net - - woodpecker: - image: woodpeckerci/woodpecker-server:v3 - container_name: disinto-woodpecker - restart: unless-stopped - security_opt: - - apparmor=unconfined - ports: - - "8000:8000" - - "9000:9000" - volumes: - - woodpecker-data:/var/lib/woodpecker - environment: - WOODPECKER_FORGEJO: "true" - WOODPECKER_FORGEJO_URL: http://forgejo:3000 - WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-} - WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-} - WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000} - WOODPECKER_OPEN: "true" - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_DATABASE_DRIVER: sqlite3 - WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite - WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}" - depends_on: - - forgejo - networks: - - disinto-net - - woodpecker-agent: - image: woodpeckerci/woodpecker-agent:v3 - container_name: disinto-woodpecker-agent - restart: unless-stopped - network_mode: host - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock - environment: - WOODPECKER_SERVER: localhost:9000 - WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-} - WOODPECKER_GRPC_SECURE: "false" - WOODPECKER_HEALTHCHECK_ADDR: ":3333" - WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net - WOODPECKER_MAX_WORKFLOWS: 1 - depends_on: - - woodpecker - - agents: - build: - context: . 
- dockerfile: docker/agents/Dockerfile - container_name: disinto-agents - restart: unless-stopped - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - - project-repos:/home/agent/repos - - ${HOME}/.claude:/home/agent/.claude - - ${HOME}/.claude.json:/home/agent/.claude.json:ro - - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro - - ${HOME}/.ssh:/home/agent/.ssh:ro - - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro - - woodpecker-data:/woodpecker-data:ro - environment: - FORGE_URL: http://forgejo:3000 - FORGE_TOKEN: ${FORGE_TOKEN:-} - FORGE_REVIEW_TOKEN: ${FORGE_REVIEW_TOKEN:-} - FORGE_PLANNER_TOKEN: ${FORGE_PLANNER_TOKEN:-} - FORGE_GARDENER_TOKEN: ${FORGE_GARDENER_TOKEN:-} - FORGE_VAULT_TOKEN: ${FORGE_VAULT_TOKEN:-} - FORGE_SUPERVISOR_TOKEN: ${FORGE_SUPERVISOR_TOKEN:-} - FORGE_PREDICTOR_TOKEN: ${FORGE_PREDICTOR_TOKEN:-} - FORGE_ARCHITECT_TOKEN: ${FORGE_ARCHITECT_TOKEN:-} - FORGE_BOT_USERNAMES: ${FORGE_BOT_USERNAMES:-} - WOODPECKER_TOKEN: ${WOODPECKER_TOKEN:-} - CLAUDE_TIMEOUT: ${CLAUDE_TIMEOUT:-7200} - CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: ${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1} - ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} - FORGE_ADMIN_PASS: ${FORGE_ADMIN_PASS:-} - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - WOODPECKER_DATA_DIR: /woodpecker-data - # IMPORTANT: agents get explicit environment variables (forge tokens, CI tokens, config). - # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in - # .env.vault.enc and are NEVER injected here — only the runner - # container receives them at fire time (AD-006, #745). - depends_on: - - forgejo - - woodpecker - networks: - - disinto-net - - runner: - build: - context: . 
- dockerfile: docker/agents/Dockerfile - profiles: ["vault"] - security_opt: - - apparmor=unconfined - volumes: - - agent-data:/home/agent/data - environment: - FORGE_URL: http://forgejo:3000 - DISINTO_CONTAINER: "1" - PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project} - # Vault redesign in progress (PR-based approval, see #73-#77) - # This container is being replaced — entrypoint will be updated in follow-up - networks: - - disinto-net - - # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging - # Serves on ports 80/443, routes based on path - edge: - build: ./docker/edge - container_name: disinto-edge - ports: - - "80:80" - - "443:443" - environment: - - DISINTO_VERSION=${DISINTO_VERSION:-main} - - FORGE_URL=http://forgejo:3000 - - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto} - - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops} - - FORGE_TOKEN=${FORGE_TOKEN:-} - - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin} - - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-} - - OPS_REPO_ROOT=/opt/disinto-ops - - PROJECT_REPO_ROOT=/opt/disinto - - PRIMARY_BRANCH=main - volumes: - - ./docker/Caddyfile:/etc/caddy/Caddyfile - - caddy_data:/data - - /var/run/docker.sock:/var/run/docker.sock - depends_on: - - forgejo - - woodpecker - - staging - networks: - - disinto-net - - # Staging container — static file server for staging artifacts - # Edge proxy routes to this container for default requests - staging: - image: caddy:alpine - command: ["caddy", "file-server", "--root", "/srv/site"] - volumes: - - ./docker:/srv/site:ro - networks: - - disinto-net - - # Staging deployment slot — activated by Woodpecker staging pipeline (#755). - # Profile-gated: only starts when explicitly targeted by deploy commands. - # Customize image/ports/volumes for your project after init. 
- staging-deploy: - image: alpine:3 - profiles: ["staging"] - security_opt: - - apparmor=unconfined - environment: - DEPLOY_ENV: staging - networks: - - disinto-net - command: ["echo", "staging slot — replace with project image"] - -volumes: - forgejo-data: - woodpecker-data: - agent-data: - project-repos: - caddy_data: - -networks: - disinto-net: - driver: bridge -COMPOSEEOF - - # Patch the Claude CLI binary path — resolve from host PATH at init time. - local claude_bin - claude_bin="$(command -v claude 2>/dev/null || true)" - if [ -n "$claude_bin" ]; then - # Resolve symlinks to get the real binary path - claude_bin="$(readlink -f "$claude_bin")" - sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file" - else - echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2 - sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file" - fi - - # Patch the forgejo port mapping into the file if non-default - if [ "$forge_port" != "3000" ]; then - # Add port mapping to forgejo service so it's reachable from host during init - sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\ ports:\\n - \"${forge_port}:3000\"" "$compose_file" - else - sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\ ports:\\n - \"3000:3000\"" "$compose_file" - fi - - echo "Created: ${compose_file}" -} - -# Generate docker/agents/ files if they don't already exist. -_generate_agent_docker_impl() { - local docker_dir="${FACTORY_ROOT}/docker/agents" - mkdir -p "$docker_dir" - - if [ ! -f "${docker_dir}/Dockerfile" ]; then - echo "Warning: docker/agents/Dockerfile not found — expected in repo" >&2 - fi - if [ ! -f "${docker_dir}/entrypoint.sh" ]; then - echo "Warning: docker/agents/entrypoint.sh not found — expected in repo" >&2 - fi -} - -# Generate docker/Caddyfile template for edge proxy. 
-_generate_caddyfile_impl() { - local docker_dir="${FACTORY_ROOT}/docker" - local caddyfile="${docker_dir}/Caddyfile" - - if [ -f "$caddyfile" ]; then - echo "Caddyfile: ${caddyfile} (already exists, skipping)" - return - fi - - cat > "$caddyfile" <<'CADDYFILEEOF' -# Caddyfile — edge proxy configuration -# IP-only binding at bootstrap; domain + TLS added later via vault resource request - -:80 { - # Reverse proxy to Forgejo - handle /forgejo/* { - reverse_proxy forgejo:3000 - } - - # Reverse proxy to Woodpecker CI - handle /ci/* { - reverse_proxy woodpecker:8000 - } - - # Default: proxy to staging container - handle { - reverse_proxy staging:80 - } -} -CADDYFILEEOF - - echo "Created: ${caddyfile}" -} - -# Generate docker/index.html default page. -_generate_staging_index_impl() { - local docker_dir="${FACTORY_ROOT}/docker" - local index_file="${docker_dir}/index.html" - - if [ -f "$index_file" ]; then - echo "Staging: ${index_file} (already exists, skipping)" - return - fi - - cat > "$index_file" <<'INDEXEOF' -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Nothing shipped yet - - - -
-

Nothing shipped yet

-

CI pipelines will update this page with your staging artifacts.

-
- - -INDEXEOF - - echo "Created: ${index_file}" -} - -# Generate template .woodpecker/ deployment pipeline configs in a project repo. -# Creates staging.yml and production.yml alongside the project's existing CI config. -# These pipelines trigger on Woodpecker's deployment event with environment filters. -_generate_deploy_pipelines_impl() { - local repo_root="$1" - local project_name="$2" - : "${project_name// /}" # Silence SC2034 - variable used in heredoc - local wp_dir="${repo_root}/.woodpecker" - - mkdir -p "$wp_dir" - - # Skip if deploy pipelines already exist - if [ -f "${wp_dir}/staging.yml" ] && [ -f "${wp_dir}/production.yml" ]; then - echo "Deploy: .woodpecker/{staging,production}.yml (already exist)" - return - fi - - if [ ! -f "${wp_dir}/staging.yml" ]; then - cat > "${wp_dir}/staging.yml" <<'STAGINGEOF' -# .woodpecker/staging.yml — Staging deployment pipeline -# Triggered by runner via Woodpecker promote API. -# Human approves promotion in vault → runner calls promote → this runs. - -when: - event: deployment - environment: staging - -steps: - - name: deploy-staging - image: docker:27 - commands: - - echo "Deploying to staging environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}" - # Pull the image built by CI and deploy to staging - # Customize these commands for your project: - # - docker compose -f docker-compose.yml --profile staging up -d - - echo "Staging deployment complete" - - - name: verify-staging - image: alpine:3 - commands: - - echo "Verifying staging deployment..." - # Add health checks, smoke tests, or integration tests here: - # - curl -sf http://staging:8080/health || exit 1 - - echo "Staging verification complete" -STAGINGEOF - echo "Created: ${wp_dir}/staging.yml" - fi - - if [ ! -f "${wp_dir}/production.yml" ]; then - cat > "${wp_dir}/production.yml" <<'PRODUCTIONEOF' -# .woodpecker/production.yml — Production deployment pipeline -# Triggered by runner via Woodpecker promote API. 
-# Human approves promotion in vault → runner calls promote → this runs. - -when: - event: deployment - environment: production - -steps: - - name: deploy-production - image: docker:27 - commands: - - echo "Deploying to production environment..." - - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging" - # Pull the verified image and deploy to production - # Customize these commands for your project: - # - docker compose -f docker-compose.yml up -d - - echo "Production deployment complete" - - - name: verify-production - image: alpine:3 - commands: - - echo "Verifying production deployment..." - # Add production health checks here: - # - curl -sf http://production:8080/health || exit 1 - - echo "Production verification complete" -PRODUCTIONEOF - echo "Created: ${wp_dir}/production.yml" - fi -} diff --git a/lib/hire-agent.sh b/lib/hire-agent.sh deleted file mode 100644 index b15b2b7..0000000 --- a/lib/hire-agent.sh +++ /dev/null @@ -1,471 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# hire-agent — disinto_hire_an_agent() function -# -# Handles user creation, .profile repo setup, formula copying, branch protection, -# and state marker creation for hiring a new agent. 
-# -# Globals expected: -# FORGE_URL - Forge instance URL -# FORGE_TOKEN - Admin token for Forge operations -# FACTORY_ROOT - Root of the disinto factory -# PROJECT_NAME - Project name for email/domain generation -# -# Usage: -# source "${FACTORY_ROOT}/lib/hire-agent.sh" -# disinto_hire_an_agent [--formula ] [--local-model ] [--poll-interval ] -# ============================================================================= -set -euo pipefail - -disinto_hire_an_agent() { - local agent_name="${1:-}" - local role="${2:-}" - local formula_path="" - local local_model="" - local poll_interval="" - - if [ -z "$agent_name" ] || [ -z "$role" ]; then - echo "Error: agent-name and role required" >&2 - echo "Usage: disinto hire-an-agent [--formula ] [--local-model ] [--poll-interval ]" >&2 - exit 1 - fi - shift 2 - - # Parse flags - while [ $# -gt 0 ]; do - case "$1" in - --formula) - formula_path="$2" - shift 2 - ;; - --local-model) - local_model="$2" - shift 2 - ;; - --poll-interval) - poll_interval="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" >&2 - exit 1 - ;; - esac - done - - # Default formula path — try both naming conventions - if [ -z "$formula_path" ]; then - formula_path="${FACTORY_ROOT}/formulas/${role}.toml" - if [ ! -f "$formula_path" ]; then - formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml" - fi - fi - - # Validate formula exists - if [ ! 
-f "$formula_path" ]; then - echo "Error: formula not found at ${formula_path}" >&2 - exit 1 - fi - - echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────" - echo "Formula: ${formula_path}" - if [ -n "$local_model" ]; then - echo "Local model: ${local_model}" - echo "Poll interval: ${poll_interval:-300}s" - fi - - # Ensure FORGE_TOKEN is set - if [ -z "${FORGE_TOKEN:-}" ]; then - echo "Error: FORGE_TOKEN not set" >&2 - exit 1 - fi - - # Get Forge URL - local forge_url="${FORGE_URL:-http://localhost:3000}" - echo "Forge: ${forge_url}" - - # Step 1: Create user via API (skip if exists) - echo "" - echo "Step 1: Creating user '${agent_name}' (if not exists)..." - - local user_pass="" - local admin_pass="" - - # Read admin password from .env for standalone runs (#184) - local env_file="${FACTORY_ROOT}/.env" - if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then - admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-) - fi - - # Get admin token early (needed for both user creation and password reset) - local admin_user="disinto-admin" - admin_pass="${admin_pass:-admin}" - local admin_token="" - local admin_token_name - admin_token_name="temp-token-$(date +%s)" - admin_token=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" \ - -d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || admin_token="" - if [ -z "$admin_token" ]; then - # Token might already exist — try listing - admin_token=$(curl -sf \ - -u "${admin_user}:${admin_pass}" \ - "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \ - | jq -r '.[0].sha1 // empty') || admin_token="" - fi - if [ -z "$admin_token" ]; then - echo "Error: failed to obtain admin API token" >&2 - echo " Cannot proceed without admin privileges" >&2 - exit 1 - fi - - if curl -sf --max-time 5 
"${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then - echo " User '${agent_name}' already exists" - # Reset user password so we can get a token (#184) - user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - # Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x) - if _forgejo_exec forgejo admin user change-password \ - --username "${agent_name}" \ - --password "${user_pass}" \ - --must-change-password=false >/dev/null 2>&1; then - echo " Reset password for existing user '${agent_name}'" - else - echo " Warning: could not reset password for existing user" >&2 - fi - else - # Create user using basic auth (admin token fallback would poison subsequent calls) - # Create the user - user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)" - if curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users" \ - -d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then - echo " Created user '${agent_name}'" - else - echo " Warning: failed to create user via admin API" >&2 - # Try alternative: user might already exist - if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then - echo " User '${agent_name}' exists (confirmed)" - else - echo " Error: failed to create user '${agent_name}'" >&2 - exit 1 - fi - fi - fi - - # Step 1.5: Generate Forge token for the new/existing user - echo "" - echo "Step 1.5: Generating Forge token for '${agent_name}'..." 
- - # Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN) - local role_upper - role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]') - local token_var="FORGE_${role_upper}_TOKEN" - - # Generate token using the user's password (basic auth) - local agent_token="" - agent_token=$(curl -sf -X POST \ - -u "${agent_name}:${user_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${agent_name}/tokens" \ - -d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || agent_token="" - - if [ -z "$agent_token" ]; then - # Token name collision — create with timestamp suffix - agent_token=$(curl -sf -X POST \ - -u "${agent_name}:${user_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/users/${agent_name}/tokens" \ - -d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \ - | jq -r '.sha1 // empty') || agent_token="" - fi - - if [ -z "$agent_token" ]; then - echo " Warning: failed to create API token for '${agent_name}'" >&2 - else - # Store token in .env under the role-specific variable name - if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then - # Use sed with alternative delimiter and proper escaping for special chars in token - local escaped_token - escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g') - sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file" - echo " ${agent_name} token updated (${token_var})" - else - printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file" - echo " ${agent_name} token saved (${token_var})" - fi - export "${token_var}=${agent_token}" - fi - - # Step 2: Create .profile repo on Forgejo - echo "" - echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..." 
- - if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then - echo " Repo '${agent_name}/.profile' already exists" - else - # Create the repo using the admin API to ensure it's created in the agent's namespace. - # Using POST /api/v1/user/repos with a user token would create the repo under the - # authenticated user, which could be wrong if the token belongs to a different user. - # The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the - # specified user's namespace. - local create_output - create_output=$(curl -sf -X POST \ - -u "${admin_user}:${admin_pass}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${agent_name}/repos" \ - -d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true - - if echo "$create_output" | grep -q '"id":\|[0-9]'; then - echo " Created repo '${agent_name}/.profile' (via admin API)" - else - echo " Error: failed to create repo '${agent_name}/.profile'" >&2 - echo " Response: ${create_output}" >&2 - exit 1 - fi - fi - - # Step 3: Clone repo and create initial commit - echo "" - echo "Step 3: Cloning repo and creating initial commit..." - - local clone_dir="/tmp/.profile-clone-${agent_name}" - rm -rf "$clone_dir" - mkdir -p "$clone_dir" - - # Build authenticated clone URL using basic auth (user_pass is always set in Step 1) - if [ -z "${user_pass:-}" ]; then - echo " Error: no user password available for cloning" >&2 - exit 1 - fi - - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|") - auth_url="${auth_url}/${agent_name}/.profile.git" - - # Display unauthenticated URL (auth token only in actual git clone command) - echo " Cloning: ${forge_url}/${agent_name}/.profile.git" - - # Try authenticated clone first (required for private repos) - if ! 
git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then - echo " Error: failed to clone repo with authentication" >&2 - echo " Note: Ensure the user has a valid API token with repository access" >&2 - rm -rf "$clone_dir" - exit 1 - fi - - # Configure git - git -C "$clone_dir" config user.name "disinto-admin" - git -C "$clone_dir" config user.email "disinto-admin@localhost" - - # Create directory structure - echo " Creating directory structure..." - mkdir -p "${clone_dir}/journal" - mkdir -p "${clone_dir}/knowledge" - touch "${clone_dir}/journal/.gitkeep" - touch "${clone_dir}/knowledge/.gitkeep" - - # Copy formula - echo " Copying formula..." - cp "$formula_path" "${clone_dir}/formula.toml" - - # Create README - if [ ! -f "${clone_dir}/README.md" ]; then - cat > "${clone_dir}/README.md" </dev/null; then - git -C "$clone_dir" commit -m "chore: initial .profile setup" -q - git -C "$clone_dir" push origin main >/dev/null 2>&1 || \ - git -C "$clone_dir" push origin master >/dev/null 2>&1 || true - echo " Committed: initial .profile setup" - else - echo " No changes to commit" - fi - - rm -rf "$clone_dir" - - # Step 4: Set up branch protection - echo "" - echo "Step 4: Setting up branch protection..." 
- - # Source branch-protection.sh helper - local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh" - if [ -f "$bp_script" ]; then - # Source required environment - if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then - source "${FACTORY_ROOT}/lib/env.sh" - fi - - # Set up branch protection for .profile repo - if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then - echo " Branch protection configured for main branch" - echo " - Requires 1 approval before merge" - echo " - Admin-only merge enforcement" - echo " - Journal branch created for direct agent pushes" - else - echo " Warning: could not configure branch protection (Forgejo API may not be available)" - echo " Note: Branch protection can be set up manually later" - fi - else - echo " Warning: branch-protection.sh not found at ${bp_script}" - fi - - # Step 5: Create state marker - echo "" - echo "Step 5: Creating state marker..." - - local state_dir="${FACTORY_ROOT}/state" - mkdir -p "$state_dir" - local state_file="${state_dir}/.${role}-active" - - if [ ! -f "$state_file" ]; then - touch "$state_file" - echo " Created: ${state_file}" - else - echo " State marker already exists: ${state_file}" - fi - - # Step 6: Set up local model agent (if --local-model specified) - if [ -n "$local_model" ]; then - echo "" - echo "Step 6: Configuring local model agent..." - - local override_file="${FACTORY_ROOT}/docker-compose.override.yml" - local override_dir - override_dir=$(dirname "$override_file") - mkdir -p "$override_dir" - - # Validate model endpoint is reachable - echo " Validating model endpoint: ${local_model}" - if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then - # Try /v1/chat/completions as fallback endpoint check - if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then - echo " Warning: model endpoint may not be reachable at ${local_model}" - echo " Continuing with configuration..." 
- fi - else - echo " Model endpoint is reachable" - fi - - # Generate service name from agent name (lowercase) - local service_name="agents-${agent_name}" - service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]') - - # Set default poll interval - local interval="${poll_interval:-300}" - - # Generate the override compose file - # Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time - # \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion - cat > "$override_file" < "$tmpfile" - jq -Rs '{body:.}' < "$tmpfile" > "$tmpjson" - curl -sf -o /dev/null -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_API}/issues/${issue}/comments" \ - --data-binary @"$tmpjson" 2>/dev/null || true - rm -f "$tmpfile" "$tmpjson" -} - # --------------------------------------------------------------------------- # issue_block — add "blocked" label, post diagnostic comment, remove in-progress. 
# Args: issue_number reason [result_text] @@ -207,9 +187,14 @@ issue_block() { fi } > "$tmpfile" - # Post comment using shared helper - _ilc_post_comment "$issue" "$(cat "$tmpfile")" - rm -f "$tmpfile" + # Post comment + jq -Rs '{body:.}' < "$tmpfile" > "${tmpfile}.json" + curl -sf -o /dev/null -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_API}/issues/${issue}/comments" \ + --data-binary @"${tmpfile}.json" 2>/dev/null || true + rm -f "$tmpfile" "${tmpfile}.json" # Remove in-progress, add blocked local ip_id bk_id diff --git a/lib/load-project.sh b/lib/load-project.sh index 9d7afaf..dcddc94 100755 --- a/lib/load-project.sh +++ b/lib/load-project.sh @@ -10,6 +10,7 @@ # PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT, # CHECK_PIPELINE_STALL, CI_STALE_MINUTES, # MIRROR_NAMES, MIRROR_URLS, MIRROR_ (per configured mirror) +# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB) # # If no argument given, does nothing (allows poll scripts to work with # plain .env fallback for backwards compatibility). @@ -82,7 +83,7 @@ if mirrors: # Export parsed variables. # Inside the agents container (DISINTO_CONTAINER=1), compose already sets the # correct FORGE_URL (http://forgejo:3000) and path vars for the container -# environment. The TOML carries host-perspective values (localhost, /home/admin/…) +# environment. The TOML carries host-perspective values (localhost, /home/johba/…) # that would break container API calls and path resolution. Skip overriding # any env var that is already set when running inside the container. 
while IFS='=' read -r _key _val; do @@ -99,9 +100,11 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}" if [ -n "$FORGE_REPO" ]; then export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}" export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}" - # Extract repo owner (first path segment of owner/repo) - export FORGE_REPO_OWNER="${FORGE_REPO%%/*}" fi +# Backwards-compat aliases +export CODEBERG_REPO="${FORGE_REPO}" +export CODEBERG_API="${FORGE_API:-}" +export CODEBERG_WEB="${FORGE_WEB:-}" # Derive PROJECT_REPO_ROOT if not explicitly set if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then diff --git a/lib/ops-setup.sh b/lib/ops-setup.sh deleted file mode 100644 index ae6b216..0000000 --- a/lib/ops-setup.sh +++ /dev/null @@ -1,236 +0,0 @@ -#!/usr/bin/env bash -# ops-setup.sh — Setup ops repository (disinto-ops) -# -# Source from bin/disinto: -# source "$(dirname "$0")/../lib/ops-setup.sh" -# -# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT -# Optional: admin_token (falls back to FORGE_TOKEN for admin operations) -# -# Functions: -# setup_ops_repo [primary_branch] -# - Create ops repo on Forgejo if it doesn't exist -# - Configure bot collaborators with appropriate permissions -# - Clone or initialize ops repo locally -# - Seed directory structure (vault, knowledge, evidence) -# - Export _ACTUAL_OPS_SLUG for caller to use -# -# Globals modified: -# _ACTUAL_OPS_SLUG - resolved ops repo slug after function completes - -set -euo pipefail - -setup_ops_repo() { - - local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}" - local org_name="${ops_slug%%/*}" - local ops_name="${ops_slug##*/}" - - echo "" - echo "── Ops repo setup ─────────────────────────────────────" - - # Determine the actual ops repo location by searching across possible namespaces - # This handles cases where the repo was created under a different namespace - # due to past bugs (e.g., dev-bot/disinto-ops instead of disinto-admin/disinto-ops) - local 
actual_ops_slug="" - local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" ) - local http_code - - for ns in "${possible_namespaces[@]}"; do - slug="${ns}/${ops_name}" - if curl -sf --max-time 5 \ - -H "Authorization: token ${FORGE_TOKEN}" \ - "${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then - actual_ops_slug="$slug" - echo "Ops repo: ${slug} (found at ${slug})" - break - fi - done - - # If not found, try to create it in the configured namespace - if [ -z "$actual_ops_slug" ]; then - echo "Creating ops repo in namespace: ${org_name}" - # Create org if it doesn't exist - curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs" \ - -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true - if curl -sf -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/orgs/${org_name}/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then - actual_ops_slug="${org_name}/${ops_name}" - echo "Ops repo: ${actual_ops_slug} created on Forgejo" - else - # Fallback: use admin API to create repo under the target namespace - http_code=$(curl -s -o /dev/null -w "%{http_code}" \ - -X POST \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/admin/users/${org_name}/repos" \ - -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" 2>/dev/null || echo "0") - if [ "$http_code" = "201" ]; then - actual_ops_slug="${org_name}/${ops_name}" - echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)" - else - echo "Error: failed to create ops repo 
'${org_name}/${ops_name}' (HTTP ${http_code})" >&2 - return 1 - fi - fi - fi - - # Configure collaborators on the ops repo - local bot_user bot_perm - declare -A bot_permissions=( - [dev-bot]="write" - [review-bot]="read" - [planner-bot]="write" - [gardener-bot]="write" - [vault-bot]="write" - [supervisor-bot]="read" - [predictor-bot]="read" - [architect-bot]="write" - ) - - # Add all bot users as collaborators with appropriate permissions - # vault branch protection (#77) requires: - # - Admin-only merge to main (enforced by admin_enforced: true) - # - Bots can push branches and create PRs, but cannot merge - for bot_user in "${!bot_permissions[@]}"; do - bot_perm="${bot_permissions[$bot_user]}" - if curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \ - -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then - echo " + ${bot_user} = ${bot_perm} collaborator" - else - echo " ! ${bot_user} = ${bot_perm} (already set or failed)" - fi - done - - # Add disinto-admin as admin collaborator - if curl -sf -X PUT \ - -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \ - -H "Content-Type: application/json" \ - "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \ - -d '{"permission":"admin"}' >/dev/null 2>&1; then - echo " + disinto-admin = admin collaborator" - else - echo " ! disinto-admin = admin (already set or failed)" - fi - - # Clone ops repo locally if not present - if [ ! 
-d "${ops_root}/.git" ]; then - local auth_url - auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|") - local clone_url="${auth_url}/${actual_ops_slug}.git" - echo "Cloning: ops repo -> ${ops_root}" - if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then - echo "Ops repo: ${actual_ops_slug} cloned successfully" - else - echo "Initializing: ops repo at ${ops_root}" - mkdir -p "$ops_root" - git -C "$ops_root" init --initial-branch="${primary_branch}" -q - # Set remote to the actual ops repo location - git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git" - echo "Ops repo: ${actual_ops_slug} initialized locally" - fi - else - echo "Ops repo: ${ops_root} (already exists locally)" - # Verify remote is correct - local current_remote - current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true) - local expected_remote="${forge_url}/${actual_ops_slug}.git" - if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then - echo " Fixing: remote URL from ${current_remote} to ${expected_remote}" - git -C "$ops_root" remote set-url origin "$expected_remote" - fi - fi - - # Seed directory structure - local seeded=false - mkdir -p "${ops_root}/vault/pending" - mkdir -p "${ops_root}/vault/approved" - mkdir -p "${ops_root}/vault/fired" - mkdir -p "${ops_root}/vault/rejected" - mkdir -p "${ops_root}/knowledge" - mkdir -p "${ops_root}/evidence/engagement" - mkdir -p "${ops_root}/evidence/red-team" - mkdir -p "${ops_root}/evidence/holdout" - mkdir -p "${ops_root}/evidence/evolution" - mkdir -p "${ops_root}/evidence/user-test" - mkdir -p "${ops_root}/sprints" - [ -f "${ops_root}/sprints/.gitkeep" ] || { touch "${ops_root}/sprints/.gitkeep"; seeded=true; } - [ -f "${ops_root}/evidence/red-team/.gitkeep" ] || { touch "${ops_root}/evidence/red-team/.gitkeep"; seeded=true; } - [ -f "${ops_root}/evidence/holdout/.gitkeep" ] || { touch "${ops_root}/evidence/holdout/.gitkeep"; seeded=true; } - [ -f 
"${ops_root}/evidence/evolution/.gitkeep" ] || { touch "${ops_root}/evidence/evolution/.gitkeep"; seeded=true; } - [ -f "${ops_root}/evidence/user-test/.gitkeep" ] || { touch "${ops_root}/evidence/user-test/.gitkeep"; seeded=true; } - - if [ ! -f "${ops_root}/README.md" ]; then - cat > "${ops_root}/README.md" < **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo. - -## Branch protection - -- \`main\`: 2 reviewers required for vault items -- Journal/evidence commits may use lighter rules -OPSEOF - seeded=true - fi - - # Create stub files if they don't exist - [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; } - [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; } - [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; } - - # Commit and push seed content - if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then - # Auto-configure repo-local git identity if missing (#778) - if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then - git -C "$ops_root" config user.name "disinto-admin" - fi - if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then - git -C "$ops_root" config user.email "disinto-admin@localhost" - fi - - git -C "$ops_root" add -A - if ! 
git -C "$ops_root" diff --cached --quiet 2>/dev/null; then - git -C "$ops_root" commit -m "chore: seed ops repo structure" -q - # Push if remote exists - if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then - if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then - echo "Seeded: ops repo with initial structure" - else - echo "Warning: failed to push seed content to ops repo" >&2 - fi - fi - fi - fi - - # Export resolved slug for the caller to write back to the project TOML - _ACTUAL_OPS_SLUG="${actual_ops_slug}" -} diff --git a/lib/pr-lifecycle.sh b/lib/pr-lifecycle.sh index e097f34..c4ba4c5 100644 --- a/lib/pr-lifecycle.sh +++ b/lib/pr-lifecycle.sh @@ -357,18 +357,11 @@ pr_close() { local pr_num="$1" _prl_log "closing PR #${pr_num}" - local resp http_code - resp=$(curl -sf -w "\n%{http_code}" -X PATCH \ + curl -sf -X PATCH \ -H "Authorization: token ${FORGE_TOKEN}" \ -H "Content-Type: application/json" \ "${FORGE_API}/pulls/${pr_num}" \ - -d '{"state":"closed"}' 2>/dev/null) || true - http_code=$(printf '%s\n' "$resp" | tail -1) - if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then - _prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}" - return 1 - fi - _prl_log "PR #${pr_num} closed" + -d '{"state":"closed"}' >/dev/null 2>&1 || true } # --------------------------------------------------------------------------- @@ -405,18 +398,11 @@ pr_walk_to_merge() { if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then ci_retry_count=$((ci_retry_count + 1)) _prl_log "infra failure — retriggering CI (retry ${ci_retry_count})" - local rebase_output rebase_rc ( cd "$worktree" && \ git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \ git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \ git rebase "${remote}/${PRIMARY_BRANCH}" && \ - git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1 - rebase_rc=$? 
- rebase_output=$(cat /tmp/rebase-output-$$) - rm -f /tmp/rebase-output-$$ - if [ "$rebase_rc" -ne 0 ]; then - _prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)" - fi + git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true continue fi @@ -488,7 +474,11 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push: _PR_WALK_EXIT_REASON="merged" return 0 fi - # Merge failed (conflict or HTTP 405) — ask agent to rebase + if [ "$rc" -eq 2 ]; then + _PR_WALK_EXIT_REASON="merge_blocked" + return 1 + fi + # Merge failed (conflict) — ask agent to rebase _prl_log "merge failed — invoking agent to rebase" agent_run --resume "$session_id" --worktree "$worktree" \ "PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown} @@ -534,7 +524,8 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push: # build_phase_protocol_prompt — Generate push/commit instructions for Claude. # # For the synchronous agent_run architecture: tells Claude how to commit and -# push (no phase files). +# push (no phase files). For the tmux session architecture, use the +# build_phase_protocol_prompt in dev/phase-handler.sh instead. 
# # Args: branch [remote] # Stdout: instruction text diff --git a/lib/profile.sh b/lib/profile.sh new file mode 100644 index 0000000..79f8514 --- /dev/null +++ b/lib/profile.sh @@ -0,0 +1,210 @@ +#!/usr/bin/env bash +# profile.sh — Helpers for agent .profile repo management +# +# Source after lib/env.sh and lib/formula-session.sh: +# source "$(dirname "$0")/../lib/env.sh" +# source "$(dirname "$0")/lib/formula-session.sh" +# source "$(dirname "$0")/lib/profile.sh" +# +# Required globals: FORGE_TOKEN, FORGE_URL, AGENT_IDENTITY, PROFILE_REPO_PATH +# +# Functions: +# profile_propose_formula NEW_FORMULA CONTENT REASON — create PR to update formula.toml + +set -euo pipefail + +# Internal log helper +_profile_log() { + if declare -f log >/dev/null 2>&1; then + log "profile: $*" + else + printf '[%s] profile: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >&2 + fi +} + +# ----------------------------------------------------------------------------- +# profile_propose_formula — Propose a formula change via PR +# +# Creates a branch, writes updated formula.toml, opens a PR, and returns PR number. +# Branch is protected (requires admin approval per #87). 
+# +# Args: +# $1 - NEW_FORMULA_CONTENT: The complete new formula.toml content +# $2 - REASON: Human-readable explanation of what changed and why +# +# Returns: +# 0 on success, prints PR number to stdout +# 1 on failure +# +# Example: +# source "$(dirname "$0")/../lib/env.sh" +# source "$(dirname "$0")/lib/formula-session.sh" +# source "$(dirname "$0")/lib/profile.sh" +# AGENT_IDENTITY="dev-bot" +# ensure_profile_repo "$AGENT_IDENTITY" +# profile_propose_formula "$new_formula" "Added new prompt pattern for code review" +# ----------------------------------------------------------------------------- +profile_propose_formula() { + local new_formula="$1" + local reason="$2" + + if [ -z "${AGENT_IDENTITY:-}" ]; then + _profile_log "ERROR: AGENT_IDENTITY not set" + return 1 + fi + + if [ -z "${PROFILE_REPO_PATH:-}" ]; then + _profile_log "ERROR: PROFILE_REPO_PATH not set — ensure_profile_repo not called" + return 1 + fi + + if [ -z "${FORGE_TOKEN:-}" ]; then + _profile_log "ERROR: FORGE_TOKEN not set" + return 1 + fi + + if [ -z "${FORGE_URL:-}" ]; then + _profile_log "ERROR: FORGE_URL not set" + return 1 + fi + + # Generate short description from reason for branch name + local short_desc + short_desc=$(printf '%s' "$reason" | \ + tr '[:upper:]' '[:lower:]' | \ + sed 's/[^a-z0-9 ]//g' | \ + sed 's/ */ /g' | \ + sed 's/^ *//;s/ *$//' | \ + cut -c1-40 | \ + tr ' ' '-') + + if [ -z "$short_desc" ]; then + short_desc="formula-update" + fi + + local branch_name="formula/${short_desc}" + local formula_path="${PROFILE_REPO_PATH}/formula.toml" + + _profile_log "Proposing formula change: ${branch_name}" + _profile_log "Reason: ${reason}" + + # Ensure we're on main branch and up-to-date + _profile_log "Fetching .profile repo" + ( + cd "$PROFILE_REPO_PATH" || return 1 + + git fetch origin main --quiet 2>/dev/null || \ + git fetch origin master --quiet 2>/dev/null || true + + # Reset to main/master + if git checkout main --quiet 2>/dev/null; then + git pull --ff-only origin main 
--quiet 2>/dev/null || true + elif git checkout master --quiet 2>/dev/null; then + git pull --ff-only origin master --quiet 2>/dev/null || true + else + _profile_log "ERROR: Failed to checkout main/master branch" + return 1 + fi + + # Create and checkout new branch + git checkout -b "$branch_name" 2>/dev/null || { + _profile_log "Branch ${branch_name} may already exist" + git checkout "$branch_name" 2>/dev/null || return 1 + } + + # Write formula.toml + printf '%s' "$new_formula" > "$formula_path" + + # Commit the change + git config user.name "${AGENT_IDENTITY}" || true + git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true + + git add "$formula_path" + git commit -m "formula: ${reason}" --no-verify || { + _profile_log "No changes to commit (formula unchanged)" + # Check if branch has any commits + if git rev-parse HEAD >/dev/null 2>&1; then + : # branch has commits, continue + else + _profile_log "ERROR: Failed to create commit" + return 1 + fi + } + + # Push branch + local remote="${FORGE_REMOTE:-origin}" + git push --set-upstream "$remote" "$branch_name" --quiet 2>/dev/null || { + _profile_log "ERROR: Failed to push branch" + return 1 + } + + _profile_log "Branch pushed: ${branch_name}" + + # Create PR + local forge_url="${FORGE_URL%/}" + local api_url="${forge_url}/api/v1/repos/${AGENT_IDENTITY}/.profile" + local primary_branch="main" + + # Check if main or master is the primary branch + if ! 
curl -sf -o /dev/null -w "%{http_code}" \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/git/branches/main" 2>/dev/null | grep -q "200"; then + primary_branch="master" + fi + + local pr_title="formula: ${reason}" + local pr_body="# Formula Update + +**Reason:** ${reason} + +--- +*This PR was auto-generated by ${AGENT_IDENTITY}.* +" + + local pr_response http_code + local pr_json + pr_json=$(jq -n \ + --arg t "$pr_title" \ + --arg b "$pr_body" \ + --arg h "$branch_name" \ + --arg base "$primary_branch" \ + '{title:$t, body:$b, head:$h, base:$base}') || { + _profile_log "ERROR: Failed to build PR JSON" + return 1 + } + + pr_response=$(curl -s -w "\n%{http_code}" -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${api_url}/pulls" \ + -d "$pr_json" || true) + + http_code=$(printf '%s\n' "$pr_response" | tail -1) + pr_response=$(printf '%s\n' "$pr_response" | sed '$d') + + if [ "$http_code" = "201" ] || [ "$http_code" = "200" ]; then + local pr_num + pr_num=$(printf '%s' "$pr_response" | jq -r '.number') + _profile_log "PR created: #${pr_num}" + printf '%s' "$pr_num" + return 0 + else + # Check if PR already exists (409 conflict) + if [ "$http_code" = "409" ]; then + local existing_pr + existing_pr=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${api_url}/pulls?state=open&head=${AGENT_IDENTITY}:formula/${short_desc}" 2>/dev/null | \ + jq -r '.[0].number // empty') || true + if [ -n "$existing_pr" ]; then + _profile_log "PR already exists: #${existing_pr}" + printf '%s' "$existing_pr" + return 0 + fi + fi + _profile_log "ERROR: Failed to create PR (HTTP ${http_code})" + return 1 + fi + ) + + return $? 
+} diff --git a/lib/release.sh b/lib/release.sh deleted file mode 100644 index 6eb03ee..0000000 --- a/lib/release.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# release.sh — disinto_release() function -# -# Handles vault TOML creation, branch setup on ops repo, PR creation, -# and auto-merge request for a versioned release. -# -# Globals expected: -# FORGE_URL - Forge instance URL (e.g. http://localhost:3000) -# FORGE_TOKEN - API token for Forge operations -# FORGE_OPS_REPO - Ops repo slug (e.g. disinto-admin/myproject-ops) -# FACTORY_ROOT - Root of the disinto factory -# PRIMARY_BRANCH - Primary branch name (e.g. main) -# -# Usage: -# source "${FACTORY_ROOT}/lib/release.sh" -# disinto_release -# ============================================================================= -set -euo pipefail - -# Source vault.sh for _vault_log helper -source "${FACTORY_ROOT}/lib/vault.sh" - -# Assert required globals are set before using this module. -_assert_release_globals() { - local missing=() - [ -z "${FORGE_URL:-}" ] && missing+=("FORGE_URL") - [ -z "${FORGE_TOKEN:-}" ] && missing+=("FORGE_TOKEN") - [ -z "${FORGE_OPS_REPO:-}" ] && missing+=("FORGE_OPS_REPO") - [ -z "${FACTORY_ROOT:-}" ] && missing+=("FACTORY_ROOT") - [ -z "${PRIMARY_BRANCH:-}" ] && missing+=("PRIMARY_BRANCH") - if [ "${#missing[@]}" -gt 0 ]; then - echo "Error: release.sh requires these globals to be set: ${missing[*]}" >&2 - exit 1 - fi -} - -disinto_release() { - _assert_release_globals - - local version="${1:-}" - local formula_path="${FACTORY_ROOT}/formulas/release.toml" - - if [ -z "$version" ]; then - echo "Error: version required" >&2 - echo "Usage: disinto release " >&2 - echo "Example: disinto release v1.2.0" >&2 - exit 1 - fi - - # Validate version format (must start with 'v' followed by semver) - if ! 
echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then - echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2 - exit 1 - fi - - # Load project config to get FORGE_OPS_REPO - if [ -z "${PROJECT_NAME:-}" ]; then - # PROJECT_NAME is unset - detect project TOML from projects/ directory - local found_toml - found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1) - if [ -n "$found_toml" ]; then - source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml" - fi - else - local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml" - if [ -f "$project_toml" ]; then - source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml" - fi - fi - - # Check formula exists - if [ ! -f "$formula_path" ]; then - echo "Error: release formula not found at ${formula_path}" >&2 - exit 1 - fi - - # Get the ops repo root - local ops_root="${FACTORY_ROOT}/../disinto-ops" - if [ ! -d "${ops_root}/.git" ]; then - echo "Error: ops repo not found at ${ops_root}" >&2 - echo " Run 'disinto init' to set up the ops repo first" >&2 - exit 1 - fi - - # Generate a unique ID for the vault item - local id="release-${version//./}" - local vault_toml="${ops_root}/vault/actions/${id}.toml" - - # Create vault TOML with the specific version - cat > "$vault_toml" </dev/null || true - - # Push branch - git push -u origin "$branch_name" 2>/dev/null || { - echo "Error: failed to push branch" >&2 - exit 1 - } - ) - - # Create PR - local pr_response - pr_response=$(curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \ - -d "{\"title\":\"${pr_title}\",\"head\":\"${branch_name}\",\"base\":\"${PRIMARY_BRANCH}\",\"body\":\"$(echo "$pr_body" | sed ':a;N;$!ba;s/\n/\\n/g')\"}" 2>/dev/null) || { - echo "Error: failed to create PR" >&2 - echo "Response: ${pr_response}" >&2 - exit 1 - } - - local pr_number - pr_number=$(echo 
"$pr_response" | jq -r '.number') - - local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}" - - # Enable auto-merge on the PR — Forgejo will auto-merge after approval - _vault_log "Enabling auto-merge for PR #${pr_number}" - curl -sf -X POST \ - -H "Authorization: token ${FORGE_TOKEN}" \ - -H "Content-Type: application/json" \ - "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \ - -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || { - echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2 - } - - echo "" - echo "Release PR created: ${pr_url}" - echo "" - echo "Next steps:" - echo " 1. Review the PR" - echo " 2. Approve the PR (auto-merge will trigger after approval)" - echo " 3. The vault runner will execute the release formula" - echo "" - echo "After merge, the release will:" - echo " 1. Tag Forgejo main with ${version}" - echo " 2. Push tag to mirrors (Codeberg, GitHub)" - echo " 3. Build and tag the agents Docker image" - echo " 4. Restart agent containers" -} diff --git a/lib/stack-lock.sh b/lib/stack-lock.sh deleted file mode 100644 index 6c8c1ed..0000000 --- a/lib/stack-lock.sh +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env bash -# stack-lock.sh — File-based lock protocol for singleton project stack access -# -# Prevents CI pipelines and the reproduce-agent from stepping on each other -# when sharing a single project stack (e.g. harb docker compose). -# -# Lock file: /home/agent/data/locks/-stack.lock -# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."} -# -# Protocol: -# 1. stack_lock_check — inspect current lock state -# 2. stack_lock_acquire — wait until lock is free, then claim it -# 3. stack_lock_release — delete lock file when done -# -# Heartbeat: callers must update the heartbeat every 2 minutes while holding -# the lock by calling stack_lock_heartbeat. 
A heartbeat older than 10 minutes -# is considered stale — the next acquire will break it. -# -# Usage: -# source "$(dirname "$0")/../lib/stack-lock.sh" -# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject" -# trap 'stack_lock_release "myproject"' EXIT -# # ... do work ... -# stack_lock_release "myproject" - -set -euo pipefail - -STACK_LOCK_DIR="${HOME}/data/locks" -STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls -STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale -STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds - -# _stack_lock_path -# Print the path of the lock file for the given project. -_stack_lock_path() { - local project="$1" - echo "${STACK_LOCK_DIR}/${project}-stack.lock" -} - -# _stack_lock_now -# Print current UTC timestamp in ISO-8601 format. -_stack_lock_now() { - date -u +"%Y-%m-%dT%H:%M:%SZ" -} - -# _stack_lock_epoch -# Convert an ISO-8601 UTC timestamp to a Unix epoch integer. -_stack_lock_epoch() { - local ts="$1" - # Strip trailing Z, replace T with space for `date -d` - date -u -d "${ts%Z}" +%s 2>/dev/null || date -u -j -f "%Y-%m-%dT%H:%M:%S" "${ts%Z}" +%s 2>/dev/null -} - -# stack_lock_check -# Print lock status to stdout: "free", "held:", or "stale:". -# Returns 0 in all cases (status is in stdout). -stack_lock_check() { - local project="$1" - local lock_file - lock_file="$(_stack_lock_path "$project")" - - if [ ! 
-f "$lock_file" ]; then - echo "free" - return 0 - fi - - local holder heartbeat - holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$lock_file" 2>/dev/null || echo "unknown") - heartbeat=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$lock_file" 2>/dev/null || echo "") - - if [ -z "$heartbeat" ]; then - echo "stale:${holder}" - return 0 - fi - - local hb_epoch now_epoch age - hb_epoch=$(_stack_lock_epoch "$heartbeat" 2>/dev/null || echo "0") - now_epoch=$(date -u +%s) - age=$(( now_epoch - hb_epoch )) - - if [ "$age" -gt "$STACK_LOCK_STALE_SECONDS" ]; then - echo "stale:${holder}" - else - echo "held:${holder}" - fi -} - -# stack_lock_acquire [max_wait_seconds] -# Acquire the lock for on behalf of . -# Polls every STACK_LOCK_POLL_INTERVAL seconds. -# Breaks stale locks automatically. -# Exits non-zero if the lock cannot be acquired within max_wait_seconds. -stack_lock_acquire() { - local holder="$1" - local project="$2" - local max_wait="${3:-$STACK_LOCK_MAX_WAIT}" - local lock_file - lock_file="$(_stack_lock_path "$project")" - local deadline - deadline=$(( $(date -u +%s) + max_wait )) - - mkdir -p "$STACK_LOCK_DIR" - - while true; do - local status - status=$(stack_lock_check "$project") - - case "$status" in - free) - # Write to temp file then rename to avoid partial reads by other processes - local tmp_lock - tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") - local now - now=$(_stack_lock_now) - printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ - "$holder" "$now" "$now" > "$tmp_lock" - mv "$tmp_lock" "$lock_file" - echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2 - return 0 - ;; - stale:*) - local stale_holder="${status#stale:}" - echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2 - rm -f "$lock_file" - # Loop back immediately to re-check and claim - ;; - held:*) - local 
cur_holder="${status#held:}" - local remaining - remaining=$(( deadline - $(date -u +%s) )) - if [ "$remaining" -le 0 ]; then - echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2 - return 1 - fi - echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2 - sleep "$STACK_LOCK_POLL_INTERVAL" - ;; - *) - echo "[stack-lock] unexpected status '${status}' for ${project}" >&2 - return 1 - ;; - esac - done -} - -# stack_lock_heartbeat -# Update the heartbeat timestamp in the lock file. -# Should be called every 2 minutes while holding the lock. -# No-op if the lock file is absent or held by a different holder. -stack_lock_heartbeat() { - local holder="$1" - local project="$2" - local lock_file - lock_file="$(_stack_lock_path "$project")" - - [ -f "$lock_file" ] || return 0 - - local current_holder - current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") - [ "$current_holder" = "$holder" ] || return 0 - - local since - since=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lock_file" 2>/dev/null || echo "") - local now - now=$(_stack_lock_now) - - local tmp_lock - tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX") - printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \ - "$holder" "$since" "$now" > "$tmp_lock" - mv "$tmp_lock" "$lock_file" -} - -# stack_lock_release [holder_id] -# Release the lock for . -# If holder_id is provided, only releases if the lock is held by that holder -# (prevents accidentally releasing someone else's lock). 
-stack_lock_release() { - local project="$1" - local holder="${2:-}" - local lock_file - lock_file="$(_stack_lock_path "$project")" - - [ -f "$lock_file" ] || return 0 - - if [ -n "$holder" ]; then - local current_holder - current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "") - if [ "$current_holder" != "$holder" ]; then - echo "[stack-lock] refusing to release: lock held by '${current_holder}', not '${holder}'" >&2 - return 1 - fi - fi - - rm -f "$lock_file" - echo "[stack-lock] released lock for ${project}" >&2 -} diff --git a/planner/AGENTS.md b/planner/AGENTS.md index 7343b7c..84b511b 100644 --- a/planner/AGENTS.md +++ b/planner/AGENTS.md @@ -1,4 +1,4 @@ - + # Planner Agent **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints), @@ -65,7 +65,7 @@ component, not work. tree, humans steer by editing VISION.md. Tree grows organically as the planner discovers new prerequisites during runs - `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo) - +- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo) **Constraint focus**: The planner uses Theory of Constraints to avoid premature issue filing. 
Only the top 3 unresolved prerequisites that block the most diff --git a/planner/planner-run.sh b/planner/planner-run.sh index 3c71d44..663703c 100755 --- a/planner/planner-run.sh +++ b/planner/planner-run.sh @@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log" +LOG_FILE="$SCRIPT_DIR/planner.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -43,29 +43,20 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-planner-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="planner" - -# Override log() to append to planner-specific log file -# shellcheck disable=SC2034 -log() { - local agent="${LOG_AGENT:-planner}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" -} +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active planner acquire_cron_lock "/tmp/planner-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Planner run start ---" -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - # ── Resolve agent identity for .profile repo ──────────────────────────── -resolve_agent_identity || true +if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PLANNER_TOKEN:-}" ]; then + AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PLANNER_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +fi # ── Load formula + context ─────────────────────────────────────────────── load_formula_or_profile "planner" 
"$FACTORY_ROOT/formulas/run-planner.toml" || exit 1 diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md index d0bae51..327a842 100644 --- a/predictor/AGENTS.md +++ b/predictor/AGENTS.md @@ -1,4 +1,4 @@ - + # Predictor Agent **Role**: Abstract adversary (the "goblin"). Runs a 2-step formula diff --git a/predictor/predictor-run.sh b/predictor/predictor-run.sh index 889fe1c..266829c 100755 --- a/predictor/predictor-run.sh +++ b/predictor/predictor-run.sh @@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh" # shellcheck source=../lib/agent-sdk.sh source "$FACTORY_ROOT/lib/agent-sdk.sh" -LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log" +LOG_FILE="$SCRIPT_DIR/predictor.log" # shellcheck disable=SC2034 # consumed by agent-sdk.sh LOGFILE="$LOG_FILE" # shellcheck disable=SC2034 # consumed by agent-sdk.sh @@ -44,29 +44,20 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid" SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md" WORKTREE="/tmp/${PROJECT_NAME}-predictor-run" -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="predictor" - -# Override log() to append to predictor-specific log file -# shellcheck disable=SC2034 -log() { - local agent="${LOG_AGENT:-predictor}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE" -} +log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; } # ── Guards ──────────────────────────────────────────────────────────────── check_active predictor acquire_cron_lock "/tmp/predictor-run.lock" -memory_guard 2000 +check_memory 2000 log "--- Predictor run start ---" -# ── Resolve forge remote for git operations ───────────────────────────── -resolve_forge_remote - # ── Resolve agent identity for .profile repo ──────────────────────────── -resolve_agent_identity || true +if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PREDICTOR_TOKEN:-}" ]; then + AGENT_IDENTITY=$(curl -sf -H "Authorization: 
token ${FORGE_PREDICTOR_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +fi # ── Load formula + context ─────────────────────────────────────────────── load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1 diff --git a/projects/disinto.toml.example b/projects/disinto.toml.example index 61781e5..ea0b8c5 100644 --- a/projects/disinto.toml.example +++ b/projects/disinto.toml.example @@ -5,7 +5,7 @@ name = "disinto" repo = "johba/disinto" -ops_repo = "disinto-admin/disinto-ops" +ops_repo = "johba/disinto-ops" forge_url = "http://localhost:3000" repo_root = "/home/YOU/dark-factory" ops_repo_root = "/home/YOU/disinto-ops" diff --git a/review/AGENTS.md b/review/AGENTS.md index 6976c04..e010ff5 100644 --- a/review/AGENTS.md +++ b/review/AGENTS.md @@ -1,4 +1,4 @@ - + # Review Agent **Role**: AI-powered PR review — post structured findings and formal @@ -9,8 +9,8 @@ whose CI has passed and that lack a review for the current HEAD SHA, then spawns `review-pr.sh `. **Key files**: -- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. **Circuit breaker**: counts existing `` comments; skips a PR if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures). -- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids hardcoded 'origin'). 
Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. +- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. +- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files ` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it. **Environment variables consumed**: - `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN) diff --git a/review/review-poll.sh b/review/review-poll.sh index 72a6e85..47d37df 100755 --- a/review/review-poll.sh +++ b/review/review-poll.sh @@ -23,15 +23,8 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log" MAX_REVIEWS=3 REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle -# Override LOG_AGENT for consistent agent identification -# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log() -LOG_AGENT="review" - -# Override log() to append to review-specific log file -# shellcheck disable=SC2034 log() { - local agent="${LOG_AGENT:-review}" - printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE" + printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" } # Log rotation @@ -133,11 +126,10 @@ if [ -n "$REVIEW_SIDS" ]; then log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})" - 
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$? - if [ "$review_rc" -eq 0 ]; then + if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then REVIEWED=$((REVIEWED + 1)) else - log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" + log " #${pr_num} re-review failed" fi [ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break @@ -188,11 +180,10 @@ while IFS= read -r line; do log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}" - review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$? - if [ "$review_rc" -eq 0 ]; then + if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then REVIEWED=$((REVIEWED + 1)) else - log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)" + log " #${PR_NUM} review failed" fi if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then diff --git a/review/review-pr.sh b/review/review-pr.sh index a0e0ada..8a9a29d 100755 --- a/review/review-pr.sh +++ b/review/review-pr.sh @@ -35,10 +35,6 @@ git -C "$FACTORY_ROOT" pull --ff-only origin main 2>/dev/null || true # --- Config --- PR_NUMBER="${1:?Usage: review-pr.sh [--force]}" - -# Change to project repo early — required before any git commands -# (factory root is not a git repo after image rebuild) -cd "${PROJECT_REPO_ROOT}" FORCE="${2:-}" API="${FORGE_API}" LOGFILE="${DISINTO_LOG_DIR}/review/review.log" @@ -62,15 +58,13 @@ if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 10 mv "$LOGFILE" "$LOGFILE.old" fi -# ============================================================================= -# RESOLVE FORGE REMOTE FOR GIT OPERATIONS -# ============================================================================= -resolve_forge_remote - # ============================================================================= # RESOLVE AGENT IDENTITY FOR .PROFILE REPO # 
============================================================================= -resolve_agent_identity || true +if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then + AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true) +fi # ============================================================================= # MEMORY GUARD @@ -137,7 +131,7 @@ PREV_REV=$(printf '%s' "$ALL_COMMENTS" | jq -r --arg s "$PR_SHA" \ if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body') PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP ' + # Supervisor Agent **Role**: Health monitoring and auto-remediation, executed as a formula-driven @@ -9,17 +9,19 @@ resources or human decisions, files vault items instead of escalating directly. **Trigger**: `supervisor-run.sh` runs every 20 min via cron. Sources `lib/guard.sh` and calls `check_active supervisor` first — skips if -`$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` -via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with -pre-collected metrics as context, and cleans up on completion or timeout (20 min max session). -No action issues — the supervisor runs directly from cron like the planner and predictor. +`$FACTORY_ROOT/state/.supervisor-active` is absent. Then creates a tmux session +with `claude --model sonnet`, injects `formulas/run-supervisor.toml` with +pre-collected metrics as context, monitors the phase file, and cleans up on +completion or timeout (20 min max session). No action issues — the supervisor +runs directly from cron like the planner and predictor. 
**Key files**: - `supervisor/supervisor-run.sh` — Cron wrapper + orchestrator: lock, memory guard, - runs preflight.sh, sources disinto project config, runs claude -p via agent-sdk.sh, - injects formula prompt with metrics, handles crash recovery + runs preflight.sh, sources disinto project config, creates tmux session, injects + formula prompt with metrics, monitors phase file, handles crash recovery via + `run_formula_and_monitor` - `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap, - load), Docker status, active sessions + phase files, lock files, agent log + load), Docker status, active tmux sessions + phase files, lock files, agent log tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase` files for `PHASE:escalate` entries and auto-removes any whose linked issue @@ -29,8 +31,11 @@ No action issues — the supervisor runs directly from cron like the planner and - `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review, health-assessment, decide-actions, report, journal) with `needs` dependencies. Claude evaluates all metrics and takes actions in a single interactive session +- `$OPS_REPO_ROOT/journal/supervisor/*.md` — Daily health logs from each supervisor run - `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory, disk, CI, git, dev-agent, review-agent, forge) +- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by + supervisor-run.sh + formula) **Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled), P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). @@ -41,5 +46,5 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping). 
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries **Lifecycle**: supervisor-run.sh (cron */20) → lock + memory guard → run -preflight.sh (collect metrics) → load formula + context → run claude -p via agent-sdk.sh -→ Claude assesses health, auto-fixes, writes journal → `PHASE:done`. +preflight.sh (collect metrics) → load formula + context → create tmux +session → Claude assesses health, auto-fixes, writes journal → `PHASE:done`. diff --git a/supervisor/supervisor-poll.sh b/supervisor/supervisor-poll.sh new file mode 100755 index 0000000..42ab1dd --- /dev/null +++ b/supervisor/supervisor-poll.sh @@ -0,0 +1,808 @@ +#!/usr/bin/env bash +set -euo pipefail +# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes +# +# Two-layer architecture: +# 1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes +# 2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml +# +# Runs every 10min via cron. +# +# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh +# +# Peek: cat /tmp/supervisor-status +# Log: tail -f /path/to/disinto/supervisor/supervisor.log + +source "$(dirname "$0")/../lib/env.sh" +source "$(dirname "$0")/../lib/ci-helpers.sh" + +LOGFILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log" +STATUSFILE="/tmp/supervisor-status" +LOCKFILE="/tmp/supervisor-poll.lock" +PROMPT_FILE="${FACTORY_ROOT}/formulas/run-supervisor.toml" +PROJECTS_DIR="${FACTORY_ROOT}/projects" + +METRICS_FILE="${DISINTO_LOG_DIR}/metrics/supervisor-metrics.jsonl" + +emit_metric() { + printf '%s\n' "$1" >> "$METRICS_FILE" +} + +# Count all matching items from a paginated forge API endpoint. +# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues" +# Returns total count across all pages (max 20 pages = 1000 items). 
+codeberg_count_paginated() { + local endpoint="$1" total=0 page=1 count + while true; do + count=$(forge_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null | jq 'length' 2>/dev/null || echo 0) + total=$((total + ${count:-0})) + [ "${count:-0}" -lt 50 ] && break + page=$((page + 1)) + [ "$page" -gt 20 ] && break + done + echo "$total" +} + +rotate_metrics() { + [ -f "$METRICS_FILE" ] || return 0 + local cutoff tmpfile + cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M) + tmpfile="${METRICS_FILE}.tmp" + jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \ + "$METRICS_FILE" > "$tmpfile" 2>/dev/null + # Only replace if jq produced output, or the source is already empty + if [ -s "$tmpfile" ] || [ ! -s "$METRICS_FILE" ]; then + mv "$tmpfile" "$METRICS_FILE" + else + rm -f "$tmpfile" + fi +} + +# Prevent overlapping runs +if [ -f "$LOCKFILE" ]; then + LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null) + if kill -0 "$LOCK_PID" 2>/dev/null; then + exit 0 + fi + rm -f "$LOCKFILE" +fi +echo $$ > "$LOCKFILE" +trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT +mkdir -p "$(dirname "$METRICS_FILE")" +rotate_metrics + +flog() { + printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE" +} + +status() { + printf '[%s] supervisor: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" > "$STATUSFILE" + flog "$*" +} + +# Alerts by priority +P0_ALERTS="" +P1_ALERTS="" +P2_ALERTS="" +P3_ALERTS="" +P4_ALERTS="" + +p0() { P0_ALERTS="${P0_ALERTS}• [P0] $*\n"; flog "P0: $*"; } +p1() { P1_ALERTS="${P1_ALERTS}• [P1] $*\n"; flog "P1: $*"; } +p2() { P2_ALERTS="${P2_ALERTS}• [P2] $*\n"; flog "P2: $*"; } +p3() { P3_ALERTS="${P3_ALERTS}• [P3] $*\n"; flog "P3: $*"; } +p4() { P4_ALERTS="${P4_ALERTS}• [P4] $*\n"; flog "P4: $*"; } + +FIXES="" +fixed() { FIXES="${FIXES}• ✅ $*\n"; flog "FIXED: $*"; } + +# ############################################################################# +# LAYER 1: FACTORY INFRASTRUCTURE +# (project-agnostic, runs once) +# 
############################################################################# + +# ============================================================================= +# P0: MEMORY — check first, fix first +# ============================================================================= +status "P0: checking memory" + +AVAIL_MB=$(free -m | awk '/Mem:/{print $7}') +SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}') + +if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then + flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing" + + # Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions + STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) + if [ -n "$STALE_CLAUDES" ]; then + echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true + fixed "Killed stale claude processes: ${STALE_CLAUDES}" + fi + + # Drop filesystem caches + sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1 + fixed "Dropped filesystem caches" + + # Re-check after fixes + AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}') + SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}') + + if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then + p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" + else + flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB" + fi +fi + +# P0 alerts already logged — clear so they are not duplicated in the final consolidated log +if [ -n "$P0_ALERTS" ]; then + P0_ALERTS="" +fi + +# ============================================================================= +# P1: DISK +# ============================================================================= +status "P1: checking disk" + +DISK_PERCENT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%') + +if [ "${DISK_PERCENT:-0}" -gt 80 ]; then + flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning" + + # Docker cleanup 
(safe — keeps images) + sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune" + + # Truncate logs >10MB + for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do + if [ -f "$logfile" ]; then + SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) + if [ "${SIZE_KB:-0}" -gt 10240 ]; then + truncate -s 0 "$logfile" + fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)" + fi + fi + done + + # Woodpecker log_entries cleanup + LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs) + if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then + SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+') + SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)') + if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then + wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null + fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})" + fi + fi + + DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%') + if [ "${DISK_AFTER:-0}" -gt 80 ]; then + p1 "Disk still ${DISK_AFTER}% after auto-clean" + else + flog "Disk recovered: ${DISK_AFTER}%" + fi +fi + +# P1 alerts already logged — clear so they are not duplicated in the final consolidated log +if [ -n "$P1_ALERTS" ]; then + P1_ALERTS="" +fi + +# Emit infra metric +_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}') +_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? 
(${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 )) +emit_metric "$(jq -nc \ + --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ + --argjson ram "${_RAM_USED_PCT:-0}" \ + --argjson disk "${DISK_PERCENT:-0}" \ + --argjson swap "${SWAP_USED_MB:-0}" \ + '{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true + +# ============================================================================= +# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic) +# ============================================================================= +status "P4: infra housekeeping" + +# Stale agent-spawned claude processes (>3h) — skip interactive sessions +STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true) +if [ -n "$STALE_CLAUDES" ]; then + echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true + fixed "Killed stale claude processes: $(echo $STALE_CLAUDES | wc -w) procs" +fi + +# Rotate logs >5MB +for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do + if [ -f "$logfile" ]; then + SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1) + if [ "${SIZE_KB:-0}" -gt 5120 ]; then + mv "$logfile" "${logfile}.old" 2>/dev/null + fixed "Rotated $(basename "$logfile")" + fi + fi +done + +# ############################################################################# +# LAYER 2: PER-PROJECT CHECKS +# (iterated over projects/*.toml, config-driven) +# ############################################################################# + +# Infra retry tracking (shared across projects, created once) +_RETRY_DIR="/tmp/supervisor-infra-retries" +mkdir -p "$_RETRY_DIR" + +# Function: run all per-project checks for the currently loaded project config +check_project() { + local proj_name="${PROJECT_NAME:-unknown}" + flog "── checking project: ${proj_name} (${FORGE_REPO}) ──" + + # =========================================================================== + # P2: FACTORY STOPPED — CI, dev-agent, git 
+ # =========================================================================== + status "P2: ${proj_name}: checking pipeline" + + # CI stuck + STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true) + [ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null && p2 "${proj_name}: CI: ${STUCK_CI} pipeline(s) running >20min" + + PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true) + [ "${PENDING_CI:-0}" -gt 0 ] && p2 "${proj_name}: CI: ${PENDING_CI} pipeline(s) pending >30min" + + # Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI) + _CI_ROW=$(wpdb -A -F ',' -c "SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1;" 2>/dev/null | grep -E '^[0-9]' | head -1 || true) + if [ -n "$_CI_ROW" ]; then + _CI_ID=$(echo "$_CI_ROW" | cut -d',' -f1 | tr -d ' ') + _CI_DUR=$(echo "$_CI_ROW" | cut -d',' -f2 | tr -d ' ') + _CI_STAT=$(echo "$_CI_ROW" | cut -d',' -f3 | tr -d ' ') + emit_metric "$(jq -nc \ + --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \ + --arg proj "$proj_name" \ + --argjson pipeline "${_CI_ID:-0}" \ + --argjson duration "${_CI_DUR:-0}" \ + --arg status "${_CI_STAT:-unknown}" \ + '{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null)" 2>/dev/null || true + fi + + # =========================================================================== + # P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures + # 
=========================================================================== + if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then + status "P2e: ${proj_name}: checking infra failures" + + # Recent failed pipelines (last 6h) + _failed_nums=$(wpdb -A -c " + SELECT number FROM pipelines + WHERE repo_id = ${WOODPECKER_REPO_ID} + AND status IN ('failure', 'error') + AND finished > 0 + AND to_timestamp(finished) > now() - interval '6 hours' + ORDER BY number DESC LIMIT 5;" 2>/dev/null \ + | tr -d ' ' | grep -E '^[0-9]+$' || true) + + # shellcheck disable=SC2086 + for _pip_num in $_failed_nums; do + [ -z "$_pip_num" ] && continue + + # Check retry count; alert if retries exhausted + _retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}" + _retries=0 + [ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0) + if [ "${_retries:-0}" -ge 2 ]; then + p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation" + continue + fi + + # Classify failure type via shared helper + _classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code") + + if [[ "$_classification" == infra* ]]; then + _infra_reason="${_classification#infra }" + _new_retries=$(( _retries + 1 )) + if woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \ + -X POST >/dev/null 2>&1; then + echo "$_new_retries" > "$_retry_file" + fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)" + else + p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed" + flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error" + fi + fi + done + + # Clean up stale retry tracking files (>24h) + find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true + fi + + # Dev-agent health (only if monitoring enabled) + if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then + 
DEV_LOCK="/tmp/dev-agent-${proj_name}.lock" + if [ -f "$DEV_LOCK" ]; then + DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null) + if ! kill -0 "$DEV_PID" 2>/dev/null; then + rm -f "$DEV_LOCK" + fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID} dead)" + else + DEV_STATUS_AGE=$(stat -c %Y "/tmp/dev-agent-status-${proj_name}" 2>/dev/null || echo 0) + NOW_EPOCH=$(date +%s) + STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 )) + if [ "$STATUS_AGE_MIN" -gt 30 ]; then + p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN}min" + fi + fi + fi + fi + + # Git repo health + if [ -d "${PROJECT_REPO_ROOT}" ]; then + cd "${PROJECT_REPO_ROOT}" 2>/dev/null || true + GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") + GIT_REBASE=$([ -d .git/rebase-merge ] || [ -d .git/rebase-apply ] && echo "yes" || echo "no") + + if [ "$GIT_REBASE" = "yes" ]; then + git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ + fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}" || \ + p2 "${proj_name}: Git: stale rebase, auto-abort failed" + fi + if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then + git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \ + fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}" || \ + p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}" + fi + fi + + # =========================================================================== + # P2b: FACTORY STALLED — backlog exists but no agent running + # =========================================================================== + if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then + status "P2: ${proj_name}: checking pipeline stall" + + BACKLOG_COUNT=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") + IN_PROGRESS=$(forge_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 
2>/dev/null | jq -r 'length' 2>/dev/null || echo "0") + + if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then + DEV_LOG="${DISINTO_LOG_DIR}/dev/dev-agent.log" + if [ -f "$DEV_LOG" ]; then + LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0) + else + LAST_LOG_EPOCH=0 + fi + NOW_EPOCH=$(date +%s) + IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 )) + + if [ "$IDLE_MIN" -gt 20 ]; then + p2 "${proj_name}: Pipeline stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN}min" + fi + fi + fi + + # =========================================================================== + # P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long + # =========================================================================== + if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then + status "P2: ${proj_name}: checking dev-agent productivity" + + DEV_LOG_FILE="${DISINTO_LOG_DIR}/dev/dev-agent.log" + if [ -f "$DEV_LOG_FILE" ]; then + RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6) + TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." 
|| true) + BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true) + if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then + p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'" + fi + fi + fi + + # =========================================================================== + # P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs + # =========================================================================== + if [ "${CHECK_PRS:-true}" = "true" ]; then + status "P3: ${proj_name}: checking PRs" + + OPEN_PRS=$(forge_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true) + for pr in $OPEN_PRS; do + PR_JSON=$(forge_api GET "/pulls/${pr}" 2>/dev/null || true) + [ -z "$PR_JSON" ] && continue + PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""') + [ -z "$PR_SHA" ] && continue + + CI_STATE=$(ci_commit_status "$PR_SHA" 2>/dev/null || true) + + MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true') + if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then + p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase" + elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then + UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""') + if [ -n "$UPDATED" ]; then + UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0) + NOW_EPOCH=$(date +%s) + AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 )) + [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min" + fi + elif ci_passed "$CI_STATE"; then + HAS_REVIEW=$(forge_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \ + jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains(" - -## What was expected - - - -## Steps to reproduce - - -1. -2. -3. 
- -## Environment - - -- Browser/Client: -- Wallet (if applicable): -- Network (if applicable): -- Version: diff --git a/tests/mock-forgejo.py b/tests/mock-forgejo.py index c65b522..d8be511 100755 --- a/tests/mock-forgejo.py +++ b/tests/mock-forgejo.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Mock Forgejo API server for CI smoke tests. -Implements 16 Forgejo API endpoints that disinto init calls. +Implements 15 Forgejo API endpoints that disinto init calls. State stored in-memory (dicts), responds instantly. """ @@ -135,7 +135,6 @@ class ForgejoHandler(BaseHTTPRequestHandler): # Users patterns (r"^users/([^/]+)$", f"handle_{method}_users_username"), (r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"), - (r"^users/([^/]+)/tokens/([^/]+)$", f"handle_{method}_users_username_tokens_token_id"), (r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"), # Repos patterns (r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"), @@ -150,7 +149,6 @@ class ForgejoHandler(BaseHTTPRequestHandler): # Admin patterns (r"^admin/users$", f"handle_{method}_admin_users"), (r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"), - (r"^admin/users/([^/]+)/repos$", f"handle_{method}_admin_users_username_repos"), # Org patterns (r"^orgs$", f"handle_{method}_orgs"), ] @@ -296,10 +294,7 @@ class ForgejoHandler(BaseHTTPRequestHandler): def handle_GET_users_username_tokens(self, query): """GET /api/v1/users/{username}/tokens""" - # Support both token auth (for listing own tokens) and basic auth (for admin listing) username = require_token(self) - if not username: - username = require_basic_auth(self) if not username: json_response(self, 401, {"message": "invalid authentication"}) return @@ -308,38 +303,6 @@ class ForgejoHandler(BaseHTTPRequestHandler): tokens = [t for t in state["tokens"].values() if t.get("username") == username] json_response(self, 200, tokens) - def handle_DELETE_users_username_tokens_token_id(self, query): - """DELETE 
/api/v1/users/{username}/tokens/{id}""" - # Support both token auth and basic auth - username = require_token(self) - if not username: - username = require_basic_auth(self) - if not username: - json_response(self, 401, {"message": "invalid authentication"}) - return - - parts = self.path.split("/") - if len(parts) >= 8: - token_id_str = parts[7] - else: - json_response(self, 404, {"message": "token not found"}) - return - - # Find and delete token by ID - deleted = False - for tok_sha1, tok in list(state["tokens"].items()): - if tok.get("id") == int(token_id_str) and tok.get("username") == username: - del state["tokens"][tok_sha1] - deleted = True - break - - if deleted: - self.send_response(204) - self.send_header("Content-Length", 0) - self.end_headers() - else: - json_response(self, 404, {"message": "token not found"}) - def handle_POST_users_username_tokens(self, query): """POST /api/v1/users/{username}/tokens""" username = require_basic_auth(self) @@ -497,55 +460,6 @@ class ForgejoHandler(BaseHTTPRequestHandler): state["repos"][key] = repo json_response(self, 201, repo) - def handle_POST_admin_users_username_repos(self, query): - """POST /api/v1/admin/users/{username}/repos - Admin API to create a repo under a specific user namespace. - This allows creating repos in any user's namespace when authenticated as admin. 
- """ - require_token(self) - - parts = self.path.split("/") - if len(parts) >= 6: - target_user = parts[4] - else: - json_response(self, 400, {"message": "username required"}) - return - - if target_user not in state["users"]: - json_response(self, 404, {"message": "user not found"}) - return - - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length).decode("utf-8") - data = json.loads(body) if body else {} - - repo_name = data.get("name") - if not repo_name: - json_response(self, 400, {"message": "name is required"}) - return - - repo_id = next_ids["repos"] - next_ids["repos"] += 1 - - key = f"{target_user}/{repo_name}" - repo = { - "id": repo_id, - "full_name": key, - "name": repo_name, - "owner": {"id": state["users"][target_user]["id"], "login": target_user}, - "empty": not data.get("auto_init", False), - "default_branch": data.get("default_branch", "main"), - "description": data.get("description", ""), - "private": data.get("private", False), - "html_url": f"https://example.com/{key}", - "ssh_url": f"git@example.com:{key}.git", - "clone_url": f"https://example.com/{key}.git", - "created_at": "2026-04-01T00:00:00Z", - } - - state["repos"][key] = repo - json_response(self, 201, repo) - def handle_POST_user_repos(self, query): """POST /api/v1/user/repos""" require_token(self) diff --git a/tests/smoke-init.sh b/tests/smoke-init.sh index a8371bd..80f8994 100644 --- a/tests/smoke-init.sh +++ b/tests/smoke-init.sh @@ -15,8 +15,7 @@ set -euo pipefail FACTORY_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" -# Always use localhost for mock Forgejo (in case FORGE_URL is set from docker-compose) -export FORGE_URL="http://localhost:3000" +FORGE_URL="${FORGE_URL:-http://localhost:3000}" MOCK_BIN="/tmp/smoke-mock-bin" TEST_SLUG="smoke-org/smoke-repo" FAILED=0 @@ -25,8 +24,6 @@ fail() { printf 'FAIL: %s\n' "$*" >&2; FAILED=1; } pass() { printf 'PASS: %s\n' "$*"; } cleanup() { - # Kill any leftover mock-forgejo.py processes by name - pkill -f "mock-forgejo.py" 2>/dev/null || true rm -rf "$MOCK_BIN" /tmp/smoke-test-repo \ "${FACTORY_ROOT}/projects/smoke-repo.toml" # Restore .env only if we created the backup @@ -175,18 +172,6 @@ else fail "disinto init exited non-zero" fi -# ── Idempotency test: run init again ─────────────────────────────────────── -echo "=== Idempotency test: running disinto init again ===" -if bash "${FACTORY_ROOT}/bin/disinto" init \ - "${TEST_SLUG}" \ - --bare --yes \ - --forge-url "$FORGE_URL" \ - --repo-root "/tmp/smoke-test-repo"; then - pass "disinto init (re-run) completed successfully" -else - fail "disinto init (re-run) exited non-zero" -fi - # ── 4. Verify Forgejo state ───────────────────────────────────────────────── echo "=== 4/6 Verifying Forgejo state ==="