Compare commits
198 commits
fix/issue-
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 192be70950 | |||
|
|
19dd7e61f4 | ||
| f7e36e76fe | |||
|
|
9a22e407a4 | ||
| 01f97ed6e5 | |||
|
|
d653680d64 | ||
| e871070942 | |||
|
|
cbc2a0ca4e | ||
| f19f38f16b | |||
|
|
6adb4895c2 | ||
| f686d47a98 | |||
|
|
7db129aba2 | ||
| e8b77b1055 | |||
|
|
630344900d | ||
| 2014eab1c4 | |||
| b495138850 | |||
|
|
514de48f58 | ||
|
|
cfe96f365c | ||
| ac2beac361 | |||
|
|
684501e385 | ||
| 83e92946d4 | |||
|
|
7e7fafd234 | ||
| 78c92dbdc4 | |||
|
|
c35d57a045 | ||
| fb27997e74 | |||
|
|
8480308d1d | ||
| 863925cb1c | |||
|
|
daf9151b9a | ||
| b4cc5d649e | |||
|
|
718327754a | ||
| ce250e3d1a | |||
|
|
ea64aa65d1 | ||
|
|
cc7dc6ccd7 | ||
|
|
a4bd8e8398 | ||
|
|
934cde7675 | ||
| 9830e6ce53 | |||
|
|
6d0eaf2687 | ||
| 8f58f834d5 | |||
|
|
f499de7c9d | ||
|
|
bba7665e09 | ||
| 8a10d6e26c | |||
|
|
96d1aa7a29 | ||
|
|
13a35f8355 | ||
| 9c199cdd6f | |||
| 113bc422cb | |||
|
|
e6ac67811a | ||
|
|
ae826f935b | ||
|
|
da70badb6d | ||
| 65ae5c908d | |||
|
|
c29d49cd5c | ||
| 064366678b | |||
|
|
fb23dcab41 | ||
| 205e28c66f | |||
| e2fbe9b718 | |||
|
|
52294a2efc | ||
|
|
5189b70dd3 | ||
| b0e789470e | |||
|
|
4aa824c203 | ||
| fcd892dce0 | |||
|
|
12ca3fe214 | ||
| 38acca0df4 | |||
|
|
b7bba15037 | ||
| 5c76d4beb0 | |||
|
|
3606d66a51 | ||
| ba5621f8f4 | |||
|
|
1d201fc9f6 | ||
| ffe763fcaa | |||
|
|
2b0f4f01d7 | ||
| 3775697e4f | |||
|
|
f637b53d3e | ||
| ef2cd16e3b | |||
|
|
e2e4ca5579 | ||
| c9e9c887db | |||
|
|
f2c7c806a1 | ||
| eaaecfc22b | |||
|
|
507e41a926 | ||
|
|
e22863eb60 | ||
| 84d74ce541 | |||
|
|
786c818509 | ||
| 3c76a5aac7 | |||
|
|
ce561b3745 | ||
|
|
7574bb7b3b | ||
| fcf72ccf7a | |||
|
|
47215a85aa | ||
| e65e091d3c | |||
|
|
c7e7fd00ea | ||
|
|
8c42303943 | ||
| 6d29dcf7d7 | |||
| 48a0826f4b | |||
|
|
3b1ebb4a3f | ||
|
|
7be56819be | ||
| 5e935e746b | |||
| 7f6a558681 | |||
|
|
5f6235e1f1 | ||
| a36f0a1b28 | |||
|
|
b21408e668 | ||
|
|
33f04a2976 | ||
| f10cdf2c9e | |||
| 141e44d423 | |||
|
|
b2be163808 | ||
|
|
7977e2562c | ||
| c01c27c04e | |||
|
|
b1695d8329 | ||
| 8d32168121 | |||
|
|
5b1a3b2091 | ||
| 8cdf92bd9d | |||
|
|
20778d3f06 | ||
| 6a05d8881b | |||
|
|
7dbd6c2352 | ||
| 5cf058b04b | |||
| 29e8cb0969 | |||
|
|
dd678737c7 | ||
|
|
a7eb051996 | ||
| c2ed7955e0 | |||
|
|
e7b11b22da | ||
| 8ad6e16829 | |||
| 94d5467ffe | |||
|
|
0098695644 | ||
|
|
26fa11efff | ||
| b23bb9f695 | |||
|
|
a97474d3f2 | ||
| a12346fe93 | |||
| b5e97b106c | |||
|
|
580de95f9e | ||
|
|
20de8e5d3a | ||
| f04a57e6db | |||
|
|
1cb7e4b8aa | ||
| 784a1ca1d5 | |||
|
|
300f335179 | ||
| ca3459ec61 | |||
|
|
bf2842eff8 | ||
|
|
a5d3f238bf | ||
|
|
81adad21e5 | ||
|
|
1053e02f67 | ||
|
|
139f77fdf5 | ||
| bc7d8d1df9 | |||
|
|
7ad1c63de3 | ||
| 410a5ee948 | |||
|
|
a5c34a5eba | ||
|
|
979e1210b4 | ||
| dcf348e486 | |||
|
|
4b47ca3c46 | ||
| fa0e5afd79 | |||
|
|
2381a24eaa | ||
| e3e809cd3b | |||
|
|
bd7a4d6d03 | ||
| e72168abee | |||
|
|
fc937d6904 | ||
|
|
d1fc528707 | ||
|
|
0883b1a5eb | ||
| 6d1b464bbd | |||
|
|
05022740ac | ||
| 1dce91664f | |||
| 4a94370215 | |||
|
|
8cbfbf102b | ||
|
|
67d66b3e7a | ||
|
|
3351bf06f0 | ||
| a8f13e1ac3 | |||
|
|
cbfbfef0bb | ||
| 6327f4d4d5 | |||
|
|
8f193eb40b | ||
| 076f6655df | |||
|
|
e4acd032f0 | ||
|
|
2b4c8be245 | ||
| bbc8ec8031 | |||
|
|
ed78d94025 | ||
| 562c6ad0bf | |||
|
|
31449cd401 | ||
| d191b54482 | |||
|
|
7f67153431 | ||
| d61d112cbf | |||
|
|
a2bfe1aa82 | ||
| e887663d8c | |||
|
|
38050bc2c3 | ||
| f425bfa72e | |||
|
|
fcaa2891eb | ||
| b894c5c0e1 | |||
|
|
68fdc898df | ||
| dd6937e997 | |||
|
|
d06cd47838 | ||
| 55e4132560 | |||
|
|
c362ac1440 | ||
| 9a1c9cc2f7 | |||
|
|
8184baf759 | ||
| 8522ee9abc | |||
|
|
cc771d89cd | ||
| 2596d2672a | |||
|
|
02a2c139a5 | ||
| 2aa3878915 | |||
|
|
3950c7fb8f | ||
| 999212b1cd | |||
|
|
f8bf620b32 | ||
| 33eb565d7e | |||
|
|
d98eb80398 | ||
| 6801ba3ed9 | |||
|
|
a8eba51653 | ||
| a5c2ef1d99 | |||
|
|
d03b44377d |
68 changed files with 5181 additions and 5132 deletions
20
.dockerignore
Normal file
20
.dockerignore
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Secrets — prevent .env files from being baked into the image
|
||||
.env
|
||||
.env.enc
|
||||
.env.vault
|
||||
.env.vault.enc
|
||||
|
||||
# Version control — .git is huge and not needed in image
|
||||
.git
|
||||
|
||||
# Archives — not needed at runtime
|
||||
*.tar.gz
|
||||
|
||||
# Prometheus data — large, ephemeral data
|
||||
prometheus-data/
|
||||
|
||||
# Compose files — only needed at runtime via volume mount
|
||||
docker-compose.yml
|
||||
|
||||
# Project TOML files — gitignored anyway, won't be in build context
|
||||
projects/*.toml
|
||||
|
|
@ -6,8 +6,6 @@
|
|||
# 2. Every custom function called by agent scripts is defined in lib/ or the script itself
|
||||
#
|
||||
# Fast (<10s): no network, no tmux, no Claude needed.
|
||||
# Would have caught: kill_tmux_session (renamed), create_agent_session (missing),
|
||||
# read_phase (missing from dev-agent.sh scope)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
|
@ -21,12 +19,16 @@ FAILED=0
|
|||
# Uses awk instead of grep -Eo for busybox/Alpine compatibility (#296).
|
||||
get_fns() {
|
||||
local f="$1"
|
||||
# BRE mode (no -E). Use [(][)] for literal parens — unambiguous across
|
||||
# GNU grep and BusyBox grep (some BusyBox builds treat bare () as grouping
|
||||
# even in BRE). BRE one-or-more via [X][X]* instead of +.
|
||||
grep '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]' "$f" 2>/dev/null \
|
||||
| sed 's/^[[:space:]]*//; s/[[:space:]]*[(][)].*$//' \
|
||||
| sort -u || true
|
||||
# Pure-awk implementation: avoids grep/sed cross-platform differences
|
||||
# (BusyBox grep BRE quirks, sed ; separator issues on Alpine).
|
||||
awk '
|
||||
/^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_][a-zA-Z0-9_]*[[:space:]]*[(][)]/ {
|
||||
line = $0
|
||||
gsub(/^[[:space:]]+/, "", line)
|
||||
sub(/[[:space:]]*[(].*/, "", line)
|
||||
print line
|
||||
}
|
||||
' "$f" 2>/dev/null | sort -u || true
|
||||
}
|
||||
|
||||
# Extract call-position identifiers that look like custom function calls:
|
||||
|
|
@ -95,13 +97,12 @@ echo "=== 2/2 Function resolution ==="
|
|||
#
|
||||
# Included — these are inline-sourced by agent scripts:
|
||||
# lib/env.sh — sourced by every agent (log, forge_api, etc.)
|
||||
# lib/agent-session.sh — sourced by orchestrators (create_agent_session, monitor_phase_loop, etc.)
|
||||
# lib/agent-sdk.sh — sourced by SDK agents (agent_run, agent_recover_session)
|
||||
# lib/ci-helpers.sh — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.)
|
||||
# lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set
|
||||
# lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue)
|
||||
# lib/secret-scan.sh — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets)
|
||||
# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.)
|
||||
# lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets)
|
||||
# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, check_memory, etc.)
|
||||
# lib/mirrors.sh — sourced by merge sites (mirror_push)
|
||||
# lib/guard.sh — sourced by all cron entry points (check_active)
|
||||
# lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps
|
||||
|
|
@ -116,7 +117,7 @@ echo "=== 2/2 Function resolution ==="
|
|||
# If a new lib file is added and sourced by agents, add it to LIB_FUNS below
|
||||
# and add a check_script call for it in the lib files section further down.
|
||||
LIB_FUNS=$(
|
||||
for f in lib/agent-session.sh lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do
|
||||
for f in lib/agent-sdk.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh lib/mirrors.sh lib/guard.sh lib/pr-lifecycle.sh lib/issue-lifecycle.sh lib/worktree.sh; do
|
||||
if [ -f "$f" ]; then get_fns "$f"; fi
|
||||
done | sort -u
|
||||
)
|
||||
|
|
@ -180,13 +181,12 @@ check_script() {
|
|||
# These are already in LIB_FUNS (their definitions are available to agents),
|
||||
# but this verifies calls *within* each lib file are also resolvable.
|
||||
check_script lib/env.sh lib/mirrors.sh
|
||||
check_script lib/agent-session.sh
|
||||
check_script lib/agent-sdk.sh
|
||||
check_script lib/ci-helpers.sh
|
||||
check_script lib/secret-scan.sh
|
||||
check_script lib/file-action-issue.sh lib/secret-scan.sh
|
||||
check_script lib/tea-helpers.sh lib/secret-scan.sh
|
||||
check_script lib/formula-session.sh lib/agent-session.sh
|
||||
check_script lib/formula-session.sh
|
||||
check_script lib/load-project.sh
|
||||
check_script lib/mirrors.sh lib/env.sh
|
||||
check_script lib/guard.sh
|
||||
|
|
@ -199,15 +199,13 @@ check_script lib/ci-debug.sh
|
|||
check_script lib/parse-deps.sh
|
||||
|
||||
# Agent scripts — list cross-sourced files where function scope flows across files.
|
||||
# phase-handler.sh defines default callback stubs; sourcing agents may override.
|
||||
check_script dev/dev-agent.sh
|
||||
check_script dev/phase-handler.sh lib/secret-scan.sh
|
||||
check_script dev/dev-poll.sh
|
||||
check_script dev/phase-test.sh
|
||||
check_script gardener/gardener-run.sh
|
||||
check_script review/review-pr.sh lib/agent-sdk.sh
|
||||
check_script review/review-poll.sh
|
||||
check_script planner/planner-run.sh lib/agent-session.sh lib/formula-session.sh
|
||||
check_script planner/planner-run.sh lib/formula-session.sh
|
||||
check_script supervisor/supervisor-poll.sh
|
||||
check_script supervisor/update-prompt.sh
|
||||
check_script supervisor/supervisor-run.sh
|
||||
|
|
|
|||
|
|
@ -8,6 +8,19 @@
|
|||
when:
|
||||
event: [push, pull_request]
|
||||
|
||||
# Override default clone to authenticate against Forgejo using FORGE_TOKEN.
|
||||
# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous
|
||||
# git clones fail with exit code 128. FORGE_TOKEN is injected globally via
|
||||
# WOODPECKER_ENVIRONMENT in docker-compose.yml (generated by lib/generators.sh).
|
||||
clone:
|
||||
git:
|
||||
image: alpine/git
|
||||
commands:
|
||||
- AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
|
||||
- git clone --depth 1 "$AUTH_URL" .
|
||||
- git fetch --depth 1 origin "$CI_COMMIT_REF"
|
||||
- git checkout FETCH_HEAD
|
||||
|
||||
steps:
|
||||
- name: shellcheck
|
||||
image: koalaman/shellcheck-alpine:stable
|
||||
|
|
|
|||
|
|
@ -302,6 +302,19 @@ def main() -> int:
|
|||
"f08a7139db9c96cd3526549c499c0332": "install_project_crons function in entrypoints (window f08a7139)",
|
||||
"f0917809bdf28ff93fff0749e7e7fea0": "install_project_crons function in entrypoints (window f0917809)",
|
||||
"f0e4101f9b90c2fa921e088057a96db7": "install_project_crons function in entrypoints (window f0e4101f)",
|
||||
# Structural end-of-while-loop+case pattern: `return 1 ;; esac done }`
|
||||
# Appears in stack_lock_acquire (lib/stack-lock.sh) and lib/pr-lifecycle.sh
|
||||
"29d4f34b703f44699237713cc8d8065b": "Structural end-of-while-loop+case (return 1, esac, done, closing brace)",
|
||||
# Forgejo org-creation API call pattern shared between forge-setup.sh and ops-setup.sh
|
||||
# Extracted from bin/disinto (not a .sh file, excluded from prior scans) into lib/forge-setup.sh
|
||||
"059b11945140c172465f9126b829ed7f": "Forgejo org-creation curl pattern (forge-setup.sh + ops-setup.sh)",
|
||||
# Docker compose environment block for agents service (generators.sh + hire-agent.sh)
|
||||
# Intentional duplicate - both generate the same docker-compose.yml template
|
||||
"8066210169a462fe565f18b6a26a57e0": "Docker compose environment block (generators.sh + hire-agent.sh)",
|
||||
"fd978fcd726696e0f280eba2c5198d50": "Docker compose environment block continuation (generators.sh + hire-agent.sh)",
|
||||
"e2760ccc2d4b993a3685bd8991594eb2": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh)",
|
||||
# The hash shown in output is 161a80f7 - need to match exactly what the script finds
|
||||
"161a80f7296d6e9d45895607b7f5b9c9": "Docker compose env_file + depends_on block (generators.sh + hire-agent.sh)",
|
||||
}
|
||||
|
||||
if not sh_files:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ when:
|
|||
- "bin/disinto"
|
||||
- "lib/load-project.sh"
|
||||
- "lib/env.sh"
|
||||
- "lib/generators.sh"
|
||||
- "tests/**"
|
||||
- ".woodpecker/smoke-init.yml"
|
||||
|
||||
|
|
@ -12,6 +13,7 @@ steps:
|
|||
image: python:3-alpine
|
||||
commands:
|
||||
- apk add --no-cache bash curl jq git coreutils
|
||||
- python3 tests/mock-forgejo.py &
|
||||
- python3 tests/mock-forgejo.py & echo $! > /tmp/mock-forgejo.pid
|
||||
- sleep 2
|
||||
- bash tests/smoke-init.sh
|
||||
- kill $(cat /tmp/mock-forgejo.pid) 2>/dev/null || true
|
||||
|
|
|
|||
40
AGENTS.md
40
AGENTS.md
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Disinto — Agent Instructions
|
||||
|
||||
## What this repo is
|
||||
|
|
@ -21,17 +21,16 @@ See `README.md` for the full architecture and `disinto-factory/SKILL.md` for set
|
|||
|
||||
```
|
||||
disinto/ (code repo)
|
||||
├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh — issue implementation
|
||||
├── dev/ dev-poll.sh, dev-agent.sh, phase-test.sh — issue implementation
|
||||
├── review/ review-poll.sh, review-pr.sh — PR review
|
||||
├── gardener/ gardener-run.sh — direct cron executor for run-gardener formula
|
||||
├── predictor/ predictor-run.sh — daily cron executor for run-predictor formula
|
||||
├── planner/ planner-run.sh — direct cron executor for run-planner formula
|
||||
├── supervisor/ supervisor-run.sh — formula-driven health monitoring (cron wrapper)
|
||||
│ preflight.sh — pre-flight data collection for supervisor formula
|
||||
│ supervisor-poll.sh — legacy bash orchestrator (superseded)
|
||||
├── architect/ architect-run.sh — strategic decomposition of vision into sprints
|
||||
├── vault/ vault-env.sh — shared env setup (vault redesign in progress, see #73-#77)
|
||||
├── lib/ env.sh, agent-session.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, profile.sh, build-graph.py
|
||||
├── lib/ env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py
|
||||
├── projects/ *.toml.example — templates; *.toml — local per-box config (gitignored)
|
||||
├── formulas/ Issue templates (TOML specs for multi-step agent tasks)
|
||||
└── docs/ Protocol docs (PHASE-PROTOCOL.md, EVIDENCE-ARCHITECTURE.md)
|
||||
|
|
@ -53,35 +52,9 @@ disinto-ops/ (ops repo — {project}-ops)
|
|||
|
||||
## Agent .profile Model
|
||||
|
||||
Each agent has a `.profile` repository on Forgejo that stores:
|
||||
- `formula.toml` — agent-specific formula (optional, falls back to `formulas/<agent>.toml`)
|
||||
- `knowledge/lessons-learned.md` — distilled lessons from journal entries
|
||||
- `journal/` — session reflection entries (archived after digestion)
|
||||
Each agent has a `.profile` repository on Forgejo storing `knowledge/lessons-learned.md` (injected into each session prompt) and `journal/` reflection entries (digested into lessons). Pre-session: `formula_prepare_profile_context()` loads lessons. Post-session: `profile_write_journal` records reflections. See `lib/profile.sh`.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **Pre-session:** The agent calls `formula_prepare_profile_context()` which:
|
||||
- Resolves the agent's Forgejo identity from their token
|
||||
- Clones/pulls the `.profile` repo to a local cache
|
||||
- Loads `knowledge/lessons-learned.md` into `LESSONS_CONTEXT` for prompt injection
|
||||
- Automatically digests journals if >10 undigested entries exist
|
||||
|
||||
2. **Prompt injection:** Lessons are injected into the agent prompt:
|
||||
```
|
||||
## Lessons learned (from .profile/knowledge/lessons-learned.md)
|
||||
<abstracted lessons from prior sessions>
|
||||
```
|
||||
|
||||
3. **Post-session:** The agent calls `profile_write_journal` which:
|
||||
- Generates a reflection entry about the session
|
||||
- Writes it to `journal/issue-{N}.md`
|
||||
- Commits and pushes to the `.profile` repo
|
||||
- Journals are archived after being digested into lessons-learned.md
|
||||
|
||||
> **Terminology note:** "Formulas" in this repo are TOML issue templates in `formulas/` that
|
||||
> orchestrate multi-step agent tasks (e.g., `run-gardener.toml`, `run-planner.toml`). This is
|
||||
> distinct from "processes" described in `docs/EVIDENCE-ARCHITECTURE.md`, which are measurement
|
||||
> and mutation pipelines that read external platforms and write structured evidence to git.
|
||||
> **Terminology note:** "Formulas" are TOML issue templates in `formulas/` that orchestrate multi-step agent tasks. Distinct from "processes" in `docs/EVIDENCE-ARCHITECTURE.md`.
|
||||
|
||||
## Tech stack
|
||||
|
||||
|
|
@ -146,6 +119,9 @@ Issues flow: `backlog` → `in-progress` → PR → CI → review → merge →
|
|||
| `blocked` | Issue is stuck — agent session failed, crashed, timed out, or CI exhausted. Diagnostic comment on the issue has details. Also used for unmet dependencies. | dev-agent.sh, dev-poll.sh (on failure) |
|
||||
| `tech-debt` | Pre-existing issue flagged by AI reviewer, not introduced by a PR. | review-pr.sh (auto-created follow-ups) |
|
||||
| `underspecified` | Dev-agent refused the issue as too large or vague. | dev-poll.sh (on preflight `too_large`), dev-agent.sh (on mid-run `too_large` refusal) |
|
||||
| `bug-report` | Issue describes user-facing broken behavior with reproduction steps. Separate triage track for reproduction automation. | Gardener (bug-report detection in grooming) |
|
||||
| `in-triage` | Bug reproduced but root cause not obvious — triage agent investigates. Set alongside `bug-report`. | reproduce-agent (when reproduction succeeds but cause unclear) |
|
||||
| `rejected` | Issue formally rejected — cannot reproduce, out of scope, or invalid. | reproduce-agent, humans |
|
||||
| `vision` | Goal anchors — high-level objectives from VISION.md. | Planner, humans |
|
||||
| `prediction/unreviewed` | Unprocessed prediction filed by predictor. | predictor-run.sh |
|
||||
| `prediction/dismissed` | Prediction triaged as DISMISS — planner disagrees, closed with reason. | Planner (triage-predictions step) |
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: auto-generated -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Architect — Agent Instructions
|
||||
|
||||
## What this agent is
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
|||
# shellcheck source=../lib/agent-sdk.sh
|
||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||
|
||||
LOG_FILE="$SCRIPT_DIR/architect.log"
|
||||
LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
LOGFILE="$LOG_FILE"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
|
|
@ -44,19 +44,40 @@ SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid"
|
|||
SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md"
|
||||
WORKTREE="/tmp/${PROJECT_NAME}-architect-run"
|
||||
|
||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
||||
# Override LOG_AGENT for consistent agent identification
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||
LOG_AGENT="architect"
|
||||
|
||||
# Override log() to append to architect-specific log file
|
||||
# shellcheck disable=SC2034
|
||||
log() {
|
||||
local agent="${LOG_AGENT:-architect}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||
}
|
||||
|
||||
# ── Guards ────────────────────────────────────────────────────────────────
|
||||
check_active architect
|
||||
acquire_cron_lock "/tmp/architect-run.lock"
|
||||
check_memory 2000
|
||||
memory_guard 2000
|
||||
|
||||
log "--- Architect run start ---"
|
||||
|
||||
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||
resolve_forge_remote
|
||||
|
||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_ARCHITECT_TOKEN:-}" ]; then
|
||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_ARCHITECT_TOKEN}" \
|
||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
||||
fi
|
||||
|
||||
# ── Load formula + context ───────────────────────────────────────────────
|
||||
load_formula "$FACTORY_ROOT/formulas/run-architect.toml"
|
||||
load_formula_or_profile "architect" "$FACTORY_ROOT/formulas/run-architect.toml" || exit 1
|
||||
build_context_block VISION.md AGENTS.md ops:prerequisites.md
|
||||
|
||||
# ── Prepare .profile context (lessons injection) ─────────────────────────
|
||||
formula_prepare_profile_context
|
||||
|
||||
# ── Build structural analysis graph ──────────────────────────────────────
|
||||
build_graph_section
|
||||
|
||||
|
|
@ -84,6 +105,7 @@ and file sub-issues after design forks are resolved.
|
|||
${CONTEXT_BLOCK}
|
||||
${GRAPH_SECTION}
|
||||
${SCRATCH_CONTEXT}
|
||||
$(formula_lessons_block)
|
||||
## Formula
|
||||
${FORMULA_CONTENT}
|
||||
|
||||
|
|
@ -104,4 +126,8 @@ agent_run --worktree "$WORKTREE" "$PROMPT"
|
|||
log "agent_run complete"
|
||||
|
||||
rm -f "$SCRATCH_FILE"
|
||||
|
||||
# Write journal entry post-session
|
||||
profile_write_journal "architect-run" "Architect run $(date -u +%Y-%m-%d)" "complete" "" || true
|
||||
|
||||
log "--- Architect run done ---"
|
||||
|
|
|
|||
2134
bin/disinto
2134
bin/disinto
File diff suppressed because it is too large
Load diff
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Dev Agent
|
||||
|
||||
**Role**: Implement issues autonomously — write code, push branches, address
|
||||
|
|
@ -14,9 +14,8 @@ in-progress issues are also picked up. The direct-merge scan runs before the loc
|
|||
check so approved PRs get merged even while a dev-agent session is active.
|
||||
|
||||
**Key files**:
|
||||
- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed` (replaced `prediction/backlog` — that label no longer exists)
|
||||
- `dev/dev-poll.sh` — Cron scheduler: finds next ready issue, handles merge/rebase of approved PRs, tracks CI fix attempts. Formula guard skips issues labeled `formula`, `prediction/dismissed`, or `prediction/unreviewed`. **Race prevention**: checks issue assignee before claiming — skips if assigned to a different bot user. **Stale branch abandonment**: closes PRs and deletes branches that are behind `$PRIMARY_BRANCH` (restarts poll cycle for a fresh start). **Stale in-progress recovery**: on each poll cycle, scans for issues labeled `in-progress`. If the issue is assigned to `$BOT_USER` (this agent), sets `BLOCKED_BY_INPROGRESS=true` — my thread is busy. If assigned to another agent, logs and falls through (does not block). If no assignee, no open PR, and no agent lock file — removes `in-progress`, adds `blocked` with a human-triage comment. **Per-agent open-PR gate**: before starting new work, filters open waiting PRs to only those assigned to this agent (`$BOT_USER`). Other agents' PRs do not block this agent's pipeline (#358, #369).
|
||||
- `dev/dev-agent.sh` — Orchestrator: claims issue, creates worktree + tmux session with interactive `claude`, monitors phase file, injects CI results and review feedback, merges on approval
|
||||
- `dev/phase-handler.sh` — Phase callback functions: `post_refusal_comment()`, `_on_phase_change()`, `build_phase_protocol_prompt()`. `do_merge()` detects already-merged PRs on HTTP 405 (race with dev-poll's pre-lock scan) and returns success instead of escalating. Sources `lib/mirrors.sh` and calls `mirror_push()` after every successful merge.
|
||||
- `dev/phase-test.sh` — Integration test for the phase protocol
|
||||
|
||||
**Environment variables consumed** (via `lib/env.sh` + project TOML):
|
||||
|
|
@ -33,7 +32,7 @@ check so approved PRs get merged even while a dev-agent session is active.
|
|||
|
||||
**Crash recovery**: on `PHASE:crashed` or non-zero exit, the worktree is **preserved** (not destroyed) for debugging. Location logged. Supervisor housekeeping removes stale crashed worktrees older than 24h.
|
||||
|
||||
**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux `dev-{project}-{issue}` → phase file
|
||||
**Lifecycle**: dev-poll.sh (`check_active dev`) → dev-agent.sh → tmux session → phase file
|
||||
drives CI/review loop → merge + `mirror_push()` → close issue. On respawn after
|
||||
`PHASE:escalate`, the stale phase file is cleared first so the session starts
|
||||
clean; the reinject prompt tells Claude not to re-escalate for the same reason.
|
||||
|
|
|
|||
374
dev/dev-poll.sh
374
dev/dev-poll.sh
|
|
@ -42,6 +42,11 @@ log() {
|
|||
printf '[%s] poll: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
||||
}
|
||||
|
||||
# Resolve current agent identity once at startup — cache for all assignee checks
|
||||
BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API%%/repos*}/user" | jq -r '.login') || BOT_USER=""
|
||||
log "running as agent: ${BOT_USER}"
|
||||
|
||||
# =============================================================================
|
||||
# CI FIX TRACKER: per-PR counter to avoid infinite respawn loops (max 3)
|
||||
# =============================================================================
|
||||
|
|
@ -94,6 +99,68 @@ is_blocked() {
|
|||
| jq -e '.[] | select(.name == "blocked")' >/dev/null 2>&1
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# STALENESS DETECTION FOR IN-PROGRESS ISSUES
|
||||
# =============================================================================
|
||||
|
||||
# Check if there's an open PR for a specific issue
|
||||
# Args: issue_number
|
||||
# Returns: 0 if open PR exists, 1 if not
|
||||
open_pr_exists() {
|
||||
local issue="$1"
|
||||
local branch="fix/issue-${issue}"
|
||||
local pr_num
|
||||
|
||||
pr_num=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls?state=open&limit=20" | \
|
||||
jq -r --arg branch "$branch" \
|
||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
||||
|
||||
[ -n "$pr_num" ]
|
||||
}
|
||||
|
||||
# Relabel a stale in-progress issue to blocked with diagnostic comment
|
||||
# Args: issue_number reason
|
||||
# Uses shared helpers from lib/issue-lifecycle.sh
|
||||
relabel_stale_issue() {
|
||||
local issue="$1" reason="$2"
|
||||
|
||||
log "relabeling stale in-progress issue #${issue} to blocked: ${reason}"
|
||||
|
||||
# Remove in-progress label
|
||||
local ip_id
|
||||
ip_id=$(_ilc_in_progress_id)
|
||||
if [ -n "$ip_id" ]; then
|
||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${issue}/labels/${ip_id}" >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# Add blocked label
|
||||
local bk_id
|
||||
bk_id=$(_ilc_blocked_id)
|
||||
if [ -n "$bk_id" ]; then
|
||||
curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${issue}/labels" \
|
||||
-d "{\"labels\":[${bk_id}]}" >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# Post diagnostic comment using shared helper
|
||||
local comment_body
|
||||
comment_body=$(
|
||||
printf '%s\n\n' '### Stale in-progress issue detected'
|
||||
printf '%s\n' '| Field | Value |'
|
||||
printf '%s\n' '|---|---|'
|
||||
printf '| Detection reason | `%s` |\n' "$reason"
|
||||
printf '| Timestamp | `%s` |\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
printf '%s\n' '**Status:** This issue was labeled `in-progress` but has no assignee, no open PR, and no agent lock file.'
|
||||
printf '%s\n' '**Action required:** A maintainer should triage this issue.'
|
||||
)
|
||||
_ilc_post_comment "$issue" "$comment_body"
|
||||
|
||||
_ilc_log "stale issue #${issue} relabeled to blocked: ${reason}"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# HELPER: handle CI-exhaustion check/block (DRY for 3 call sites)
|
||||
# Sets CI_FIX_ATTEMPTS for caller use. Returns 0 if exhausted, 1 if not.
|
||||
|
|
@ -278,6 +345,16 @@ for i in $(seq 0 $(($(echo "$PL_PRS" | jq 'length') - 1))); do
|
|||
jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
|
||||
|
||||
if [ "${PL_HAS_APPROVE:-0}" -gt 0 ]; then
|
||||
# Check if issue is assigned to this agent — only merge own PRs
|
||||
if [ "$PL_ISSUE" -gt 0 ]; then
|
||||
PR_ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${PL_ISSUE}") || true
|
||||
PR_ISSUE_ASSIGNEE=$(echo "$PR_ISSUE_JSON" | jq -r '.assignee.login // ""') || true
|
||||
if [ -n "$PR_ISSUE_ASSIGNEE" ] && [ "$PR_ISSUE_ASSIGNEE" != "$BOT_USER" ]; then
|
||||
log "PR #${PL_PR_NUM} (issue #${PL_ISSUE}) assigned to ${PR_ISSUE_ASSIGNEE} — skipping merge (not mine)"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
if try_direct_merge "$PL_PR_NUM" "$PL_ISSUE"; then
|
||||
PL_MERGED_ANY=true
|
||||
fi
|
||||
|
|
@ -301,6 +378,9 @@ if [ -f "$LOCKFILE" ]; then
|
|||
rm -f "$LOCKFILE"
|
||||
fi
|
||||
|
||||
# --- Fetch origin refs before any stale branch checks ---
|
||||
git fetch origin --prune 2>/dev/null || true
|
||||
|
||||
# --- Memory guard ---
|
||||
memory_guard 2000
|
||||
|
||||
|
|
@ -309,129 +389,176 @@ memory_guard 2000
|
|||
# =============================================================================
|
||||
log "checking for in-progress issues"
|
||||
|
||||
# Get current bot identity for assignee checks
|
||||
BOT_USER=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API%%/repos*}/user" | jq -r '.login') || BOT_USER=""
|
||||
|
||||
ORPHANS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues?state=open&labels=in-progress&limit=10&type=issues")
|
||||
|
||||
ORPHAN_COUNT=$(echo "$ORPHANS_JSON" | jq 'length')
|
||||
BLOCKED_BY_INPROGRESS=false
|
||||
if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||
ISSUE_NUM=$(echo "$ORPHANS_JSON" | jq -r '.[0].number')
|
||||
|
||||
# Formula guard: formula-labeled issues should not be worked on by dev-agent.
|
||||
# Remove in-progress label and skip to prevent infinite respawn cycle (#115).
|
||||
ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
|
||||
SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
|
||||
if [ -n "$SKIP_LABEL" ]; then
|
||||
log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping"
|
||||
IP_ID=$(_ilc_in_progress_id)
|
||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
||||
exit 0
|
||||
# Staleness check: if no assignee, no open PR, and no agent lock, the issue is stale
|
||||
OPEN_PR=false
|
||||
if curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls?state=open&limit=20" | \
|
||||
jq -e --arg branch "fix/issue-${ISSUE_NUM}" \
|
||||
'.[] | select(.head.ref == $branch)' >/dev/null 2>&1; then
|
||||
OPEN_PR=true
|
||||
fi
|
||||
|
||||
# Check if there's already an open PR for this issue
|
||||
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls?state=open&limit=20" | \
|
||||
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
|
||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
||||
|
||||
if [ -n "$HAS_PR" ]; then
|
||||
# Check if branch is stale (behind primary branch)
|
||||
BRANCH="fix/issue-${ISSUE_NUM}"
|
||||
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999")
|
||||
if [ "$AHEAD" -gt 0 ]; then
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
||||
# Close the PR via API
|
||||
curl -sf -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/pulls/${HAS_PR}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
# Delete the branch via git push
|
||||
git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true
|
||||
# Reset to fresh start on primary branch
|
||||
git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true
|
||||
git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true
|
||||
# Exit to restart poll cycle (issue will be picked up fresh)
|
||||
exit 0
|
||||
fi
|
||||
|
||||
PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true
|
||||
CI_STATE=$(ci_commit_status "$PR_SHA") || true
|
||||
|
||||
# Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed
|
||||
if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then
|
||||
CI_STATE="success"
|
||||
log "PR #${HAS_PR} has no code files — treating CI as passed"
|
||||
fi
|
||||
|
||||
# Check formal reviews (single fetch to avoid race window)
|
||||
REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${HAS_PR}/reviews") || true
|
||||
HAS_APPROVE=$(echo "$REVIEWS_JSON" | \
|
||||
jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
|
||||
HAS_CHANGES=$(echo "$REVIEWS_JSON" | \
|
||||
jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true
|
||||
|
||||
if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
|
||||
if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then
|
||||
exit 0
|
||||
fi
|
||||
# Direct merge failed (conflicts?) — fall back to dev-agent
|
||||
log "falling back to dev-agent for PR #${HAS_PR} merge"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
|
||||
exit 0
|
||||
|
||||
# Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is
|
||||
# pending/unknown. Definitive CI failure is handled by the elif below.
|
||||
elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
|
||||
exit 0
|
||||
|
||||
elif ci_failed "$CI_STATE"; then
|
||||
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then
|
||||
# Fall through to backlog scan instead of exit
|
||||
:
|
||||
else
|
||||
# Increment at actual launch time (not on guard-hit paths)
|
||||
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then
|
||||
exit 0 # exhausted between check and launch
|
||||
fi
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check if issue has an assignee — only block on issues assigned to this agent
|
||||
assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" "${API}/issues/${ISSUE_NUM}" | jq -r '.assignee.login // ""')
|
||||
if [ -n "$assignee" ]; then
|
||||
if [ "$assignee" = "$BOT_USER" ]; then
|
||||
log "issue #${ISSUE_NUM} assigned to me — my thread is busy"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
else
|
||||
log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)"
|
||||
exit 0
|
||||
log "issue #${ISSUE_NUM} assigned to ${assignee} — their thread, not blocking"
|
||||
# Issue assigned to another agent — don't block, fall through to backlog
|
||||
fi
|
||||
else
|
||||
# Check assignee before adopting orphaned issue
|
||||
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE_NUM}") || true
|
||||
ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
|
||||
fi
|
||||
|
||||
if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then
|
||||
log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)"
|
||||
# Remove in-progress label since this agent isn't working on it
|
||||
IP_ID=$(_ilc_in_progress_id)
|
||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
||||
exit 0
|
||||
# Only proceed with in-progress checks if not blocked by another agent
|
||||
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||
# Check for dev-agent lock file (agent may be running in another container)
|
||||
LOCK_FILE="/tmp/dev-impl-summary-${PROJECT_NAME}-${ISSUE_NUM}.txt"
|
||||
if [ -f "$LOCK_FILE" ]; then
|
||||
log "issue #${ISSUE_NUM} has agent lock file — trusting active work"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
|
||||
log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
|
||||
if [ "$OPEN_PR" = false ] && [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||
log "issue #${ISSUE_NUM} is stale (no assignee, no open PR, no agent lock) — relabeling to blocked"
|
||||
relabel_stale_issue "$ISSUE_NUM" "no_assignee_no_open_pr_no_lock"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
|
||||
# Formula guard: formula-labeled issues should not be worked on by dev-agent.
|
||||
# Remove in-progress label and skip to prevent infinite respawn cycle (#115).
|
||||
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||
ORPHAN_LABELS=$(echo "$ORPHANS_JSON" | jq -r '.[0].labels[].name' 2>/dev/null) || true
|
||||
SKIP_LABEL=$(echo "$ORPHAN_LABELS" | grep -oE '^(formula|prediction/dismissed|prediction/unreviewed)$' | head -1) || true
|
||||
if [ -n "$SKIP_LABEL" ]; then
|
||||
log "issue #${ISSUE_NUM} has '${SKIP_LABEL}' label — removing in-progress, skipping"
|
||||
IP_ID=$(_ilc_in_progress_id)
|
||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check if there's already an open PR for this issue
|
||||
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||
HAS_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls?state=open&limit=20" | \
|
||||
jq -r --arg branch "fix/issue-${ISSUE_NUM}" \
|
||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
||||
|
||||
if [ -n "$HAS_PR" ]; then
|
||||
# Check if branch is stale (behind primary branch)
|
||||
BRANCH="fix/issue-${ISSUE_NUM}"
|
||||
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0")
|
||||
if [ "$AHEAD" -gt 0 ]; then
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
||||
# Close the PR via API
|
||||
curl -sf -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/pulls/${HAS_PR}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
# Delete the branch via git push
|
||||
git -C "${PROJECT_REPO_ROOT:-}" push origin --delete "${BRANCH}" 2>/dev/null || true
|
||||
# Reset to fresh start on primary branch
|
||||
git -C "${PROJECT_REPO_ROOT:-}" checkout "${PRIMARY_BRANCH}" 2>/dev/null || true
|
||||
git -C "${PROJECT_REPO_ROOT:-}" pull --ff-only origin "${PRIMARY_BRANCH}" 2>/dev/null || true
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
|
||||
# Only process PR if not abandoned (stale branch check above)
|
||||
if [ "$BLOCKED_BY_INPROGRESS" = false ]; then
|
||||
PR_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${HAS_PR}" | jq -r '.head.sha') || true
|
||||
CI_STATE=$(ci_commit_status "$PR_SHA") || true
|
||||
|
||||
# Non-code PRs (docs, formulas, evidence) may have no CI — treat as passed
|
||||
if ! ci_passed "$CI_STATE" && ! ci_required_for_pr "$HAS_PR"; then
|
||||
CI_STATE="success"
|
||||
log "PR #${HAS_PR} has no code files — treating CI as passed"
|
||||
fi
|
||||
|
||||
# Check formal reviews (single fetch to avoid race window)
|
||||
REVIEWS_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${HAS_PR}/reviews") || true
|
||||
HAS_APPROVE=$(echo "$REVIEWS_JSON" | \
|
||||
jq -r '[.[] | select(.state == "APPROVED") | select(.stale == false)] | length') || true
|
||||
HAS_CHANGES=$(echo "$REVIEWS_JSON" | \
|
||||
jq -r '[.[] | select(.state == "REQUEST_CHANGES") | select(.stale == false)] | length') || true
|
||||
|
||||
if ci_passed "$CI_STATE" && [ "${HAS_APPROVE:-0}" -gt 0 ]; then
|
||||
if try_direct_merge "$HAS_PR" "$ISSUE_NUM"; then
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
else
|
||||
# Direct merge failed (conflicts?) — fall back to dev-agent
|
||||
log "falling back to dev-agent for PR #${HAS_PR} merge"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (agent-merge)"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
|
||||
# Do NOT gate REQUEST_CHANGES on ci_passed: act immediately even if CI is
|
||||
# pending/unknown. Definitive CI failure is handled by the elif below.
|
||||
elif [ "${HAS_CHANGES:-0}" -gt 0 ] && { ci_passed "$CI_STATE" || [ "$CI_STATE" = "pending" ] || [ "$CI_STATE" = "unknown" ] || [ -z "$CI_STATE" ]; }; then
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} has REQUEST_CHANGES — spawning agent"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (review fix)"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
|
||||
elif ci_failed "$CI_STATE"; then
|
||||
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM" "check_only"; then
|
||||
# Fall through to backlog scan instead of exit
|
||||
:
|
||||
else
|
||||
# Increment at actual launch time (not on guard-hit paths)
|
||||
if handle_ci_exhaustion "$HAS_PR" "$ISSUE_NUM"; then
|
||||
BLOCKED_BY_INPROGRESS=true # exhausted between check and launch
|
||||
else
|
||||
log "issue #${ISSUE_NUM} PR #${HAS_PR} CI failed — spawning agent to fix (attempt ${CI_FIX_ATTEMPTS}/3)"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (CI fix)"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
fi
|
||||
|
||||
else
|
||||
log "issue #${ISSUE_NUM} has open PR #${HAS_PR} (CI: ${CI_STATE}, waiting)"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
fi
|
||||
else
|
||||
# Check assignee before adopting orphaned issue
|
||||
ISSUE_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE_NUM}") || true
|
||||
ASSIGNEE=$(echo "$ISSUE_JSON" | jq -r '.assignee.login // ""') || true
|
||||
|
||||
if [ -n "$ASSIGNEE" ] && [ "$ASSIGNEE" != "$BOT_USER" ]; then
|
||||
log "issue #${ISSUE_NUM} assigned to ${ASSIGNEE} — skipping (not orphaned)"
|
||||
# Remove in-progress label since this agent isn't working on it
|
||||
IP_ID=$(_ilc_in_progress_id)
|
||||
curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE_NUM}/labels/${IP_ID}" >/dev/null 2>&1 || true
|
||||
# Don't block — fall through to backlog
|
||||
else
|
||||
log "recovering orphaned issue #${ISSUE_NUM} (no PR found, assigned to ${BOT_USER:-unassigned})"
|
||||
nohup "${SCRIPT_DIR}/dev-agent.sh" "$ISSUE_NUM" >> "$LOGFILE" 2>&1 &
|
||||
log "started dev-agent PID $! for issue #${ISSUE_NUM} (recovery)"
|
||||
BLOCKED_BY_INPROGRESS=true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# If blocked by in-progress work, exit now
|
||||
if [ "$BLOCKED_BY_INPROGRESS" = true ]; then
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
|
@ -593,7 +720,7 @@ for i in $(seq 0 $((BACKLOG_COUNT - 1))); do
|
|||
if [ -n "$EXISTING_PR" ]; then
|
||||
# Check if branch is stale (behind primary branch)
|
||||
BRANCH="fix/issue-${ISSUE_NUM}"
|
||||
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "999")
|
||||
AHEAD=$(git rev-list --count "origin/${BRANCH}..origin/${PRIMARY_BRANCH}" 2>/dev/null || echo "0")
|
||||
if [ "$AHEAD" -gt 0 ]; then
|
||||
log "issue #${ISSUE_NUM} PR #${EXISTING_PR} is $AHEAD commits behind ${PRIMARY_BRANCH} — abandoning stale PR"
|
||||
# Close the PR via API
|
||||
|
|
@ -668,9 +795,32 @@ done
|
|||
|
||||
# Single-threaded per project: if any issue has an open PR waiting for review/CI,
|
||||
# don't start new work — let the pipeline drain first
|
||||
# But only block on PRs assigned to this agent (per-agent logic from #358)
|
||||
if [ -n "$READY_ISSUE" ] && [ -n "${WAITING_PRS:-}" ]; then
|
||||
log "holding #${READY_ISSUE} — waiting for open PR(s) to land first: ${WAITING_PRS}"
|
||||
exit 0
|
||||
# Filter to only this agent's waiting PRs
|
||||
MY_WAITING_PRS=""
|
||||
for pr_num in $(echo "$WAITING_PRS" | tr ',' ' '); do
|
||||
pr_num="${pr_num#\#}" # Remove leading #
|
||||
# Check if this PR's issue is assigned to this agent
|
||||
pr_info=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${pr_num}" 2>/dev/null) || true
|
||||
pr_branch=$(echo "$pr_info" | jq -r '.head.ref') || true
|
||||
issue_num=$(echo "$pr_branch" | grep -oP '(?<=fix/issue-)\d+' || true)
|
||||
if [ -z "$issue_num" ]; then
|
||||
continue
|
||||
fi
|
||||
issue_assignee=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${issue_num}" 2>/dev/null | jq -r '.assignee.login // ""') || true
|
||||
if [ -n "$issue_assignee" ] && [ "$issue_assignee" = "$BOT_USER" ]; then
|
||||
MY_WAITING_PRS="${MY_WAITING_PRS:-}${MY_WAITING_PRS:+, }#${pr_num}"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$MY_WAITING_PRS" ]; then
|
||||
log "holding #${READY_ISSUE} — waiting for my open PR(s) to land first: ${MY_WAITING_PRS}"
|
||||
exit 0
|
||||
fi
|
||||
log "other agents' PRs waiting: ${WAITING_PRS} — proceeding with #${READY_ISSUE}"
|
||||
fi
|
||||
|
||||
if [ -z "$READY_ISSUE" ]; then
|
||||
|
|
|
|||
|
|
@ -1,820 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# dev/phase-handler.sh — Phase callback functions for dev-agent.sh
|
||||
#
|
||||
# Source this file from agent orchestrators after lib/agent-session.sh is loaded.
|
||||
# Defines: post_refusal_comment(), _on_phase_change(), build_phase_protocol_prompt()
|
||||
#
|
||||
# Required globals (set by calling agent before or after sourcing):
|
||||
# ISSUE, FORGE_TOKEN, API, FORGE_WEB, PROJECT_NAME, FACTORY_ROOT
|
||||
# BRANCH, PHASE_FILE, WORKTREE, IMPL_SUMMARY_FILE
|
||||
# PRIMARY_BRANCH, SESSION_NAME, LOGFILE, ISSUE_TITLE
|
||||
# WOODPECKER_REPO_ID, WOODPECKER_TOKEN, WOODPECKER_SERVER
|
||||
#
|
||||
# Globals with defaults (agents can override after sourcing):
|
||||
# PR_NUMBER, CI_POLL_TIMEOUT, MAX_CI_FIXES, MAX_REVIEW_ROUNDS,
|
||||
# REVIEW_POLL_TIMEOUT, CI_RETRY_COUNT, CI_FIX_COUNT, REVIEW_ROUND,
|
||||
# CLAIMED, PHASE_POLL_INTERVAL
|
||||
#
|
||||
# Calls back to agent-defined helpers:
|
||||
# cleanup_worktree(), cleanup_labels(), status(), log()
|
||||
#
|
||||
# shellcheck shell=bash
|
||||
# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling
|
||||
# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh
|
||||
|
||||
# Load secret scanner for redacting tmux output before posting to issues
|
||||
# shellcheck source=../lib/secret-scan.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh"
|
||||
|
||||
# Load shared CI helpers (is_infra_step, classify_pipeline_failure, etc.)
|
||||
# shellcheck source=../lib/ci-helpers.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/../lib/ci-helpers.sh"
|
||||
|
||||
# Load mirror push helper
|
||||
# shellcheck source=../lib/mirrors.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/../lib/mirrors.sh"
|
||||
|
||||
# --- Default callback stubs (agents can override after sourcing) ---
|
||||
# cleanup_worktree and cleanup_labels are called during phase transitions.
|
||||
# Provide no-op defaults so phase-handler.sh is self-contained; sourcing
|
||||
# agents override these with real implementations.
|
||||
if ! declare -f cleanup_worktree >/dev/null 2>&1; then
|
||||
cleanup_worktree() { :; }
|
||||
fi
|
||||
if ! declare -f cleanup_labels >/dev/null 2>&1; then
|
||||
cleanup_labels() { :; }
|
||||
fi
|
||||
|
||||
# --- Default globals (agents can override after sourcing) ---
|
||||
: "${CI_POLL_TIMEOUT:=1800}"
|
||||
: "${REVIEW_POLL_TIMEOUT:=10800}"
|
||||
: "${MAX_CI_FIXES:=3}"
|
||||
: "${MAX_REVIEW_ROUNDS:=5}"
|
||||
: "${CI_RETRY_COUNT:=0}"
|
||||
: "${CI_FIX_COUNT:=0}"
|
||||
: "${REVIEW_ROUND:=0}"
|
||||
: "${PR_NUMBER:=}"
|
||||
: "${CLAIMED:=false}"
|
||||
: "${PHASE_POLL_INTERVAL:=30}"
|
||||
|
||||
# --- Post diagnostic comment + label issue as blocked ---
|
||||
# Captures tmux pane output, posts a structured comment on the issue, removes
|
||||
# in-progress label, and adds the "blocked" label.
|
||||
#
|
||||
# Args: reason [session_name]
|
||||
# Uses globals: ISSUE, SESSION_NAME, PR_NUMBER, FORGE_TOKEN, API
|
||||
post_blocked_diagnostic() {
|
||||
local reason="$1"
|
||||
local session="${2:-${SESSION_NAME:-}}"
|
||||
|
||||
# Capture last 50 lines from tmux pane (before kill)
|
||||
local tmux_output=""
|
||||
if [ -n "$session" ] && tmux has-session -t "$session" 2>/dev/null; then
|
||||
tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true)
|
||||
fi
|
||||
|
||||
# Redact any secrets from tmux output before posting to issue
|
||||
if [ -n "$tmux_output" ]; then
|
||||
tmux_output=$(redact_secrets "$tmux_output")
|
||||
fi
|
||||
|
||||
# Build diagnostic comment body
|
||||
local comment
|
||||
comment="### Session failure diagnostic
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Exit reason | \`${reason}\` |
|
||||
| Timestamp | \`$(date -u +%Y-%m-%dT%H:%M:%SZ)\` |"
|
||||
[ -n "${PR_NUMBER:-}" ] && [ "${PR_NUMBER:-0}" != "0" ] && \
|
||||
comment="${comment}
|
||||
| PR | #${PR_NUMBER} |"
|
||||
|
||||
if [ -n "$tmux_output" ]; then
|
||||
comment="${comment}
|
||||
|
||||
<details><summary>Last 50 lines from tmux pane</summary>
|
||||
|
||||
\`\`\`
|
||||
${tmux_output}
|
||||
\`\`\`
|
||||
</details>"
|
||||
fi
|
||||
|
||||
# Post comment to issue
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}/comments" \
|
||||
-d "$(jq -nc --arg b "$comment" '{body:$b}')" >/dev/null 2>&1 || true
|
||||
|
||||
# Remove in-progress, add blocked
|
||||
cleanup_labels
|
||||
local blocked_id
|
||||
blocked_id=$(ensure_blocked_label_id)
|
||||
if [ -n "$blocked_id" ]; then
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}/labels" \
|
||||
-d "{\"labels\":[${blocked_id}]}" >/dev/null 2>&1 || true
|
||||
fi
|
||||
CLAIMED=false
|
||||
_BLOCKED_POSTED=true
|
||||
}
|
||||
|
||||
# --- Build phase protocol prompt (shared across agents) ---
|
||||
# Generates the phase-signaling instructions for Claude prompts.
|
||||
# Args: phase_file summary_file branch [remote]
|
||||
# Output: The protocol text (stdout)
|
||||
build_phase_protocol_prompt() {
|
||||
local _pf="$1" _sf="$2" _br="$3" _remote="${4:-${FORGE_REMOTE:-origin}}"
|
||||
cat <<_PHASE_PROTOCOL_EOF_
|
||||
## Phase-Signaling Protocol (REQUIRED)
|
||||
|
||||
You are running in a persistent tmux session managed by an orchestrator.
|
||||
Communicate progress by writing to the phase file. The orchestrator watches
|
||||
this file and injects events (CI results, review feedback) back into this session.
|
||||
|
||||
### Key files
|
||||
\`\`\`
|
||||
PHASE_FILE="${_pf}"
|
||||
SUMMARY_FILE="${_sf}"
|
||||
\`\`\`
|
||||
|
||||
### Phase transitions — write these exactly:
|
||||
|
||||
**After committing and pushing your branch:**
|
||||
\`\`\`bash
|
||||
# Rebase on target branch before push to avoid merge conflicts
|
||||
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
|
||||
git push ${_remote} ${_br}
|
||||
# Write a short summary of what you implemented:
|
||||
printf '%s' "<your summary>" > "\${SUMMARY_FILE}"
|
||||
# Signal the orchestrator to create the PR and watch for CI:
|
||||
echo "PHASE:awaiting_ci" > "${_pf}"
|
||||
\`\`\`
|
||||
Then STOP and wait. The orchestrator will inject CI results.
|
||||
|
||||
**When you receive a "CI passed" injection:**
|
||||
\`\`\`bash
|
||||
echo "PHASE:awaiting_review" > "${_pf}"
|
||||
\`\`\`
|
||||
Then STOP and wait. The orchestrator will inject review feedback.
|
||||
|
||||
**When you receive a "CI failed:" injection:**
|
||||
Fix the CI issue, then rebase on target branch and push:
|
||||
\`\`\`bash
|
||||
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
|
||||
git push --force-with-lease ${_remote} ${_br}
|
||||
echo "PHASE:awaiting_ci" > "${_pf}"
|
||||
\`\`\`
|
||||
Then STOP and wait.
|
||||
|
||||
**When you receive a "Review: REQUEST_CHANGES" injection:**
|
||||
Address ALL review feedback, then rebase on target branch and push:
|
||||
\`\`\`bash
|
||||
git fetch ${_remote} ${PRIMARY_BRANCH} && git rebase ${_remote}/${PRIMARY_BRANCH}
|
||||
git push --force-with-lease ${_remote} ${_br}
|
||||
echo "PHASE:awaiting_ci" > "${_pf}"
|
||||
\`\`\`
|
||||
(CI runs again after each push — always write awaiting_ci, not awaiting_review)
|
||||
|
||||
**When you need human help (CI exhausted, merge blocked, stuck on a decision):**
|
||||
\`\`\`bash
|
||||
printf 'PHASE:escalate\nReason: %s\n' "describe what you need" > "${_pf}"
|
||||
\`\`\`
|
||||
Then STOP and wait. A human will review and respond via the forge.
|
||||
|
||||
**On unrecoverable failure:**
|
||||
\`\`\`bash
|
||||
printf 'PHASE:failed\nReason: %s\n' "describe what failed" > "${_pf}"
|
||||
\`\`\`
|
||||
_PHASE_PROTOCOL_EOF_
|
||||
}
|
||||
|
||||
# --- Merge helper ---
|
||||
# do_merge — attempt to merge PR via forge API.
|
||||
# Args: pr_num
|
||||
# Returns:
|
||||
# 0 = merged successfully
|
||||
# 1 = other failure (conflict, network error, etc.)
|
||||
# 2 = not enough approvals (HTTP 405) — PHASE:escalate already written
|
||||
do_merge() {
|
||||
local pr_num="$1"
|
||||
local merge_response merge_http_code merge_body
|
||||
merge_response=$(curl -s -w "\n%{http_code}" -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${API}/pulls/${pr_num}/merge" \
|
||||
-d '{"Do":"merge","delete_branch_after_merge":true}') || true
|
||||
merge_http_code=$(echo "$merge_response" | tail -1)
|
||||
merge_body=$(echo "$merge_response" | sed '$d')
|
||||
|
||||
if [ "$merge_http_code" = "200" ] || [ "$merge_http_code" = "204" ]; then
|
||||
log "do_merge: PR #${pr_num} merged (HTTP ${merge_http_code})"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# HTTP 405 — could be "merge requirements not met" OR "already merged" (race with dev-poll).
|
||||
# Before escalating, check whether the PR was already merged by another agent.
|
||||
if [ "$merge_http_code" = "405" ]; then
|
||||
local pr_state
|
||||
pr_state=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${pr_num}" | jq -r '.merged // false') || pr_state="false"
|
||||
if [ "$pr_state" = "true" ]; then
|
||||
log "do_merge: PR #${pr_num} already merged (detected after HTTP 405) — treating as success"
|
||||
return 0
|
||||
fi
|
||||
log "do_merge: PR #${pr_num} blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}"
|
||||
printf 'PHASE:escalate\nReason: %s\n' \
|
||||
"PR #${pr_num} merge blocked — merge requirements not met (HTTP 405): ${merge_body:0:200}" \
|
||||
> "$PHASE_FILE"
|
||||
return 2
|
||||
fi
|
||||
|
||||
log "do_merge: PR #${pr_num} merge failed (HTTP ${merge_http_code}): ${merge_body:0:200}"
|
||||
return 1
|
||||
}
|
||||
|
||||
# --- Refusal comment helper ---
|
||||
post_refusal_comment() {
|
||||
local emoji="$1" title="$2" body="$3"
|
||||
local last_has_title
|
||||
last_has_title=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE}/comments?limit=5" | \
|
||||
jq -r --arg t "Dev-agent: ${title}" '[.[] | .body // ""] | any(contains($t)) | tostring') || true
|
||||
if [ "$last_has_title" = "true" ]; then
|
||||
log "skipping duplicate refusal comment: ${title}"
|
||||
return 0
|
||||
fi
|
||||
local comment
|
||||
comment="${emoji} **Dev-agent: ${title}**
|
||||
|
||||
${body}
|
||||
|
||||
---
|
||||
*Automated assessment by dev-agent · $(date -u '+%Y-%m-%d %H:%M UTC')*"
|
||||
printf '%s' "$comment" > "/tmp/refusal-comment.txt"
|
||||
jq -Rs '{body: .}' < "/tmp/refusal-comment.txt" > "/tmp/refusal-comment.json"
|
||||
curl -sf -o /dev/null -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}/comments" \
|
||||
--data-binary @"/tmp/refusal-comment.json" 2>/dev/null || \
|
||||
log "WARNING: failed to post refusal comment"
|
||||
rm -f "/tmp/refusal-comment.txt" "/tmp/refusal-comment.json"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# PHASE DISPATCH CALLBACK
|
||||
# =============================================================================
|
||||
|
||||
# _on_phase_change — Phase dispatch callback for monitor_phase_loop
|
||||
# Receives the current phase as $1.
|
||||
# Returns 0 to continue the loop, 1 to break (terminal phase reached).
|
||||
_on_phase_change() {
|
||||
local phase="$1"
|
||||
|
||||
# ── PHASE: awaiting_ci ──────────────────────────────────────────────────────
|
||||
if [ "$phase" = "PHASE:awaiting_ci" ]; then
|
||||
# Release session lock — Claude is idle during CI polling (#724)
|
||||
session_lock_release
|
||||
|
||||
# Create PR if not yet created
|
||||
if [ -z "${PR_NUMBER:-}" ]; then
|
||||
status "creating PR for issue #${ISSUE}"
|
||||
IMPL_SUMMARY=""
|
||||
if [ -f "$IMPL_SUMMARY_FILE" ]; then
|
||||
# Don't treat refusal JSON as a PR summary
|
||||
if ! jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then
|
||||
IMPL_SUMMARY=$(head -c 4000 "$IMPL_SUMMARY_FILE")
|
||||
fi
|
||||
fi
|
||||
|
||||
printf 'Fixes #%s\n\n## Changes\n%s' "$ISSUE" "$IMPL_SUMMARY" > "/tmp/pr-body-${ISSUE}.txt"
|
||||
jq -n \
|
||||
--arg title "fix: ${ISSUE_TITLE} (#${ISSUE})" \
|
||||
--rawfile body "/tmp/pr-body-${ISSUE}.txt" \
|
||||
--arg head "$BRANCH" \
|
||||
--arg base "${PRIMARY_BRANCH}" \
|
||||
'{title: $title, body: $body, head: $head, base: $base}' > "/tmp/pr-request-${ISSUE}.json"
|
||||
|
||||
PR_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/pulls" \
|
||||
--data-binary @"/tmp/pr-request-${ISSUE}.json")
|
||||
|
||||
PR_HTTP_CODE=$(echo "$PR_RESPONSE" | tail -1)
|
||||
PR_RESPONSE_BODY=$(echo "$PR_RESPONSE" | sed '$d')
|
||||
rm -f "/tmp/pr-body-${ISSUE}.txt" "/tmp/pr-request-${ISSUE}.json"
|
||||
|
||||
if [ "$PR_HTTP_CODE" = "201" ] || [ "$PR_HTTP_CODE" = "200" ]; then
|
||||
PR_NUMBER=$(echo "$PR_RESPONSE_BODY" | jq -r '.number')
|
||||
log "created PR #${PR_NUMBER}"
|
||||
elif [ "$PR_HTTP_CODE" = "409" ]; then
|
||||
# PR already exists (race condition) — find it
|
||||
FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls?state=open&limit=20" | \
|
||||
jq -r --arg branch "$BRANCH" \
|
||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
||||
if [ -n "$FOUND_PR" ]; then
|
||||
PR_NUMBER="$FOUND_PR"
|
||||
log "PR already exists: #${PR_NUMBER}"
|
||||
else
|
||||
log "ERROR: PR creation got 409 but no existing PR found"
|
||||
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP 409, no existing PR found). Check the forge API. Retry by writing PHASE:awaiting_ci again after verifying the branch was pushed."
|
||||
return 0
|
||||
fi
|
||||
else
|
||||
log "ERROR: PR creation failed (HTTP ${PR_HTTP_CODE})"
|
||||
agent_inject_into_session "$SESSION_NAME" "ERROR: Could not create PR (HTTP ${PR_HTTP_CODE}). Check branch was pushed: git push ${FORGE_REMOTE:-origin} ${BRANCH}. Then write PHASE:awaiting_ci again."
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# No CI configured? Treat as success immediately
|
||||
if [ "${WOODPECKER_REPO_ID:-2}" = "0" ]; then
|
||||
log "no CI configured — treating as passed"
|
||||
agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER} (no CI configured for this project).
|
||||
Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback."
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Poll CI until done or timeout
|
||||
status "waiting for CI on PR #${PR_NUMBER}"
|
||||
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || \
|
||||
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha')
|
||||
|
||||
CI_DONE=false
|
||||
CI_STATE="unknown"
|
||||
CI_POLL_ELAPSED=0
|
||||
while [ "$CI_POLL_ELAPSED" -lt "$CI_POLL_TIMEOUT" ]; do
|
||||
sleep 30
|
||||
CI_POLL_ELAPSED=$(( CI_POLL_ELAPSED + 30 ))
|
||||
|
||||
# Check session still alive during CI wait (exit_marker + tmux fallback)
|
||||
if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
|
||||
log "session died during CI wait"
|
||||
break
|
||||
fi
|
||||
|
||||
# Re-fetch HEAD — Claude may have pushed new commits since loop started
|
||||
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || echo "$CI_CURRENT_SHA")
|
||||
|
||||
CI_STATE=$(ci_commit_status "$CI_CURRENT_SHA")
|
||||
if [ "$CI_STATE" = "success" ] || [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
|
||||
CI_DONE=true
|
||||
[ "$CI_STATE" = "success" ] && CI_FIX_COUNT=0
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if ! $CI_DONE; then
|
||||
log "TIMEOUT: CI didn't complete in ${CI_POLL_TIMEOUT}s"
|
||||
agent_inject_into_session "$SESSION_NAME" "CI TIMEOUT: CI did not complete within 30 minutes for PR #${PR_NUMBER} (SHA: ${CI_CURRENT_SHA:0:7}). This may be an infrastructure issue. Write PHASE:escalate if you cannot proceed."
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "CI: ${CI_STATE}"
|
||||
|
||||
if [ "$CI_STATE" = "success" ]; then
|
||||
agent_inject_into_session "$SESSION_NAME" "CI passed on PR #${PR_NUMBER}.
|
||||
Write PHASE:awaiting_review to the phase file, then stop and wait for review feedback:
|
||||
echo \"PHASE:awaiting_review\" > \"${PHASE_FILE}\""
|
||||
else
|
||||
# Fetch CI error details
|
||||
PIPELINE_NUM=$(ci_pipeline_number "$CI_CURRENT_SHA")
|
||||
|
||||
FAILED_STEP=""
|
||||
FAILED_EXIT=""
|
||||
IS_INFRA=false
|
||||
if [ -n "$PIPELINE_NUM" ]; then
|
||||
FAILED_INFO=$(curl -sf \
|
||||
-H "Authorization: Bearer ${WOODPECKER_TOKEN}" \
|
||||
"${WOODPECKER_SERVER}/api/repos/${WOODPECKER_REPO_ID}/pipelines/${PIPELINE_NUM}" | \
|
||||
jq -r '.workflows[]?.children[]? | select(.state=="failure") | "\(.name)|\(.exit_code)"' | head -1 || true)
|
||||
FAILED_STEP=$(echo "$FAILED_INFO" | cut -d'|' -f1)
|
||||
FAILED_EXIT=$(echo "$FAILED_INFO" | cut -d'|' -f2)
|
||||
fi
|
||||
|
||||
log "CI failed: step=${FAILED_STEP:-unknown} exit=${FAILED_EXIT:-?}"
|
||||
|
||||
if [ -n "$FAILED_STEP" ] && is_infra_step "$FAILED_STEP" "${FAILED_EXIT:-0}" >/dev/null 2>&1; then
|
||||
IS_INFRA=true
|
||||
fi
|
||||
|
||||
if [ "$IS_INFRA" = true ] && [ "${CI_RETRY_COUNT:-0}" -lt 1 ]; then
|
||||
CI_RETRY_COUNT=$(( CI_RETRY_COUNT + 1 ))
|
||||
log "infra failure — retrigger CI (retry ${CI_RETRY_COUNT})"
|
||||
(cd "$WORKTREE" && git commit --allow-empty \
|
||||
-m "ci: retrigger after infra failure (#${ISSUE})" --no-verify 2>&1 | tail -1)
|
||||
# Rebase on target branch before push to avoid merge conflicts
|
||||
if ! (cd "$WORKTREE" && \
|
||||
git fetch "${FORGE_REMOTE:-origin}" "${PRIMARY_BRANCH}" 2>/dev/null && \
|
||||
git rebase "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}" 2>&1 | tail -5); then
|
||||
log "rebase conflict detected — aborting, agent must resolve"
|
||||
(cd "$WORKTREE" && git rebase --abort 2>/dev/null || git reset --hard HEAD 2>/dev/null) || true
|
||||
agent_inject_into_session "$SESSION_NAME" "REBASE CONFLICT: Cannot rebase onto ${PRIMARY_BRANCH} automatically.
|
||||
|
||||
Please resolve merge conflicts manually:
|
||||
1. Check conflict status: git status
|
||||
2. Resolve conflicts in the conflicted files
|
||||
3. Stage resolved files: git add <files>
|
||||
4. Continue rebase: git rebase --continue
|
||||
|
||||
If you cannot resolve conflicts, abort: git rebase --abort
|
||||
Then write PHASE:escalate with a reason."
|
||||
return 0
|
||||
fi
|
||||
# Rebase succeeded — push the result
|
||||
(cd "$WORKTREE" && git push --force-with-lease "${FORGE_REMOTE:-origin}" "$BRANCH" 2>&1 | tail -3)
|
||||
# Touch phase file so we recheck CI on the new SHA
|
||||
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect the fresh mtime
|
||||
touch "$PHASE_FILE"
|
||||
CI_CURRENT_SHA=$(git -C "${WORKTREE}" rev-parse HEAD 2>/dev/null || true)
|
||||
return 0
|
||||
fi
|
||||
|
||||
CI_FIX_COUNT=$(( CI_FIX_COUNT + 1 ))
|
||||
_ci_pipeline_url="${WOODPECKER_SERVER}/repos/${WOODPECKER_REPO_ID}/pipeline/${PIPELINE_NUM:-0}"
|
||||
if [ "$CI_FIX_COUNT" -gt "$MAX_CI_FIXES" ]; then
|
||||
log "CI failure not recoverable after ${CI_FIX_COUNT} fix attempts — escalating"
|
||||
printf 'PHASE:escalate\nReason: ci_exhausted after %d attempts (step: %s)\n' "$CI_FIX_COUNT" "${FAILED_STEP:-unknown}" > "$PHASE_FILE"
|
||||
# Do NOT update LAST_PHASE_MTIME here — let the main loop detect PHASE:escalate
|
||||
return 0
|
||||
fi
|
||||
|
||||
CI_ERROR_LOG=""
|
||||
if [ -n "$PIPELINE_NUM" ]; then
|
||||
CI_ERROR_LOG=$(bash "${FACTORY_ROOT}/lib/ci-debug.sh" failures "$PIPELINE_NUM" 2>/dev/null | tail -80 | head -c 8000 || echo "")
|
||||
fi
|
||||
|
||||
# Save CI result for crash recovery
|
||||
printf 'CI failed (attempt %d/%d)\nStep: %s\nExit: %s\n\n%s' \
|
||||
"$CI_FIX_COUNT" "$MAX_CI_FIXES" "${FAILED_STEP:-unknown}" "${FAILED_EXIT:-?}" "$CI_ERROR_LOG" \
|
||||
> "/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt" 2>/dev/null || true
|
||||
|
||||
agent_inject_into_session "$SESSION_NAME" "CI failed on PR #${PR_NUMBER} (attempt ${CI_FIX_COUNT}/${MAX_CI_FIXES}).
|
||||
|
||||
Failed step: ${FAILED_STEP:-unknown} (exit code ${FAILED_EXIT:-?}, pipeline #${PIPELINE_NUM:-?})
|
||||
|
||||
CI debug tool:
|
||||
bash ${FACTORY_ROOT}/lib/ci-debug.sh failures ${PIPELINE_NUM:-0}
|
||||
bash ${FACTORY_ROOT}/lib/ci-debug.sh logs ${PIPELINE_NUM:-0} <step-name>
|
||||
|
||||
Error snippet:
|
||||
${CI_ERROR_LOG:-No logs available. Use ci-debug.sh to query the pipeline.}
|
||||
|
||||
Instructions:
|
||||
1. Run ci-debug.sh failures to get the full error output.
|
||||
2. Read the failing test file(s) — understand what the tests EXPECT.
|
||||
3. Fix the root cause — do NOT weaken tests.
|
||||
4. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
|
||||
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
|
||||
5. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
||||
6. Stop and wait."
|
||||
fi
|
||||
|
||||
# ── PHASE: awaiting_review ──────────────────────────────────────────────────
|
||||
elif [ "$phase" = "PHASE:awaiting_review" ]; then
|
||||
# Release session lock — Claude is idle during review wait (#724)
|
||||
session_lock_release
|
||||
status "waiting for review on PR #${PR_NUMBER:-?}"
|
||||
CI_FIX_COUNT=0 # Reset CI fix budget for this review cycle
|
||||
|
||||
if [ -z "${PR_NUMBER:-}" ]; then
|
||||
log "WARNING: awaiting_review but PR_NUMBER unknown — searching for PR"
|
||||
FOUND_PR=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls?state=open&limit=20" | \
|
||||
jq -r --arg branch "$BRANCH" \
|
||||
'.[] | select(.head.ref == $branch) | .number' | head -1) || true
|
||||
if [ -n "$FOUND_PR" ]; then
|
||||
PR_NUMBER="$FOUND_PR"
|
||||
log "found PR #${PR_NUMBER}"
|
||||
else
|
||||
agent_inject_into_session "$SESSION_NAME" "ERROR: Cannot find open PR for branch ${BRANCH}. Did you push? Verify with git status and git push ${FORGE_REMOTE:-origin} ${BRANCH}, then write PHASE:awaiting_ci."
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
REVIEW_POLL_ELAPSED=0
|
||||
REVIEW_FOUND=false
|
||||
while [ "$REVIEW_POLL_ELAPSED" -lt "$REVIEW_POLL_TIMEOUT" ]; do
|
||||
sleep 300 # 5 min between review checks
|
||||
REVIEW_POLL_ELAPSED=$(( REVIEW_POLL_ELAPSED + 300 ))
|
||||
|
||||
# Check session still alive (exit_marker + tmux fallback)
|
||||
if [ -f "/tmp/claude-exited-${SESSION_NAME}.ts" ] || ! tmux has-session -t "${SESSION_NAME}" 2>/dev/null; then
|
||||
log "session died during review wait"
|
||||
REVIEW_FOUND=false
|
||||
break
|
||||
fi
|
||||
|
||||
# Check if phase was updated while we wait (e.g., Claude reacted to something)
|
||||
NEW_MTIME=$(stat -c %Y "$PHASE_FILE" 2>/dev/null || echo 0)
|
||||
if [ "$NEW_MTIME" -gt "$LAST_PHASE_MTIME" ]; then
|
||||
log "phase file updated during review wait — re-entering main loop"
|
||||
# Do NOT update LAST_PHASE_MTIME here — leave it stale so the outer
|
||||
# loop detects the change on its next tick and dispatches the new phase.
|
||||
REVIEW_FOUND=true # Prevent timeout injection
|
||||
# Clean up review-poll sentinel if it exists (session already advanced)
|
||||
rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
||||
break
|
||||
fi
|
||||
|
||||
REVIEW_SHA=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${PR_NUMBER}" | jq -r '.head.sha') || true
|
||||
REVIEW_COMMENT=$(forge_api_all "/issues/${PR_NUMBER}/comments" | \
|
||||
jq -r --arg sha "$REVIEW_SHA" \
|
||||
'[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | last // empty') || true
|
||||
|
||||
if [ -n "$REVIEW_COMMENT" ] && [ "$REVIEW_COMMENT" != "null" ]; then
|
||||
REVIEW_TEXT=$(echo "$REVIEW_COMMENT" | jq -r '.body')
|
||||
|
||||
# Skip error reviews — they have no verdict
|
||||
if echo "$REVIEW_TEXT" | grep -q "review-error\|Review — Error"; then
|
||||
log "review was an error, waiting for re-review"
|
||||
continue
|
||||
fi
|
||||
|
||||
VERDICT=$(echo "$REVIEW_TEXT" | grep -oP '\*\*(APPROVE|REQUEST_CHANGES|DISCUSS)\*\*' | head -1 | tr -d '*' || true)
|
||||
log "review verdict: ${VERDICT:-unknown}"
|
||||
|
||||
# Also check formal forge reviews
|
||||
if [ -z "$VERDICT" ]; then
|
||||
VERDICT=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${PR_NUMBER}/reviews" | \
|
||||
jq -r '[.[] | select(.stale == false)] | last | .state // empty' || true)
|
||||
if [ "$VERDICT" = "APPROVED" ]; then
|
||||
VERDICT="APPROVE"
|
||||
elif [ "$VERDICT" != "REQUEST_CHANGES" ]; then
|
||||
VERDICT=""
|
||||
fi
|
||||
[ -n "$VERDICT" ] && log "verdict from formal review: $VERDICT"
|
||||
fi
|
||||
|
||||
# Skip injection if review-poll.sh already injected (sentinel present).
|
||||
# Exception: APPROVE always falls through so do_merge() runs even when
|
||||
# review-poll injected first — prevents Claude writing PHASE:done on a
|
||||
# failed merge without the orchestrator detecting the error.
|
||||
REVIEW_SENTINEL="/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
||||
if [ -n "$VERDICT" ] && [ -f "$REVIEW_SENTINEL" ] && [ "$VERDICT" != "APPROVE" ]; then
|
||||
log "review already injected by review-poll (sentinel exists) — skipping"
|
||||
rm -f "$REVIEW_SENTINEL"
|
||||
REVIEW_FOUND=true
|
||||
break
|
||||
fi
|
||||
rm -f "$REVIEW_SENTINEL" # consume sentinel before APPROVE handling below
|
||||
|
||||
if [ "$VERDICT" = "APPROVE" ]; then
|
||||
REVIEW_FOUND=true
|
||||
_merge_rc=0; do_merge "$PR_NUMBER" || _merge_rc=$?
|
||||
if [ "$_merge_rc" -eq 0 ]; then
|
||||
# Merge succeeded — close issue and signal done
|
||||
curl -sf -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${API}/issues/${ISSUE}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
# Pull merged primary branch and push to mirrors
|
||||
git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE:-origin}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE:-origin}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
mirror_push
|
||||
printf 'PHASE:done\n' > "$PHASE_FILE"
|
||||
elif [ "$_merge_rc" -ne 2 ]; then
|
||||
# Other merge failure (conflict, etc.) — delegate to Claude for rebase + retry
|
||||
agent_inject_into_session "$SESSION_NAME" "Approved! PR #${PR_NUMBER} has been approved, but the merge failed (likely conflicts).
|
||||
|
||||
Rebase onto ${PRIMARY_BRANCH} and push:
|
||||
git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
|
||||
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
|
||||
echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
||||
|
||||
Do NOT merge or close the issue — the orchestrator handles that after CI passes.
|
||||
If rebase repeatedly fails, write PHASE:escalate with a reason."
|
||||
fi
|
||||
# _merge_rc=2: PHASE:escalate already written by do_merge()
|
||||
break
|
||||
|
||||
elif [ "$VERDICT" = "REQUEST_CHANGES" ] || [ "$VERDICT" = "DISCUSS" ]; then
|
||||
REVIEW_ROUND=$(( REVIEW_ROUND + 1 ))
|
||||
if [ "$REVIEW_ROUND" -ge "$MAX_REVIEW_ROUNDS" ]; then
|
||||
log "hit max review rounds (${MAX_REVIEW_ROUNDS})"
|
||||
log "PR #${PR_NUMBER}: hit ${MAX_REVIEW_ROUNDS} review rounds, needs human attention"
|
||||
fi
|
||||
REVIEW_FOUND=true
|
||||
agent_inject_into_session "$SESSION_NAME" "Review feedback (round ${REVIEW_ROUND}) on PR #${PR_NUMBER}:
|
||||
|
||||
${REVIEW_TEXT}
|
||||
|
||||
Instructions:
|
||||
1. Address each piece of feedback carefully.
|
||||
2. Run lint and tests when done.
|
||||
3. Rebase on target branch and push: git fetch ${FORGE_REMOTE:-origin} ${PRIMARY_BRANCH} && git rebase ${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH}
|
||||
git push --force-with-lease ${FORGE_REMOTE:-origin} ${BRANCH}
|
||||
4. Write: echo \"PHASE:awaiting_ci\" > \"${PHASE_FILE}\"
|
||||
5. Stop and wait for the next CI result."
|
||||
log "review REQUEST_CHANGES received (round ${REVIEW_ROUND})"
|
||||
break
|
||||
|
||||
else
|
||||
# No verdict found in comment or formal review — keep waiting
|
||||
log "review comment found but no verdict, continuing to wait"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check if PR was merged or closed externally
|
||||
PR_JSON=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/pulls/${PR_NUMBER}") || true
|
||||
PR_STATE=$(echo "$PR_JSON" | jq -r '.state // "unknown"')
|
||||
PR_MERGED=$(echo "$PR_JSON" | jq -r '.merged // false')
|
||||
if [ "$PR_STATE" != "open" ]; then
|
||||
if [ "$PR_MERGED" = "true" ]; then
|
||||
log "PR #${PR_NUMBER} was merged externally"
|
||||
curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}" -d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
cleanup_labels
|
||||
agent_kill_session "$SESSION_NAME"
|
||||
cleanup_worktree
|
||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}"
|
||||
exit 0
|
||||
else
|
||||
log "PR #${PR_NUMBER} was closed WITHOUT merge — NOT closing issue"
|
||||
cleanup_labels
|
||||
agent_kill_session "$SESSION_NAME"
|
||||
cleanup_worktree
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
log "waiting for review on PR #${PR_NUMBER} (${REVIEW_POLL_ELAPSED}s elapsed)"
|
||||
done
|
||||
|
||||
if ! $REVIEW_FOUND && [ "$REVIEW_POLL_ELAPSED" -ge "$REVIEW_POLL_TIMEOUT" ]; then
|
||||
log "TIMEOUT: no review after 3h"
|
||||
agent_inject_into_session "$SESSION_NAME" "TIMEOUT: No review received after 3 hours for PR #${PR_NUMBER}. Write PHASE:escalate to escalate to a human reviewer."
|
||||
fi
|
||||
|
||||
# ── PHASE: escalate ──────────────────────────────────────────────────────
|
||||
elif [ "$phase" = "PHASE:escalate" ]; then
|
||||
status "escalated — waiting for human input on issue #${ISSUE}"
|
||||
ESCALATE_REASON=$(sed -n '2p' "$PHASE_FILE" 2>/dev/null | sed 's/^Reason: //' || echo "")
|
||||
log "phase: escalate — reason: ${ESCALATE_REASON:-none}"
|
||||
# Session stays alive — human input arrives via vault/forge
|
||||
|
||||
# ── PHASE: done ─────────────────────────────────────────────────────────────
|
||||
# PR merged and issue closed (by orchestrator or Claude). Just clean up local state.
|
||||
elif [ "$phase" = "PHASE:done" ]; then
|
||||
if [ -n "${PR_NUMBER:-}" ]; then
|
||||
status "phase done — PR #${PR_NUMBER} merged, cleaning up"
|
||||
else
|
||||
status "phase done — issue #${ISSUE} complete, cleaning up"
|
||||
fi
|
||||
|
||||
# Belt-and-suspenders: ensure in-progress label removed (idempotent)
|
||||
cleanup_labels
|
||||
|
||||
# Local cleanup
|
||||
agent_kill_session "$SESSION_NAME"
|
||||
cleanup_worktree
|
||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
||||
CLAIMED=false # Don't unclaim again in cleanup()
|
||||
|
||||
# ── PHASE: failed ───────────────────────────────────────────────────────────
|
||||
elif [ "$phase" = "PHASE:failed" ]; then
|
||||
if [[ -f "$PHASE_FILE" ]]; then
|
||||
FAILURE_REASON=$(sed -n '2p' "$PHASE_FILE" | sed 's/^Reason: //')
|
||||
fi
|
||||
FAILURE_REASON="${FAILURE_REASON:-unspecified}"
|
||||
log "phase: failed — reason: ${FAILURE_REASON}"
|
||||
# Gitea labels API requires []int64 — look up the "backlog" label ID once
|
||||
BACKLOG_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
|
||||
| jq -r '.[] | select(.name == "backlog") | .id' 2>/dev/null || true)
|
||||
BACKLOG_LABEL_ID="${BACKLOG_LABEL_ID:-1300815}"
|
||||
UNDERSPECIFIED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
|
||||
| jq -r '.[] | select(.name == "underspecified") | .id' 2>/dev/null || true)
|
||||
UNDERSPECIFIED_LABEL_ID="${UNDERSPECIFIED_LABEL_ID:-1300816}"
|
||||
|
||||
# Check if this is a refusal (Claude wrote refusal JSON to IMPL_SUMMARY_FILE)
|
||||
REFUSAL_JSON=""
|
||||
if [ -f "$IMPL_SUMMARY_FILE" ] && jq -e '.status' < "$IMPL_SUMMARY_FILE" >/dev/null 2>&1; then
|
||||
REFUSAL_JSON=$(cat "$IMPL_SUMMARY_FILE")
|
||||
fi
|
||||
|
||||
if [ -n "$REFUSAL_JSON" ] && [ "$FAILURE_REASON" = "refused" ]; then
|
||||
REFUSAL_STATUS=$(printf '%s' "$REFUSAL_JSON" | jq -r '.status')
|
||||
log "claude refused: ${REFUSAL_STATUS}"
|
||||
|
||||
# Write preflight result for dev-poll.sh
|
||||
printf '%s' "$REFUSAL_JSON" > "$PREFLIGHT_RESULT"
|
||||
|
||||
# Unclaim issue (restore backlog label, remove in-progress)
|
||||
cleanup_labels
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}/labels" \
|
||||
-d "{\"labels\":[${BACKLOG_LABEL_ID}]}" >/dev/null 2>&1 || true
|
||||
|
||||
case "$REFUSAL_STATUS" in
|
||||
unmet_dependency)
|
||||
BLOCKED_BY_MSG=$(printf '%s' "$REFUSAL_JSON" | jq -r '.blocked_by // "unknown"')
|
||||
SUGGESTION=$(printf '%s' "$REFUSAL_JSON" | jq -r '.suggestion // empty')
|
||||
COMMENT_BODY="### Blocked by unmet dependency
|
||||
|
||||
${BLOCKED_BY_MSG}"
|
||||
if [ -n "$SUGGESTION" ] && [ "$SUGGESTION" != "null" ]; then
|
||||
COMMENT_BODY="${COMMENT_BODY}
|
||||
|
||||
**Suggestion:** Work on #${SUGGESTION} first."
|
||||
fi
|
||||
post_refusal_comment "🚧" "Unmet dependency" "$COMMENT_BODY"
|
||||
;;
|
||||
too_large)
|
||||
REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"')
|
||||
post_refusal_comment "📏" "Too large for single session" "### Why this can't be implemented as-is
|
||||
|
||||
${REASON}
|
||||
|
||||
### Next steps
|
||||
A maintainer should split this issue or add more detail to the spec."
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}/labels" \
|
||||
-d "{\"labels\":[${UNDERSPECIFIED_LABEL_ID}]}" >/dev/null 2>&1 || true
|
||||
curl -sf -X DELETE \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${API}/issues/${ISSUE}/labels/${BACKLOG_LABEL_ID}" >/dev/null 2>&1 || true
|
||||
;;
|
||||
already_done)
|
||||
REASON=$(printf '%s' "$REFUSAL_JSON" | jq -r '.reason // "unspecified"')
|
||||
post_refusal_comment "✅" "Already implemented" "### Existing implementation
|
||||
|
||||
${REASON}
|
||||
|
||||
Closing as already implemented."
|
||||
curl -sf -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${API}/issues/${ISSUE}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
;;
|
||||
*)
|
||||
post_refusal_comment "❓" "Unable to proceed" "The dev-agent could not process this issue.
|
||||
|
||||
Raw response:
|
||||
\`\`\`json
|
||||
$(printf '%s' "$REFUSAL_JSON" | head -c 2000)
|
||||
\`\`\`"
|
||||
;;
|
||||
esac
|
||||
|
||||
CLAIMED=false # Don't unclaim again in cleanup()
|
||||
agent_kill_session "$SESSION_NAME"
|
||||
cleanup_worktree
|
||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
||||
return 1
|
||||
|
||||
else
|
||||
# Genuine unrecoverable failure — label blocked with diagnostic
|
||||
log "session failed: ${FAILURE_REASON}"
|
||||
post_blocked_diagnostic "$FAILURE_REASON"
|
||||
|
||||
agent_kill_session "$SESSION_NAME"
|
||||
if [ -n "${PR_NUMBER:-}" ]; then
|
||||
log "keeping worktree (PR #${PR_NUMBER} still open)"
|
||||
else
|
||||
cleanup_worktree
|
||||
fi
|
||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# ── PHASE: crashed ──────────────────────────────────────────────────────────
|
||||
# Session died unexpectedly (OOM kill, tmux crash, etc.). Label blocked with
|
||||
# diagnostic comment so humans can triage directly on the issue.
|
||||
elif [ "$phase" = "PHASE:crashed" ]; then
|
||||
log "session crashed for issue #${ISSUE}"
|
||||
post_blocked_diagnostic "crashed"
|
||||
log "PRESERVED crashed worktree for debugging: $WORKTREE"
|
||||
rm -f "$PHASE_FILE" "$IMPL_SUMMARY_FILE" "${SCRATCH_FILE:-}" \
|
||||
"/tmp/ci-result-${PROJECT_NAME}-${ISSUE}.txt"
|
||||
[ -n "${PR_NUMBER:-}" ] && rm -f "/tmp/review-injected-${PROJECT_NAME}-${PR_NUMBER}"
|
||||
|
||||
else
|
||||
log "WARNING: unknown phase value: ${phase}"
|
||||
fi
|
||||
}
|
||||
|
|
@ -8,8 +8,13 @@
|
|||
|
||||
set -euo pipefail
|
||||
|
||||
# Source canonical read_phase() from shared library
|
||||
source "$(dirname "$0")/../lib/agent-session.sh"
|
||||
# Inline read_phase() function (previously from lib/agent-session.sh)
|
||||
# Read the current phase from a phase file, stripped of whitespace.
|
||||
# Usage: read_phase [file] — defaults to $PHASE_FILE
|
||||
# Read the current phase token from a phase file.
# Usage: read_phase [file] — defaults to $PHASE_FILE.
# Outputs: the file's first line with ALL whitespace stripped (phase
#   tokens such as "PHASE:awaiting_ci" contain none), no trailing newline.
#   A missing, unreadable, or unconfigured file yields empty output.
# Returns: always 0, so callers are safe under `set -e`.
read_phase() {
  local file="${1:-${PHASE_FILE:-}}"
  local line=""
  # Guard: no path configured, or file absent/unreadable → empty output.
  # (The old cat 2>/dev/null || true pipeline silently produced "" here too.)
  [ -n "$file" ] && [ -r "$file" ] || return 0
  # Builtin read replaces the cat | head -1 | tr pipeline — this runs on
  # every poll tick, so avoiding three forks per call is worthwhile.
  # read returns non-zero at EOF when the file lacks a trailing newline,
  # but $line is still populated — hence the || true.
  IFS= read -r line < "$file" || true
  printf '%s' "${line//[[:space:]]/}"
}
|
||||
|
||||
PROJECT="testproject"
|
||||
ISSUE="999"
|
||||
|
|
@ -84,7 +89,7 @@ else
|
|||
fail "PHASE:failed format: first='$first_line' second='$second_line'"
|
||||
fi
|
||||
|
||||
# ── Test 5: orchestrator read function (canonical read_phase from lib/agent-session.sh)
|
||||
# ── Test 5: orchestrator read function (inline read_phase)
|
||||
echo "PHASE:awaiting_ci" > "$PHASE_FILE"
|
||||
phase=$(read_phase "$PHASE_FILE")
|
||||
if [ "$phase" = "PHASE:awaiting_ci" ]; then
|
||||
|
|
|
|||
|
|
@ -35,7 +35,49 @@ services:
|
|||
- FORGE_TOKEN=${FORGE_TOKEN:-}
|
||||
- FORGE_URL=http://forgejo:3000
|
||||
- PROJECT_TOML=projects/disinto.toml
|
||||
- FORGE_REPO=johba/disinto
|
||||
- FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
|
||||
depends_on:
|
||||
- forgejo
|
||||
|
||||
reproduce:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/reproduce/Dockerfile
|
||||
image: disinto-reproduce:latest
|
||||
network_mode: host
|
||||
profiles: ["reproduce"]
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- agent-data:/home/agent/data
|
||||
- project-repos:/home/agent/repos
|
||||
- ${HOME}/.claude:/home/agent/.claude
|
||||
- /usr/local/bin/claude:/usr/local/bin/claude:ro
|
||||
- ${HOME}/.ssh:/home/agent/.ssh:ro
|
||||
env_file:
|
||||
- .env
|
||||
|
||||
edge:
|
||||
build:
|
||||
context: docker/edge
|
||||
dockerfile: Dockerfile
|
||||
image: disinto/edge:latest
|
||||
container_name: disinto-edge
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /usr/local/bin/claude:/usr/local/bin/claude:ro
|
||||
- ${HOME}/.claude.json:/root/.claude.json:ro
|
||||
- ${HOME}/.claude:/root/.claude:ro
|
||||
- disinto-logs:/opt/disinto-logs
|
||||
environment:
|
||||
- FORGE_SUPERVISOR_TOKEN=${FORGE_SUPERVISOR_TOKEN:-}
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- CLAUDE_MODEL=claude-sonnet-4-6
|
||||
- FORGE_TOKEN=${FORGE_TOKEN:-}
|
||||
- FORGE_URL=http://forgejo:3000
|
||||
- DISINTO_CONTAINER=1
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
depends_on:
|
||||
- forgejo
|
||||
|
||||
|
|
@ -52,3 +94,6 @@ services:
|
|||
- FORGEJO__service__REQUIRE_SIGNIN_VIEW=true
|
||||
ports:
|
||||
- "3000:3000"
|
||||
|
||||
volumes:
|
||||
disinto-logs:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
bash curl git jq tmux cron python3 python3-pip openssh-client ca-certificates age shellcheck \
|
||||
bash curl git jq tmux cron python3 python3-pip openssh-client ca-certificates age shellcheck procps \
|
||||
&& pip3 install --break-system-packages networkx \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
|
|
|||
|
|
@ -24,72 +24,22 @@ if [ ! -d "${PROJECT_REPO_ROOT}/.git" ]; then
|
|||
log "Cloning repo..."
|
||||
mkdir -p "$(dirname "$PROJECT_REPO_ROOT")"
|
||||
chown -R agent:agent /home/agent/repos 2>/dev/null || true
|
||||
su -s /bin/bash agent -c "git clone http://dev-bot:${FORGE_TOKEN}@forgejo:3000/${FORGE_REPO:-johba/disinto}.git ${PROJECT_REPO_ROOT}"
|
||||
# Use password auth for git HTTP — Forgejo 11.x rejects API tokens for push (#361)
|
||||
su -s /bin/bash agent -c "git clone http://dev-bot:${FORGE_PASS:-${FORGE_TOKEN}}@forgejo:3000/${FORGE_REPO:-disinto-admin/disinto}.git ${PROJECT_REPO_ROOT}"
|
||||
log "Repo cloned"
|
||||
fi
|
||||
|
||||
# Install crontab entries for agent user from project TOMLs
|
||||
# Build a crontab from the project TOML files and install it for the
# `agent` user.
# Globals read:  DISINTO_DIR     — install root containing projects/*.toml
#                DISINTO_AGENTS  — optional comma-separated subset of
#                                  agents to schedule, e.g. "review,gardener"
#                                  (default: all of review,dev,gardener)
# Side effects:  replaces the agent user's crontab via `crontab -u agent -`
#                when at least one project TOML parses; otherwise leaves the
#                existing crontab untouched and logs a warning.
install_project_crons() {
  # Cron strips the environment, so seed the variables every job needs.
  # PATH must be explicit: cron's default (/usr/bin:/bin) misses
  # /usr/local/bin where several tools live.
  local cron_lines="PATH=/usr/local/bin:/usr/bin:/bin
DISINTO_CONTAINER=1
USER=agent
FORGE_URL=http://forgejo:3000"
  local found_project=false
  local toml pname

  # Which agents get cron entries (default: all three).
  local agents_to_run="review,dev,gardener"
  if [ -n "${DISINTO_AGENTS:-}" ]; then
    agents_to_run="$DISINTO_AGENTS"
  fi

  for toml in "${DISINTO_DIR}"/projects/*.toml; do
    [ -f "$toml" ] || continue
    # Project name comes from the TOML's top-level `name` key; skip files
    # that fail to parse rather than aborting the whole install.
    pname=$(python3 -c "
import sys, tomllib
with open(sys.argv[1], 'rb') as f:
    print(tomllib.load(f)['name'])
" "$toml" 2>/dev/null) || continue
    found_project=true

    # NOTE(review): cron env assignments are file-global, so with several
    # projects the last PROJECT_REPO_ROOT wins for ALL jobs — confirm this
    # is intended for multi-project installs.
    cron_lines="${cron_lines}
PROJECT_REPO_ROOT=/home/agent/repos/${pname}
# disinto: ${pname}"

    # Review agent: every 5 minutes, offset to :02.
    if echo "$agents_to_run" | grep -qw "review"; then
      cron_lines="${cron_lines}
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${DISINTO_DIR}/review/review-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
    fi

    # Dev agent: every 5 minutes, offset to :04 to avoid the review slot.
    if echo "$agents_to_run" | grep -qw "dev"; then
      cron_lines="${cron_lines}
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${DISINTO_DIR}/dev/dev-poll.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
    fi

    # Gardener: four times a day.
    if echo "$agents_to_run" | grep -qw "gardener"; then
      cron_lines="${cron_lines}
0 0,6,12,18 * * * cd ${DISINTO_DIR} && bash gardener/gardener-run.sh ${toml} >>/home/agent/data/logs/cron.log 2>&1"
    fi
  done

  # Fixed: the previous guard tested $cron_lines, which always holds the
  # env preamble and is therefore never empty — the "No project TOMLs"
  # branch was unreachable, and an env-only crontab was installed even
  # when nothing was scheduled.
  if [ "$found_project" = true ]; then
    printf '%s\n' "$cron_lines" | crontab -u agent -
    log "Installed crontab for agent user (agents: ${agents_to_run})"
  else
    log "No project TOMLs found — crontab empty"
  fi
}
|
||||
# Reset base repo to origin/main to avoid divergence warnings
|
||||
su -s /bin/bash agent -c "
|
||||
cd \"${PROJECT_REPO_ROOT}\"
|
||||
git fetch origin main
|
||||
git checkout main 2>/dev/null || true
|
||||
git reset --hard origin/main
|
||||
" || true
|
||||
log "Base repo reset to origin/main"
|
||||
|
||||
log "Entering poll loop (interval: ${POLL_INTERVAL:-300}s)"
|
||||
|
||||
# Install and start cron daemon
|
||||
DISINTO_DIR="/home/agent/disinto"
|
||||
install_project_crons
|
||||
log "Starting cron daemon"
|
||||
cron
|
||||
log "cron daemon started"
|
||||
|
||||
while true; do
|
||||
# Clear stale session IDs before each poll.
|
||||
# Local llama does not support --resume (no server-side session storage).
|
||||
|
|
@ -98,6 +48,10 @@ while true; do
|
|||
|
||||
su -s /bin/bash agent -c "
|
||||
export FORGE_TOKEN='${FORGE_TOKEN}'
|
||||
export FORGE_TOKEN_OVERRIDE='${FORGE_TOKEN_OVERRIDE:-}'
|
||||
export ANTHROPIC_API_KEY='${ANTHROPIC_API_KEY:-}'
|
||||
export ANTHROPIC_BASE_URL='${ANTHROPIC_BASE_URL:-}'
|
||||
export CLAUDE_CONFIG_DIR='${CLAUDE_CONFIG_DIR:-}'
|
||||
cd /home/agent/disinto && \
|
||||
bash dev/dev-poll.sh ${PROJECT_TOML:-projects/disinto.toml}
|
||||
" >> "$LOG_DIR/llama-loop.log" 2>&1 || true
|
||||
|
|
|
|||
|
|
@ -18,12 +18,15 @@ log() {
|
|||
|
||||
# Build crontab from project TOMLs and install for the agent user.
|
||||
install_project_crons() {
|
||||
local cron_lines="DISINTO_CONTAINER=1
|
||||
local cron_lines="PATH=/usr/local/bin:/usr/bin:/bin
|
||||
DISINTO_CONTAINER=1
|
||||
USER=agent
|
||||
FORGE_URL=http://forgejo:3000"
|
||||
|
||||
# Parse DISINTO_AGENTS env var (default: all agents)
|
||||
# Expected format: comma-separated list like "review,gardener" or "dev"
|
||||
# Note: supervisor is NOT installed here — it runs on the host, not in container.
|
||||
# Supervisor requires host-level Docker access and pgrep, which the container lacks.
|
||||
local agents_to_run="review,dev,gardener"
|
||||
if [ -n "${DISINTO_AGENTS:-}" ]; then
|
||||
agents_to_run="$DISINTO_AGENTS"
|
||||
|
|
@ -71,6 +74,9 @@ PROJECT_REPO_ROOT=/home/agent/repos/${pname}
|
|||
|
||||
log "Agent container starting"
|
||||
|
||||
# Set USER for scripts that source lib/env.sh (e.g., OPS_REPO_ROOT default)
|
||||
export USER=agent
|
||||
|
||||
# Verify Claude CLI is available (expected via volume mount from host).
|
||||
if ! command -v claude &>/dev/null; then
|
||||
log "FATAL: claude CLI not found in PATH."
|
||||
|
|
@ -95,6 +101,38 @@ fi
|
|||
|
||||
install_project_crons
|
||||
|
||||
# Configure git credential helper for password-based HTTP auth.
|
||||
# Forgejo 11.x rejects API tokens for git push (#361); password auth works.
|
||||
# This ensures all git operations (clone, fetch, push) from worktrees use
|
||||
# password auth without needing tokens embedded in remote URLs.
|
||||
if [ -n "${FORGE_PASS:-}" ] && [ -n "${FORGE_URL:-}" ]; then
|
||||
_forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||')
|
||||
_forge_proto=$(printf '%s' "$FORGE_URL" | sed 's|://.*||')
|
||||
# Determine the bot username from FORGE_TOKEN identity (or default to dev-bot)
|
||||
_bot_user=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_URL}/api/v1/user" 2>/dev/null | jq -r '.login // empty') || _bot_user=""
|
||||
_bot_user="${_bot_user:-dev-bot}"
|
||||
|
||||
# Write a static credential helper script (git credential protocol)
|
||||
cat > /home/agent/.git-credentials-helper <<CREDEOF
|
||||
#!/bin/sh
|
||||
# Auto-generated git credential helper for Forgejo password auth (#361)
|
||||
# Only respond to "get" action; ignore "store" and "erase".
|
||||
[ "\$1" = "get" ] || exit 0
|
||||
# Read and discard stdin (git sends protocol/host info)
|
||||
cat >/dev/null
|
||||
echo "protocol=${_forge_proto}"
|
||||
echo "host=${_forge_host}"
|
||||
echo "username=${_bot_user}"
|
||||
echo "password=${FORGE_PASS}"
|
||||
CREDEOF
|
||||
chmod 755 /home/agent/.git-credentials-helper
|
||||
chown agent:agent /home/agent/.git-credentials-helper
|
||||
|
||||
su -s /bin/bash agent -c "git config --global credential.helper '/home/agent/.git-credentials-helper'"
|
||||
log "Git credential helper configured for ${_bot_user}@${_forge_host} (password auth)"
|
||||
fi
|
||||
|
||||
# Configure tea CLI login for forge operations (runs as agent user).
|
||||
# tea stores config in ~/.config/tea/ — persistent across container restarts
|
||||
# only if that directory is on a mounted volume.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM caddy:alpine
|
||||
RUN apk add --no-cache bash jq curl git docker-cli
|
||||
FROM caddy:latest
|
||||
RUN apt-get update && apt-get install -y bash jq curl git docker.io && rm -rf /var/lib/apt/lists/*
|
||||
COPY entrypoint-edge.sh /usr/local/bin/entrypoint-edge.sh
|
||||
ENTRYPOINT ["bash", "/usr/local/bin/entrypoint-edge.sh"]
|
||||
|
|
|
|||
|
|
@ -47,9 +47,14 @@ VAULT_ENV="${SCRIPT_ROOT}/../vault/vault-env.sh"
|
|||
# Comma-separated list of Forgejo usernames with admin role
|
||||
ADMIN_USERS="${FORGE_ADMIN_USERS:-vault-bot,admin}"
|
||||
|
||||
# Log function
|
||||
# Persistent log file for dispatcher
|
||||
DISPATCHER_LOG_FILE="${DISINTO_LOG_DIR:-/tmp}/dispatcher/dispatcher.log"
|
||||
mkdir -p "$(dirname "$DISPATCHER_LOG_FILE")"
|
||||
|
||||
# Log function with standardized format
|
||||
log() {
|
||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
|
||||
local agent="${LOG_AGENT:-dispatcher}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$DISPATCHER_LOG_FILE"
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -159,7 +164,7 @@ get_pr_merger() {
|
|||
username: .merge_user?.login // .user?.login,
|
||||
merged: .merged,
|
||||
merged_at: .merged_at // empty
|
||||
}' || true
|
||||
}'
|
||||
}
|
||||
|
||||
# Get PR reviews
|
||||
|
|
@ -172,7 +177,7 @@ get_pr_reviews() {
|
|||
local ops_api="${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}"
|
||||
|
||||
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${ops_api}/pulls/${pr_number}/reviews" 2>/dev/null || true
|
||||
"${ops_api}/pulls/${pr_number}/reviews" 2>/dev/null
|
||||
}
|
||||
|
||||
# Verify vault action was approved by an admin via PR review
|
||||
|
|
@ -451,6 +456,258 @@ launch_runner() {
|
|||
return $exit_code
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Reproduce dispatch — launch sidecar for bug-report issues
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Check if a reproduce run is already in-flight for a given issue.
|
||||
# Uses a simple pid-file in /tmp so we don't double-launch per dispatcher cycle.
|
||||
_reproduce_lockfile() {
  # Path of the pid-file that guards one in-flight reproduce run per issue.
  printf '/tmp/reproduce-inflight-%s.pid\n' "$1"
}
|
||||
|
||||
is_reproduce_running() {
  # A reproduce run counts as in-flight when its pid-file names a live process.
  local lockfile pid
  lockfile=$(_reproduce_lockfile "$1")
  [ -f "$lockfile" ] || return 1
  pid=$(cat "$lockfile" 2>/dev/null) || pid=""
  if [ -z "$pid" ]; then
    return 1
  fi
  kill -0 "$pid" 2>/dev/null
}
|
||||
|
||||
# Fetch open issues labelled bug-report that have no outcome label yet.
|
||||
# Returns a newline-separated list of "issue_number:project_toml" pairs.
|
||||
# Fetch open issues labelled bug-report that have no outcome label yet.
# Prints one issue number per line; prints nothing (and returns 0) when the
# forge is unconfigured or unreachable.
# Globals: FORGE_TOKEN, FORGE_URL, FORGE_REPO (all required; no-op otherwise)
fetch_reproduce_candidates() {
  [ -n "${FORGE_TOKEN:-}" ] || return 0
  [ -n "${FORGE_URL:-}" ] || return 0
  [ -n "${FORGE_REPO:-}" ] || return 0

  local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

  local issues_json
  issues_json=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

  # Filter out issues that already carry an outcome label.
  # JSON goes through a temp file so python3 can read the heredoc from stdin
  # and still receive the JSON as an argument (avoids SC2259: pipe vs heredoc).
  local tmpjson rc=0
  tmpjson=$(mktemp)
  # printf, not echo: echo can mangle payloads starting with '-' or
  # containing backslash escapes.
  printf '%s\n' "$issues_json" > "$tmpjson"
  # Capture python's status instead of letting `set -e` abort: previously a
  # malformed API response killed the dispatcher AND leaked the temp file.
  python3 - "$tmpjson" <<'PYEOF' || rc=$?
import sys, json
data = json.load(open(sys.argv[1]))
skip = {"in-progress", "in-triage", "rejected", "blocked"}
for issue in data:
    labels = {l["name"] for l in (issue.get("labels") or [])}
    if labels & skip:
        continue
    print(issue["number"])
PYEOF
  rm -f "$tmpjson"
  return "$rc"
}
|
||||
|
||||
# Launch one reproduce container per candidate issue.
|
||||
# project_toml is resolved from FACTORY_ROOT/projects/*.toml (first match).
|
||||
# Launch one reproduce container for a candidate issue.
# Arguments: $1 - issue number
# Globals (read): FACTORY_ROOT, FORGE_URL, FORGE_TOKEN, FORGE_REPO,
#                 PRIMARY_BRANCH, ANTHROPIC_API_KEY, HOME
# Side effects: starts a background `docker run`, writes a pid-file via
#               _reproduce_lockfile so the next poll cycle won't double-launch.
# Returns: 0 in every handled case (already running / no TOML / launched).
dispatch_reproduce() {
  local issue_number="$1"

  if is_reproduce_running "$issue_number"; then
    log "Reproduce already running for issue #${issue_number}, skipping"
    return 0
  fi

  # Find first project TOML available (same convention as dev-poll).
  # `toml` is declared local — previously it leaked into the dispatcher's
  # global scope on every poll cycle.
  local project_toml="" toml
  for toml in "${FACTORY_ROOT}"/projects/*.toml; do
    [ -f "$toml" ] && { project_toml="$toml"; break; }
  done

  if [ -z "$project_toml" ]; then
    log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping reproduce for #${issue_number}"
    return 0
  fi

  log "Dispatching reproduce-agent for issue #${issue_number} (project: ${project_toml})"

  # Build docker run command using array (safe from injection)
  local -a cmd=(docker run --rm
    --name "disinto-reproduce-${issue_number}"
    --network host
    --security-opt apparmor=unconfined
    -v /var/run/docker.sock:/var/run/docker.sock
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
  )

  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi

  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi

  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")

  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")

  # Launch in background; write pid-file so we don't double-launch.
  # NOTE: the recorded pid is the docker *client* process, which is what
  # is_reproduce_running later probes with kill -0.
  "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_reproduce_lockfile "$issue_number")"
  log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Triage dispatch — launch sidecar for bug-report + in-triage issues
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Check if a triage run is already in-flight for a given issue.
|
||||
_triage_lockfile() {
  # Path of the pid-file that guards one in-flight triage run per issue.
  printf '/tmp/triage-inflight-%s.pid\n' "$1"
}
|
||||
|
||||
is_triage_running() {
  # A triage run counts as in-flight when its pid-file names a live process.
  local lockfile pid
  lockfile=$(_triage_lockfile "$1")
  [ -f "$lockfile" ] || return 1
  pid=$(cat "$lockfile" 2>/dev/null) || pid=""
  if [ -z "$pid" ]; then
    return 1
  fi
  kill -0 "$pid" 2>/dev/null
}
|
||||
|
||||
# Fetch open issues labelled both bug-report and in-triage.
|
||||
# Returns a newline-separated list of issue numbers.
|
||||
# Fetch open issues labelled BOTH bug-report AND in-triage.
# Prints one issue number per line; prints nothing (and returns 0) when the
# forge is unconfigured or unreachable.
# Globals: FORGE_TOKEN, FORGE_URL, FORGE_REPO (all required; no-op otherwise)
fetch_triage_candidates() {
  [ -n "${FORGE_TOKEN:-}" ] || return 0
  [ -n "${FORGE_URL:-}" ] || return 0
  [ -n "${FORGE_REPO:-}" ] || return 0

  local api="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"

  local issues_json
  issues_json=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${api}/issues?type=issues&state=open&labels=bug-report&limit=20" 2>/dev/null) || return 0

  # Filter to issues that carry BOTH bug-report AND in-triage labels.
  # JSON goes through a temp file so python3 can read the heredoc from stdin
  # and still receive the JSON as an argument (avoids SC2259: pipe vs heredoc).
  local tmpjson rc=0
  tmpjson=$(mktemp)
  # printf, not echo: echo can mangle payloads starting with '-' or
  # containing backslash escapes.
  printf '%s\n' "$issues_json" > "$tmpjson"
  # Capture python's status instead of letting `set -e` abort: previously a
  # malformed API response killed the dispatcher AND leaked the temp file.
  python3 - "$tmpjson" <<'PYEOF' || rc=$?
import sys, json
data = json.load(open(sys.argv[1]))
for issue in data:
    labels = {l["name"] for l in (issue.get("labels") or [])}
    if "bug-report" in labels and "in-triage" in labels:
        print(issue["number"])
PYEOF
  rm -f "$tmpjson"
  return "$rc"
}
|
||||
|
||||
# Launch one triage container per candidate issue.
|
||||
# Uses the same disinto-reproduce:latest image as the reproduce-agent,
|
||||
# selecting the triage formula via DISINTO_FORMULA env var.
|
||||
# Stack lock is held for the full run (no timeout).
|
||||
# Launch one triage container for a candidate issue.
# Uses the same disinto-reproduce:latest image as the reproduce-agent,
# selecting the triage formula via the DISINTO_FORMULA env var.
# Stack lock is held by the container for the full run (no timeout).
# Arguments: $1 - issue number
# Globals (read): FACTORY_ROOT, FORGE_URL, FORGE_TOKEN, FORGE_REPO,
#                 PRIMARY_BRANCH, ANTHROPIC_API_KEY, HOME
# Side effects: starts a background `docker run`, writes a pid-file via
#               _triage_lockfile so the next poll cycle won't double-launch.
# Returns: 0 in every handled case (already running / no TOML / launched).
dispatch_triage() {
  local issue_number="$1"

  if is_triage_running "$issue_number"; then
    log "Triage already running for issue #${issue_number}, skipping"
    return 0
  fi

  # Find first project TOML available (same convention as dev-poll).
  # `toml` is declared local — previously it leaked into the dispatcher's
  # global scope on every poll cycle.
  local project_toml="" toml
  for toml in "${FACTORY_ROOT}"/projects/*.toml; do
    [ -f "$toml" ] && { project_toml="$toml"; break; }
  done

  if [ -z "$project_toml" ]; then
    log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping triage for #${issue_number}"
    return 0
  fi

  log "Dispatching triage-agent for issue #${issue_number} (project: ${project_toml})"

  # Build docker run command using array (safe from injection)
  local -a cmd=(docker run --rm
    --name "disinto-triage-${issue_number}"
    --network host
    --security-opt apparmor=unconfined
    -v /var/run/docker.sock:/var/run/docker.sock
    -v agent-data:/home/agent/data
    -v project-repos:/home/agent/repos
    -e "FORGE_URL=${FORGE_URL}"
    -e "FORGE_TOKEN=${FORGE_TOKEN}"
    -e "FORGE_REPO=${FORGE_REPO}"
    -e "PRIMARY_BRANCH=${PRIMARY_BRANCH:-main}"
    -e DISINTO_CONTAINER=1
    -e DISINTO_FORMULA=triage
  )

  # Pass through ANTHROPIC_API_KEY if set
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    cmd+=(-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}")
  fi

  # Mount ~/.claude and ~/.ssh from the runtime user's home if available
  local runtime_home="${HOME:-/home/debian}"
  if [ -d "${runtime_home}/.claude" ]; then
    cmd+=(-v "${runtime_home}/.claude:/home/agent/.claude")
  fi
  if [ -f "${runtime_home}/.claude.json" ]; then
    cmd+=(-v "${runtime_home}/.claude.json:/home/agent/.claude.json:ro")
  fi
  if [ -d "${runtime_home}/.ssh" ]; then
    cmd+=(-v "${runtime_home}/.ssh:/home/agent/.ssh:ro")
  fi
  # Mount claude CLI binary if present on host
  if [ -f /usr/local/bin/claude ]; then
    cmd+=(-v /usr/local/bin/claude:/usr/local/bin/claude:ro)
  fi

  # Mount the project TOML into the container at a stable path
  local container_toml="/home/agent/project.toml"
  cmd+=(-v "${project_toml}:${container_toml}:ro")

  cmd+=(disinto-reproduce:latest "$container_toml" "$issue_number")

  # Launch in background; write pid-file so we don't double-launch.
  # NOTE: the recorded pid is the docker *client* process, which is what
  # is_triage_running later probes with kill -0.
  "${cmd[@]}" &
  local bg_pid=$!
  echo "$bg_pid" > "$(_triage_lockfile "$issue_number")"
  log "Triage container launched (pid ${bg_pid}) for issue #${issue_number}"
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Main dispatcher loop
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -501,6 +758,26 @@ main() {
|
|||
launch_runner "$toml_file" || true
|
||||
done
|
||||
|
||||
# Reproduce dispatch: check for bug-report issues needing reproduction
|
||||
local candidate_issues
|
||||
candidate_issues=$(fetch_reproduce_candidates) || true
|
||||
if [ -n "$candidate_issues" ]; then
|
||||
while IFS= read -r issue_num; do
|
||||
[ -n "$issue_num" ] || continue
|
||||
dispatch_reproduce "$issue_num" || true
|
||||
done <<< "$candidate_issues"
|
||||
fi
|
||||
|
||||
# Triage dispatch: check for bug-report + in-triage issues needing deep analysis
|
||||
local triage_issues
|
||||
triage_issues=$(fetch_triage_candidates) || true
|
||||
if [ -n "$triage_issues" ]; then
|
||||
while IFS= read -r issue_num; do
|
||||
[ -n "$issue_num" ] || continue
|
||||
dispatch_triage "$issue_num" || true
|
||||
done <<< "$triage_issues"
|
||||
fi
|
||||
|
||||
# Wait before next poll
|
||||
sleep 60
|
||||
done
|
||||
|
|
|
|||
|
|
@ -4,16 +4,23 @@ set -euo pipefail
|
|||
# Set USER before sourcing env.sh (Alpine doesn't set USER)
|
||||
export USER="${USER:-root}"
|
||||
|
||||
DISINTO_VERSION="${DISINTO_VERSION:-main}"
|
||||
DISINTO_REPO="${FORGE_URL:-http://forgejo:3000}/johba/disinto.git"
|
||||
FORGE_URL="${FORGE_URL:-http://forgejo:3000}"
|
||||
FORGE_REPO="${FORGE_REPO:-disinto-admin/disinto}"
|
||||
|
||||
# Shallow clone at the pinned version
|
||||
# Shallow clone at the pinned version (inject token to support auth-required Forgejo)
|
||||
if [ ! -d /opt/disinto/.git ]; then
|
||||
git clone --depth 1 --branch "$DISINTO_VERSION" "$DISINTO_REPO" /opt/disinto
|
||||
_auth_url=$(printf '%s' "$FORGE_URL" | sed "s|://|://token:${FORGE_TOKEN}@|")
|
||||
git clone --depth 1 --branch "${DISINTO_VERSION:-main}" "${_auth_url}/${FORGE_REPO}.git" /opt/disinto
|
||||
fi
|
||||
|
||||
# Start dispatcher in background
|
||||
bash /opt/disinto/docker/edge/dispatcher.sh &
|
||||
|
||||
# Start supervisor loop in background
|
||||
while true; do
|
||||
bash /opt/disinto/supervisor/supervisor-run.sh /opt/disinto/projects/disinto.toml 2>&1 | tee -a /opt/disinto-logs/supervisor.log || true
|
||||
sleep 1200 # 20 minutes
|
||||
done &
|
||||
|
||||
# Caddy as main process
|
||||
exec caddy run --config /etc/caddy/Caddyfile --adapter caddyfile
|
||||
|
|
|
|||
11
docker/reproduce/Dockerfile
Normal file
11
docker/reproduce/Dockerfile
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Reproduce/triage agent sidecar image; entrypoint drives Claude + Playwright
# against a bug-report issue (see docker/reproduce/entrypoint-reproduce.sh).
FROM debian:bookworm-slim
# Shell tooling + docker CLI (talks to the host socket mounted at runtime),
# node/npm + chromium for the Playwright MCP server.
# NOTE(review): confirm `docker-compose-plugin` is resolvable from the default
# bookworm repos, and that `@anthropic-ai/mcp-playwright` is the correct npm
# package name — neither can be verified from this file alone.
RUN apt-get update && apt-get install -y --no-install-recommends \
bash curl git jq docker.io docker-compose-plugin \
nodejs npm chromium \
&& npm install -g @anthropic-ai/mcp-playwright \
&& rm -rf /var/lib/apt/lists/*
# Non-root user; uid 1000 matches ownership of the mounted agent volumes.
RUN useradd -m -u 1000 -s /bin/bash agent
COPY docker/reproduce/entrypoint-reproduce.sh /entrypoint-reproduce.sh
RUN chmod +x /entrypoint-reproduce.sh
WORKDIR /home/agent
ENTRYPOINT ["/entrypoint-reproduce.sh"]
|
||||
766
docker/reproduce/entrypoint-reproduce.sh
Normal file
766
docker/reproduce/entrypoint-reproduce.sh
Normal file
|
|
@ -0,0 +1,766 @@
|
|||
#!/usr/bin/env bash
|
||||
# entrypoint-reproduce.sh — Reproduce-agent sidecar entrypoint
|
||||
#
|
||||
# Acquires the stack lock, boots the project stack (if formula declares
|
||||
# stack_script), then drives Claude + Playwright MCP to follow the bug
|
||||
# report's repro steps. Labels the issue based on outcome and posts
|
||||
# findings + screenshots.
|
||||
#
|
||||
# Usage (launched by dispatcher.sh):
|
||||
# entrypoint-reproduce.sh <project_toml> <issue_number>
|
||||
#
|
||||
# Environment (injected by dispatcher via docker run -e):
|
||||
# FORGE_URL, FORGE_TOKEN, FORGE_REPO, PRIMARY_BRANCH, DISINTO_CONTAINER=1
|
||||
#
|
||||
# Volumes expected:
|
||||
# /home/agent/data — agent-data volume (stack-lock files go here)
|
||||
# /home/agent/repos — project-repos volume
|
||||
# /home/agent/.claude — host ~/.claude (OAuth credentials)
|
||||
# /home/agent/.ssh — host ~/.ssh (read-only)
|
||||
# /usr/local/bin/claude — host claude CLI binary (read-only)
|
||||
# /var/run/docker.sock — host docker socket
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DISINTO_DIR="${DISINTO_DIR:-/home/agent/disinto}"

# Pick the formula file for this run; the dispatcher selects triage mode by
# exporting DISINTO_FORMULA=triage, anything else falls back to reproduce.
if [ "${DISINTO_FORMULA:-reproduce}" = "triage" ]; then
  ACTIVE_FORMULA="${DISINTO_DIR}/formulas/triage.toml"
else
  ACTIVE_FORMULA="${DISINTO_DIR}/formulas/reproduce.toml"
fi
|
||||
|
||||
REPRODUCE_TIMEOUT="${REPRODUCE_TIMEOUT_MINUTES:-15}"
|
||||
LOGFILE="/home/agent/data/logs/reproduce.log"
|
||||
SCREENSHOT_DIR="/home/agent/data/screenshots"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Determine agent type early for log prefix
|
||||
# ---------------------------------------------------------------------------
|
||||
if [ "${DISINTO_FORMULA:-reproduce}" = "triage" ]; then
|
||||
AGENT_TYPE="triage"
|
||||
else
|
||||
AGENT_TYPE="reproduce"
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Logging
|
||||
# ---------------------------------------------------------------------------
|
||||
log() {
  # Timestamped log line tagged with the agent type, mirrored to stdout
  # and appended to the persistent log file.
  local stamp
  stamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  printf '[%s] %s: %s\n' "$stamp" "$AGENT_TYPE" "$*" | tee -a "$LOGFILE"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argument validation
|
||||
# ---------------------------------------------------------------------------
|
||||
PROJECT_TOML="${1:-}"
|
||||
ISSUE_NUMBER="${2:-}"
|
||||
|
||||
if [ -z "$PROJECT_TOML" ] || [ -z "$ISSUE_NUMBER" ]; then
|
||||
log "FATAL: usage: entrypoint-reproduce.sh <project_toml> <issue_number>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$PROJECT_TOML" ]; then
|
||||
log "FATAL: project TOML not found: ${PROJECT_TOML}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bootstrap: directories, env
|
||||
# ---------------------------------------------------------------------------
|
||||
mkdir -p /home/agent/data/logs /home/agent/data/locks "$SCREENSHOT_DIR"
|
||||
|
||||
export DISINTO_CONTAINER=1
|
||||
export HOME="${HOME:-/home/agent}"
|
||||
export USER="${USER:-agent}"
|
||||
|
||||
FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
|
||||
|
||||
# Load project name from TOML
|
||||
PROJECT_NAME=$(python3 -c "
|
||||
import sys, tomllib
|
||||
with open(sys.argv[1], 'rb') as f:
|
||||
print(tomllib.load(f)['name'])
|
||||
" "$PROJECT_TOML" 2>/dev/null) || {
|
||||
log "FATAL: could not read project name from ${PROJECT_TOML}"
|
||||
exit 1
|
||||
}
|
||||
export PROJECT_NAME
|
||||
|
||||
PROJECT_REPO_ROOT="/home/agent/repos/${PROJECT_NAME}"
|
||||
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
log "Starting triage-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})"
|
||||
else
|
||||
log "Starting reproduce-agent for issue #${ISSUE_NUMBER} (project: ${PROJECT_NAME})"
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verify claude CLI is available (mounted from host)
|
||||
# ---------------------------------------------------------------------------
|
||||
if ! command -v claude &>/dev/null; then
|
||||
log "FATAL: claude CLI not found. Mount the host binary at /usr/local/bin/claude"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source stack-lock library
|
||||
# ---------------------------------------------------------------------------
|
||||
# shellcheck source=/home/agent/disinto/lib/stack-lock.sh
|
||||
source "${DISINTO_DIR}/lib/stack-lock.sh"
|
||||
|
||||
LOCK_HOLDER="reproduce-agent-${ISSUE_NUMBER}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Read formula config
|
||||
# ---------------------------------------------------------------------------
|
||||
FORMULA_STACK_SCRIPT=""
|
||||
FORMULA_TIMEOUT_MINUTES="${REPRODUCE_TIMEOUT}"
|
||||
|
||||
if [ -f "$ACTIVE_FORMULA" ]; then
|
||||
FORMULA_STACK_SCRIPT=$(python3 -c "
|
||||
import sys, tomllib
|
||||
with open(sys.argv[1], 'rb') as f:
|
||||
d = tomllib.load(f)
|
||||
print(d.get('stack_script', ''))
|
||||
" "$ACTIVE_FORMULA" 2>/dev/null || echo "")
|
||||
|
||||
_tm=$(python3 -c "
|
||||
import sys, tomllib
|
||||
with open(sys.argv[1], 'rb') as f:
|
||||
d = tomllib.load(f)
|
||||
print(d.get('timeout_minutes', '${REPRODUCE_TIMEOUT}'))
|
||||
" "$ACTIVE_FORMULA" 2>/dev/null || echo "${REPRODUCE_TIMEOUT}")
|
||||
FORMULA_TIMEOUT_MINUTES="$_tm"
|
||||
fi
|
||||
|
||||
log "Formula stack_script: '${FORMULA_STACK_SCRIPT}'"
|
||||
log "Formula timeout: ${FORMULA_TIMEOUT_MINUTES}m"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch issue details for repro steps
|
||||
# ---------------------------------------------------------------------------
|
||||
log "Fetching issue #${ISSUE_NUMBER} from ${FORGE_API}..."
|
||||
ISSUE_JSON=$(curl -sf \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/issues/${ISSUE_NUMBER}" 2>/dev/null) || {
|
||||
log "ERROR: failed to fetch issue #${ISSUE_NUMBER}"
|
||||
exit 1
|
||||
}
|
||||
|
||||
ISSUE_TITLE=$(echo "$ISSUE_JSON" | jq -r '.title // "unknown"')
|
||||
ISSUE_BODY=$(echo "$ISSUE_JSON" | jq -r '.body // ""')
|
||||
|
||||
log "Issue: ${ISSUE_TITLE}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Acquire stack lock
|
||||
# ---------------------------------------------------------------------------
|
||||
log "Acquiring stack lock for project ${PROJECT_NAME}..."
|
||||
stack_lock_acquire "$LOCK_HOLDER" "$PROJECT_NAME" 900
|
||||
log "Stack lock acquired."
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Start heartbeat in background (every 2 minutes)
|
||||
# ---------------------------------------------------------------------------
|
||||
heartbeat_loop() {
  # Refresh the stack lock every two minutes so the lock isn't considered
  # stale while the agent works; runs until killed by the EXIT trap.
  while :; do
    sleep 120
    stack_lock_heartbeat "$LOCK_HOLDER" "$PROJECT_NAME" 2>/dev/null || true
  done
}
|
||||
heartbeat_loop &
|
||||
HEARTBEAT_PID=$!
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Debug branch cleanup trap (for triage-agent throwaway branches)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Throwaway branch used by the triage formula for debug instrumentation.
# Must match the branch the triage prompt instructs the agent to create
# ("debug/triage-<issue>"); the previous value "triage-debug-<issue>" meant
# the EXIT trap deleted a branch that never existed and left the real debug
# branch behind.
DEBUG_BRANCH="debug/triage-${ISSUE_NUMBER}"

# Combined EXIT trap: heartbeat kill + stack lock release + debug branch cleanup.
# Single-quoted so all expansions happen at trap time with final values.
# NOTE(review): stack_lock_release is called as (PROJECT_NAME, LOCK_HOLDER)
# while stack_lock_acquire above takes (LOCK_HOLDER, PROJECT_NAME) — confirm
# the argument order against lib/stack-lock.sh.
trap 'kill "$HEARTBEAT_PID" 2>/dev/null || true
stack_lock_release "$PROJECT_NAME" "$LOCK_HOLDER" || true
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
git -C "$PROJECT_REPO_ROOT" branch -D "$DEBUG_BRANCH" 2>/dev/null || true
log "Cleanup completed (trap)"' EXIT
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Boot the project stack if formula declares stack_script
|
||||
# ---------------------------------------------------------------------------
|
||||
# Boot the project stack when the formula declares a stack_script AND the
# project repo volume is mounted; warn (but continue) otherwise.
if [ -n "$FORMULA_STACK_SCRIPT" ] && [ -d "$PROJECT_REPO_ROOT" ]; then
  log "Running stack_script: ${FORMULA_STACK_SCRIPT}"
  # Run in project repo root; script path is relative to project repo.
  # Read stack_script into array to allow arguments (e.g. "scripts/dev.sh restart --full").
  # NOTE(review): read -ra splits on unquoted whitespace only — a stack_script
  # value with quoted arguments containing spaces would be split incorrectly;
  # confirm formulas never need that.
  read -ra _stack_cmd <<< "$FORMULA_STACK_SCRIPT"
  # Subshell keeps this script's own cwd unchanged.
  (cd "$PROJECT_REPO_ROOT" && bash "${_stack_cmd[@]}") || {
    log "WARNING: stack_script exited non-zero — continuing anyway"
  }
  # Give the stack a moment to stabilise
  sleep 5
elif [ -n "$FORMULA_STACK_SCRIPT" ]; then
  log "WARNING: PROJECT_REPO_ROOT not found at ${PROJECT_REPO_ROOT} — skipping stack_script"
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build Claude prompt based on agent type
|
||||
# ---------------------------------------------------------------------------
|
||||
TIMESTAMP=$(date -u '+%Y%m%d-%H%M%S')
|
||||
SCREENSHOT_PREFIX="${SCREENSHOT_DIR}/issue-${ISSUE_NUMBER}-${TIMESTAMP}"
|
||||
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
# Triage-agent prompt: deep root cause analysis after reproduce-agent findings
|
||||
CLAUDE_PROMPT=$(cat <<PROMPT
|
||||
You are the triage-agent. Your task is to perform deep root cause analysis on issue #${ISSUE_NUMBER} after the reproduce-agent has confirmed the bug.
|
||||
|
||||
## Issue title
|
||||
${ISSUE_TITLE}
|
||||
|
||||
## Issue body
|
||||
${ISSUE_BODY}
|
||||
|
||||
## Your task — 6-step triage workflow
|
||||
|
||||
You have a defined 6-step workflow to follow. Budget your turns: ~70% on tracing, ~30% on instrumentation.
|
||||
|
||||
### Step 1: Read reproduce-agent findings
|
||||
Before doing anything else, parse all prior evidence from the issue comments.
|
||||
|
||||
1. Fetch the issue body and all comments:
|
||||
curl -sf -H "Authorization: token \${FORGE_TOKEN}" \
|
||||
"\${FORGE_API}/issues/\${ISSUE_NUMBER}" | jq -r '.body'
|
||||
curl -sf -H "Authorization: token \${FORGE_TOKEN}" \
|
||||
"\${FORGE_API}/issues/\${ISSUE_NUMBER}/comments" | jq -r '.[].body'
|
||||
|
||||
2. Identify the reproduce-agent comment (look for sections like
|
||||
"Reproduction steps", "Logs examined", "What was tried").
|
||||
|
||||
3. Extract and note:
|
||||
- The exact symptom (error message, unexpected value, visual regression)
|
||||
- Steps that reliably trigger the bug
|
||||
- Log lines or API responses already captured
|
||||
- Any hypotheses the reproduce-agent already ruled out
|
||||
|
||||
Do NOT repeat work the reproduce-agent already did. Your job starts where
|
||||
theirs ended. If no reproduce-agent comment is found, note it and proceed
|
||||
with fresh investigation using the issue body only.
|
||||
|
||||
### Step 2: Trace data flow from symptom to source
|
||||
Systematically follow the symptom backwards through each layer of the stack.
|
||||
|
||||
Generic layer traversal: UI → API → backend → data store
|
||||
|
||||
For each layer boundary:
|
||||
1. What does the upstream layer send?
|
||||
2. What does the downstream layer expect?
|
||||
3. Is there a mismatch? If yes — is this the root cause or a symptom?
|
||||
|
||||
Tracing checklist:
|
||||
a. Start at the layer closest to the visible symptom.
|
||||
b. Read the relevant source files — do not guess data shapes.
|
||||
c. Cross-reference API contracts: compare what the code sends vs what it
|
||||
should send according to schemas, type definitions, or documentation.
|
||||
d. Check recent git history on suspicious files:
|
||||
git log --oneline -20 -- <file>
|
||||
e. Search for related issues or TODOs in the code:
|
||||
grep -r "TODO\|FIXME\|HACK" -- <relevant directory>
|
||||
|
||||
Capture for each layer:
|
||||
- The data shape flowing in and out (field names, types, nullability)
|
||||
- Whether the layer's behavior matches its documented contract
|
||||
- Any discrepancy found
|
||||
|
||||
If a clear root cause becomes obvious during tracing, note it and continue
|
||||
checking whether additional causes exist downstream.
|
||||
|
||||
### Step 3: Add debug instrumentation on a throwaway branch
|
||||
Use ~30% of your total turn budget here. Only instrument after tracing has
|
||||
identified the most likely failure points — do not instrument blindly.
|
||||
|
||||
1. Create a throwaway debug branch (NEVER commit this to main):
|
||||
cd "\$PROJECT_REPO_ROOT"
|
||||
git checkout -b debug/triage-\${ISSUE_NUMBER}
|
||||
|
||||
2. Add targeted logging at the layer boundaries identified during tracing:
|
||||
- Console.log / structured log statements around the suspicious code path
|
||||
- Log the actual values flowing through: inputs, outputs, intermediate state
|
||||
- Add verbose mode flags if the stack supports them
|
||||
- Keep instrumentation minimal — only what confirms or refutes the hypothesis
|
||||
|
||||
3. Restart the stack using the configured script (if set):
|
||||
\${stack_script:-"# No stack_script configured — restart manually or connect to staging"}
|
||||
|
||||
4. Re-run the reproduction steps from the reproduce-agent findings.
|
||||
|
||||
5. Observe and capture new output:
|
||||
- Paste relevant log lines into your working notes
|
||||
- Note whether the observed values match or contradict the hypothesis
|
||||
|
||||
6. If the first instrumentation pass is inconclusive, iterate:
|
||||
- Narrow the scope to the next most suspicious boundary
|
||||
- Re-instrument, restart, re-run
|
||||
- Maximum 2-3 instrumentation rounds before declaring inconclusive
|
||||
|
||||
Do NOT push the debug branch. It will be deleted in the cleanup step.
|
||||
|
||||
### Step 4: Decompose root causes into backlog issues
|
||||
After tracing and instrumentation, articulate each distinct root cause.
|
||||
|
||||
For each root cause found:
|
||||
|
||||
1. Determine the relationship to other causes:
|
||||
- Layered (one causes another) → use Depends-on in the issue body
|
||||
- Independent (separate code paths fail independently) → use Related
|
||||
|
||||
2. Create a backlog issue for each root cause:
|
||||
curl -sf -X POST "\${FORGE_API}/issues" \\
|
||||
-H "Authorization: token \${FORGE_TOKEN}" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{
|
||||
"title": "fix: <specific description of root cause N>",
|
||||
"body": "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #\${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>",
|
||||
"labels": [{"name": "backlog"}]
|
||||
}'
|
||||
|
||||
3. Note the newly created issue numbers.
|
||||
|
||||
If only one root cause is found, still create a single backlog issue with
|
||||
the specific code location and fix suggestion.
|
||||
|
||||
If the investigation is inconclusive (no clear root cause found), skip this
|
||||
step and proceed directly to link-back with the inconclusive outcome.
|
||||
|
||||
### Step 5: Update original issue and relabel
|
||||
Post a summary comment on the original issue and update its labels.
|
||||
|
||||
#### If root causes were found (conclusive):
|
||||
|
||||
Post a comment:
|
||||
"## Triage findings
|
||||
|
||||
Found N root cause(s):
|
||||
- #X — <one-line description> (cause 1 of N)
|
||||
- #Y — <one-line description> (cause 2 of N, depends on #X)
|
||||
|
||||
Data flow traced: <layer where the bug originates>
|
||||
Instrumentation: <key log output that confirmed the cause>
|
||||
|
||||
Next step: backlog issues above will be implemented in dependency order."
|
||||
|
||||
Then swap labels:
|
||||
- Remove: in-triage
|
||||
- Add: in-progress
|
||||
|
||||
#### If investigation was inconclusive (turn budget exhausted):
|
||||
|
||||
Post a comment:
|
||||
"## Triage — inconclusive
|
||||
|
||||
Traced: <layers checked>
|
||||
Tried: <instrumentation attempts and what they showed>
|
||||
Hypothesis: <best guess at cause, if any>
|
||||
|
||||
No definitive root cause identified. Leaving in-triage for supervisor
|
||||
to handle as a stale triage session."
|
||||
|
||||
Do NOT relabel. Leave in-triage. The supervisor monitors stale triage
|
||||
sessions and will escalate or reassign.
|
||||
|
||||
### Step 6: Delete throwaway debug branch
|
||||
Always delete the debug branch, even if the investigation was inconclusive.
|
||||
|
||||
1. Switch back to the main branch:
|
||||
cd "\$PROJECT_REPO_ROOT"
|
||||
git checkout "\$PRIMARY_BRANCH"
|
||||
|
||||
2. Delete the local debug branch:
|
||||
git branch -D debug/triage-\${ISSUE_NUMBER}
|
||||
|
||||
3. Confirm no remote was pushed (if accidentally pushed, delete it too):
|
||||
git push origin --delete debug/triage-\${ISSUE_NUMBER} 2>/dev/null || true
|
||||
|
||||
4. Verify the worktree is clean:
|
||||
git status
|
||||
git worktree list
|
||||
|
||||
A clean repo is a prerequisite for the next dev-agent run. Never leave
|
||||
debug branches behind — they accumulate and pollute the branch list.
|
||||
|
||||
## Notes
|
||||
- The application is accessible at localhost (network_mode: host)
|
||||
- Budget: 70% tracing data flow, 30% instrumented re-runs
|
||||
- Timeout: \${FORMULA_TIMEOUT_MINUTES} minutes total (or until turn limit)
|
||||
- Stack lock is held for the full run
|
||||
- If stack_script is empty, connect to existing staging environment
|
||||
|
||||
Begin now.
|
||||
PROMPT
|
||||
)
|
||||
else
|
||||
# Reproduce-agent prompt: reproduce the bug and report findings
|
||||
CLAUDE_PROMPT=$(cat <<PROMPT
|
||||
You are the reproduce-agent. Your task is to reproduce the bug described in issue #${ISSUE_NUMBER} and report your findings.
|
||||
|
||||
## Issue title
|
||||
${ISSUE_TITLE}
|
||||
|
||||
## Issue body
|
||||
${ISSUE_BODY}
|
||||
|
||||
## Your task — PRIMARY GOAL FIRST
|
||||
|
||||
This agent has ONE primary job and ONE secondary, minor job. Follow this ORDER:
|
||||
|
||||
### PRIMARY: Can the bug be reproduced? (60% of your turns)
|
||||
|
||||
This is the EXIT GATE. Answer YES or NO before doing anything else.
|
||||
|
||||
1. Read the issue, understand the claimed behavior
|
||||
2. Navigate the app via Playwright, follow the reported steps
|
||||
3. Observe: does the symptom match the report?
|
||||
4. Take screenshots as evidence (save to: ${SCREENSHOT_PREFIX}-step-N.png)
|
||||
5. Conclude: **reproduced** or **cannot reproduce**
|
||||
|
||||
If **cannot reproduce** → Write OUTCOME=cannot-reproduce, write findings, DONE. EXIT.
|
||||
If **inconclusive** (timeout, env issues, app not reachable) → Write OUTCOME=needs-triage with reason, write findings, DONE. EXIT.
|
||||
If **reproduced** → Continue to secondary check.
|
||||
|
||||
### SECONDARY (minor): Is the cause obvious? (40% of your turns, only if reproduced)
|
||||
|
||||
Only after reproduction is confirmed. Quick check only — do not go deep.
|
||||
|
||||
1. Check container logs: docker compose -f ${PROJECT_REPO_ROOT}/docker-compose.yml logs --tail=200
|
||||
Look for: stack traces, error messages, wrong addresses, missing config, parse errors
|
||||
2. Check browser console output captured during reproduction
|
||||
3. If the cause JUMPS OUT (clear error, obvious misconfiguration) → note it
|
||||
|
||||
If **obvious cause** → Write OUTCOME=reproduced and ROOT_CAUSE=<one-line summary>
|
||||
If **not obvious** → Write OUTCOME=reproduced (no ROOT_CAUSE line)
|
||||
|
||||
## Output files
|
||||
|
||||
1. **Findings report** — Write to: /tmp/reproduce-findings-${ISSUE_NUMBER}.md
|
||||
Include:
|
||||
- Steps you followed
|
||||
- What you observed (screenshots referenced by path)
|
||||
- Log excerpts (truncated to relevant lines)
|
||||
- OUTCOME line: OUTCOME=reproduced OR OUTCOME=cannot-reproduce OR OUTCOME=needs-triage
|
||||
- ROOT_CAUSE line (ONLY if cause is obvious): ROOT_CAUSE=<one-line summary>
|
||||
|
||||
2. **Outcome file** — Write to: /tmp/reproduce-outcome-${ISSUE_NUMBER}.txt
|
||||
Write ONLY the outcome word: reproduced OR cannot-reproduce OR needs-triage
|
||||
|
||||
## Notes
|
||||
- The application is accessible at localhost (network_mode: host)
|
||||
- Take screenshots liberally — they are evidence
|
||||
- If the app is not running or not reachable, write outcome: cannot-reproduce with reason "stack not reachable"
|
||||
- Timeout: ${FORMULA_TIMEOUT_MINUTES} minutes total
|
||||
- EXIT gates are enforced — do not continue to secondary check if primary result is NO or inconclusive
|
||||
|
||||
Begin now.
|
||||
PROMPT
|
||||
)
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Run Claude with Playwright MCP
|
||||
# ---------------------------------------------------------------------------
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
log "Starting triage-agent session (timeout: ${FORMULA_TIMEOUT_MINUTES}m)..."
|
||||
else
|
||||
log "Starting Claude reproduction session (timeout: ${FORMULA_TIMEOUT_MINUTES}m)..."
|
||||
fi
|
||||
|
||||
CLAUDE_EXIT=0
|
||||
timeout "$(( FORMULA_TIMEOUT_MINUTES * 60 ))" \
|
||||
claude -p "$CLAUDE_PROMPT" \
|
||||
--mcp-server playwright \
|
||||
--output-format text \
|
||||
--max-turns 40 \
|
||||
> "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>&1 || CLAUDE_EXIT=$?
|
||||
|
||||
if [ $CLAUDE_EXIT -eq 124 ]; then
|
||||
log "WARNING: Claude session timed out after ${FORMULA_TIMEOUT_MINUTES}m"
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Triage post-processing: enforce backlog label on created issues
|
||||
# ---------------------------------------------------------------------------
|
||||
# The triage agent may create sub-issues for root causes. Ensure they have
|
||||
# the backlog label so dev-agent picks them up. Parse Claude output for
|
||||
# newly created issue numbers and add the backlog label.
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
log "Triage post-processing: checking for created issues to label..."
|
||||
|
||||
# Extract issue numbers from Claude output that were created during triage.
|
||||
# Match unambiguous creation patterns: "Created issue #123", "Created #123",
|
||||
# or "harb#123". Do NOT match bare #123 which would capture references in
|
||||
# the triage summary (e.g., "Decomposed from #5", "cause 1 of 2", etc.).
|
||||
CREATED_ISSUES=$(grep -oE '(Created|created) issue #[0-9]+|(Created|created) #[0-9]+|harb#[0-9]+' \
|
||||
"/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null | \
|
||||
grep -oE '[0-9]+' | sort -u | head -10)
|
||||
|
||||
if [ -n "$CREATED_ISSUES" ]; then
|
||||
# Get backlog label ID
|
||||
BACKLOG_ID=$(_label_id "backlog" "#fef2c0")
|
||||
|
||||
if [ -z "$BACKLOG_ID" ]; then
|
||||
log "WARNING: could not get backlog label ID — skipping label enforcement"
|
||||
else
|
||||
for issue_num in $CREATED_ISSUES; do
|
||||
_add_label "$issue_num" "$BACKLOG_ID"
|
||||
log "Added backlog label to created issue #${issue_num}"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Read outcome
|
||||
# ---------------------------------------------------------------------------
|
||||
OUTCOME="needs-triage"
|
||||
OUTCOME_FILE=""
|
||||
OUTCOME_FOUND=false
|
||||
|
||||
# Check reproduce-agent outcome file first
|
||||
if [ -f "/tmp/reproduce-outcome-${ISSUE_NUMBER}.txt" ]; then
|
||||
OUTCOME_FILE="/tmp/reproduce-outcome-${ISSUE_NUMBER}.txt"
|
||||
OUTCOME_FOUND=true
|
||||
fi
|
||||
|
||||
# For triage agent, also check triage-specific outcome file
|
||||
if [ "$AGENT_TYPE" = "triage" ] && [ -f "/tmp/triage-outcome-${ISSUE_NUMBER}.txt" ]; then
|
||||
OUTCOME_FILE="/tmp/triage-outcome-${ISSUE_NUMBER}.txt"
|
||||
OUTCOME_FOUND=true
|
||||
fi
|
||||
|
||||
if [ "$OUTCOME_FOUND" = true ]; then
|
||||
_raw=$(tr -d '[:space:]' < "$OUTCOME_FILE" | tr '[:upper:]' '[:lower:]')
|
||||
case "$_raw" in
|
||||
reproduced|cannot-reproduce|needs-triage)
|
||||
OUTCOME="$_raw"
|
||||
;;
|
||||
*)
|
||||
log "WARNING: unexpected outcome '${_raw}' — defaulting to needs-triage"
|
||||
;;
|
||||
esac
|
||||
else
|
||||
# For triage agent, detect success by checking Claude output for:
|
||||
# 1. Triage findings comment indicating root causes were found
|
||||
# 2. Sub-issues created during triage
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
CLAUDE_OUTPUT="/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt"
|
||||
|
||||
# Check for triage findings comment with root causes found
|
||||
if grep -q "## Triage findings" "$CLAUDE_OUTPUT" 2>/dev/null && \
|
||||
grep -q "Found [0-9]* root cause(s)" "$CLAUDE_OUTPUT" 2>/dev/null; then
|
||||
log "Triage success detected: findings comment with root causes found"
|
||||
OUTCOME="reproduced"
|
||||
OUTCOME_FOUND=true
|
||||
# Check for created sub-issues during triage
|
||||
elif grep -qE "(Created|created) issue #[0-9]+|(Created|created) #[0-9]+|harb#[0-9]+" "$CLAUDE_OUTPUT" 2>/dev/null; then
|
||||
log "Triage success detected: sub-issues created"
|
||||
OUTCOME="reproduced"
|
||||
OUTCOME_FOUND=true
|
||||
else
|
||||
log "WARNING: outcome file not found and no triage success indicators — defaulting to needs-triage"
|
||||
fi
|
||||
else
|
||||
log "WARNING: outcome file not found — defaulting to needs-triage"
|
||||
fi
|
||||
fi
|
||||
|
||||
log "Outcome: ${OUTCOME}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Read findings
|
||||
# ---------------------------------------------------------------------------
|
||||
FINDINGS=""
|
||||
if [ -f "/tmp/reproduce-findings-${ISSUE_NUMBER}.md" ]; then
|
||||
FINDINGS=$(cat "/tmp/reproduce-findings-${ISSUE_NUMBER}.md")
|
||||
else
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
FINDINGS="Triage-agent completed but did not write a findings report. Claude output:\n\`\`\`\n$(tail -100 "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null || echo '(no output)')\n\`\`\`"
|
||||
else
|
||||
FINDINGS="Reproduce-agent completed but did not write a findings report. Claude output:\n\`\`\`\n$(tail -100 "/tmp/reproduce-claude-output-${ISSUE_NUMBER}.txt" 2>/dev/null || echo '(no output)')\n\`\`\`"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collect screenshot paths for comment
|
||||
# ---------------------------------------------------------------------------
|
||||
SCREENSHOT_LIST=""
|
||||
if find "$(dirname "${SCREENSHOT_PREFIX}")" -name "$(basename "${SCREENSHOT_PREFIX}")-*.png" -maxdepth 1 2>/dev/null | grep -q .; then
|
||||
SCREENSHOT_LIST="\n\n**Screenshots taken:**\n"
|
||||
for f in "${SCREENSHOT_PREFIX}"-*.png; do
|
||||
SCREENSHOT_LIST="${SCREENSHOT_LIST}- \`$(basename "$f")\`\n"
|
||||
done
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Label helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
_label_id() {
|
||||
local name="$1" color="$2"
|
||||
local id
|
||||
id=$(curl -sf \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/labels" 2>/dev/null \
|
||||
| jq -r --arg n "$name" '.[] | select(.name == $n) | .id' 2>/dev/null || echo "")
|
||||
if [ -z "$id" ]; then
|
||||
id=$(curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/labels" \
|
||||
-d "{\"name\":\"${name}\",\"color\":\"${color}\"}" 2>/dev/null \
|
||||
| jq -r '.id // empty' 2>/dev/null || echo "")
|
||||
fi
|
||||
echo "$id"
|
||||
}
|
||||
|
||||
_add_label() {
|
||||
local issue="$1" label_id="$2"
|
||||
[ -z "$label_id" ] && return 0
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/issues/${issue}/labels" \
|
||||
-d "{\"labels\":[${label_id}]}" >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
_remove_label() {
|
||||
local issue="$1" label_id="$2"
|
||||
[ -z "$label_id" ] && return 0
|
||||
curl -sf -X DELETE \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
_post_comment() {
|
||||
local issue="$1" body="$2"
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/issues/${issue}/comments" \
|
||||
-d "$(jq -nc --arg b "$body" '{body:$b}')" >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Apply labels and post findings
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Exit gate logic:
|
||||
# 1. Can I reproduce it? → NO → rejected/blocked → EXIT
|
||||
# → YES → continue
|
||||
# 2. Is the cause obvious? → YES → backlog issue for dev → EXIT
|
||||
# → NO → in-triage → EXIT
|
||||
#
|
||||
# Label combinations (on the ORIGINAL issue):
|
||||
# - Reproduced + obvious cause: reproduced (custom status) → backlog issue created
|
||||
# - Reproduced + cause unclear: in-triage → Triage-agent
|
||||
# - Cannot reproduce: rejected → Human review
|
||||
# - Inconclusive (timeout/error): blocked → Gardener/human
|
||||
#
|
||||
# The newly created fix issue (when cause is obvious) gets backlog label
|
||||
# so dev-poll will pick it up for implementation.
|
||||
|
||||
# Remove bug-report label (we are resolving it)
|
||||
BUG_REPORT_ID=$(_label_id "bug-report" "#e4e669")
|
||||
_remove_label "$ISSUE_NUMBER" "$BUG_REPORT_ID"
|
||||
|
||||
# Determine agent name for comments (based on AGENT_TYPE set at script start)
|
||||
if [ "$AGENT_TYPE" = "triage" ]; then
|
||||
AGENT_NAME="Triage-agent"
|
||||
else
|
||||
AGENT_NAME="Reproduce-agent"
|
||||
fi
|
||||
|
||||
# Determine outcome and apply appropriate labels
|
||||
LABEL_NAME=""
|
||||
LABEL_COLOR=""
|
||||
COMMENT_HEADER=""
|
||||
CREATE_BACKLOG_ISSUE=false
|
||||
|
||||
case "$OUTCOME" in
|
||||
reproduced)
|
||||
# Check if root cause is obvious (ROOT_CAUSE is set and non-trivial)
|
||||
ROOT_CAUSE=$(grep -m1 "^ROOT_CAUSE=" "/tmp/reproduce-findings-${ISSUE_NUMBER}.md" 2>/dev/null \
|
||||
| sed 's/^ROOT_CAUSE=//' || echo "")
|
||||
if [ -n "$ROOT_CAUSE" ] && [ "$ROOT_CAUSE" != "See findings on issue #${ISSUE_NUMBER}" ]; then
|
||||
# Obvious cause → add reproduced status label, create backlog issue for dev-agent
|
||||
LABEL_NAME="reproduced"
|
||||
LABEL_COLOR="#0075ca"
|
||||
COMMENT_HEADER="## ${AGENT_NAME}: **Reproduced with obvious cause** :white_check_mark: :zap:"
|
||||
CREATE_BACKLOG_ISSUE=true
|
||||
else
|
||||
# Cause unclear → in-triage → Triage-agent
|
||||
LABEL_NAME="in-triage"
|
||||
LABEL_COLOR="#d93f0b"
|
||||
COMMENT_HEADER="## ${AGENT_NAME}: **Reproduced, cause unclear** :white_check_mark: :mag:"
|
||||
fi
|
||||
;;
|
||||
|
||||
cannot-reproduce)
|
||||
# Cannot reproduce → rejected → Human review
|
||||
LABEL_NAME="rejected"
|
||||
LABEL_COLOR="#e4e669"
|
||||
COMMENT_HEADER="## ${AGENT_NAME}: **Cannot reproduce** :x:"
|
||||
;;
|
||||
|
||||
needs-triage)
|
||||
# Inconclusive (timeout, env issues) → blocked → Gardener/human
|
||||
LABEL_NAME="blocked"
|
||||
LABEL_COLOR="#e11d48"
|
||||
COMMENT_HEADER="## ${AGENT_NAME}: **Inconclusive, blocked** :construction:"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Apply the outcome label
|
||||
OUTCOME_LABEL_ID=$(_label_id "$LABEL_NAME" "$LABEL_COLOR")
|
||||
_add_label "$ISSUE_NUMBER" "$OUTCOME_LABEL_ID"
|
||||
log "Applied label '${LABEL_NAME}' to issue #${ISSUE_NUMBER}"
|
||||
|
||||
# If obvious cause, create backlog issue for dev-agent
|
||||
if [ "$CREATE_BACKLOG_ISSUE" = true ]; then
|
||||
BACKLOG_BODY="## Summary
|
||||
Bug reproduced from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}
|
||||
|
||||
Root cause (quick log analysis): ${ROOT_CAUSE}
|
||||
|
||||
## Dependencies
|
||||
- #${ISSUE_NUMBER}
|
||||
|
||||
## Affected files
|
||||
- (see findings on issue #${ISSUE_NUMBER})
|
||||
|
||||
## Acceptance criteria
|
||||
- [ ] Root cause confirmed and fixed
|
||||
- [ ] Issue #${ISSUE_NUMBER} no longer reproducible"
|
||||
|
||||
log "Creating backlog issue for reproduced bug with obvious cause..."
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/issues" \
|
||||
-d "$(jq -nc \
|
||||
--arg t "fix: $(echo "$ISSUE_TITLE" | sed 's/^bug:/fix:/' | sed 's/^feat:/fix:/')" \
|
||||
--arg b "$BACKLOG_BODY" \
|
||||
'{title:$t, body:$b, labels:[{"name":"backlog"}]}' 2>/dev/null)" >/dev/null 2>&1 || \
|
||||
log "WARNING: failed to create backlog issue"
|
||||
fi
|
||||
|
||||
COMMENT_BODY="${COMMENT_HEADER}
|
||||
|
||||
${FINDINGS}${SCREENSHOT_LIST}
|
||||
|
||||
---
|
||||
*${AGENT_NAME} run at $(date -u '+%Y-%m-%d %H:%M:%S UTC') — project: ${PROJECT_NAME}*"
|
||||
|
||||
_post_comment "$ISSUE_NUMBER" "$COMMENT_BODY"
|
||||
log "Posted findings to issue #${ISSUE_NUMBER}"
|
||||
|
||||
log "${AGENT_NAME} done. Outcome: ${OUTCOME}"
|
||||
|
|
@ -92,10 +92,9 @@ PHASE:failed → label issue blocked, post diagnostic comment
|
|||
|
||||
### `idle_prompt` exit reason
|
||||
|
||||
`monitor_phase_loop` (in `lib/agent-session.sh`) can exit with
|
||||
`_MONITOR_LOOP_EXIT=idle_prompt`. This happens when Claude returns to the
|
||||
interactive prompt (`❯`) for **3 consecutive polls** without writing any phase
|
||||
signal to the phase file.
|
||||
The phase monitor can exit with `_MONITOR_LOOP_EXIT=idle_prompt`. This happens
|
||||
when Claude returns to the interactive prompt (`❯`) for **3 consecutive polls**
|
||||
without writing any phase signal to the phase file.
|
||||
|
||||
**Trigger conditions:**
|
||||
- The phase file is empty (no phase has ever been written), **and**
|
||||
|
|
@ -111,14 +110,13 @@ signal to the phase file.
|
|||
callback without the phase file actually containing that value.
|
||||
|
||||
**Agent requirements:**
|
||||
- **Callback (`_on_phase_change` / `formula_phase_callback`):** Must handle
|
||||
`PHASE:failed` defensively — the session is already dead, so any tmux
|
||||
send-keys or session-dependent logic must be skipped or guarded.
|
||||
- **Callback:** Must handle `PHASE:failed` defensively — the session is already
|
||||
dead, so any tmux send-keys or session-dependent logic must be skipped or
|
||||
guarded.
|
||||
- **Post-loop exit handler (`case $_MONITOR_LOOP_EXIT`):** Must include an
|
||||
`idle_prompt)` branch. Typical actions: log the event, clean up temp files,
|
||||
and (for agents that use escalation) write an escalation entry or notify via
|
||||
vault/forge. See `dev/dev-agent.sh` and
|
||||
`gardener/gardener-agent.sh` for reference implementations.
|
||||
vault/forge. See `dev/dev-agent.sh` for reference implementations.
|
||||
|
||||
## Crash Recovery
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ Validate release prerequisites before proceeding.
|
|||
|
||||
7. Check if tag already exists on Forgejo:
|
||||
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||
- "$FORGE_URL/api/v1/repos/johba/disinto/git/tags/$RELEASE_VERSION"
|
||||
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/git/tags/$RELEASE_VERSION"
|
||||
- If exists, exit with error
|
||||
|
||||
8. Export RELEASE_VERSION for subsequent steps:
|
||||
|
|
@ -77,14 +77,14 @@ Create the release tag on Forgejo main via the Forgejo API.
|
|||
|
||||
1. Get current HEAD SHA of main:
|
||||
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||
- "$FORGE_URL/api/v1/repos/johba/disinto/branches/$PRIMARY_BRANCH"
|
||||
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/branches/$PRIMARY_BRANCH"
|
||||
- Parse sha field from response
|
||||
|
||||
2. Create tag via Forgejo API:
|
||||
- curl -sf -X POST \
|
||||
- -H "Authorization: token $FORGE_TOKEN" \
|
||||
- -H "Content-Type: application/json" \
|
||||
- "$FORGE_URL/api/v1/repos/johba/disinto/tags" \
|
||||
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/tags" \
|
||||
- -d "{\"tag\":\"$RELEASE_VERSION\",\"target\":\"$HEAD_SHA\",\"message\":\"Release $RELEASE_VERSION\"}"
|
||||
- Parse response for success
|
||||
|
||||
|
|
@ -106,8 +106,8 @@ description = """
|
|||
Push the newly created tag to all configured mirrors.
|
||||
|
||||
1. Add mirror remotes if not already present:
|
||||
- Codeberg: git remote add codeberg git@codeberg.org:johba/disinto.git
|
||||
- GitHub: git remote add github git@github.com:disinto/disinto.git
|
||||
- Codeberg: git remote add codeberg git@codeberg.org:${FORGE_REPO_OWNER}/${PROJECT_NAME}.git
|
||||
- GitHub: git remote add github git@github.com:disinto/${PROJECT_NAME}.git
|
||||
- Check with: git remote -v
|
||||
|
||||
2. Push tag to Codeberg:
|
||||
|
|
@ -120,9 +120,9 @@ Push the newly created tag to all configured mirrors.
|
|||
|
||||
4. Verify tags exist on mirrors:
|
||||
- curl -sf -H "Authorization: token $GITHUB_TOKEN" \
|
||||
- "https://api.github.com/repos/disinto/disinto/tags/$RELEASE_VERSION"
|
||||
- "https://api.github.com/repos/disinto/${PROJECT_NAME}/tags/$RELEASE_VERSION"
|
||||
- curl -sf -H "Authorization: token $FORGE_TOKEN" \
|
||||
- "$FORGE_URL/api/v1/repos/johba/disinto/git/tags/$RELEASE_VERSION"
|
||||
- "$FORGE_URL/api/v1/repos/$FORGE_REPO/git/tags/$RELEASE_VERSION"
|
||||
|
||||
5. Log success:
|
||||
- echo "Tag $RELEASE_VERSION pushed to mirrors"
|
||||
|
|
@ -227,7 +227,7 @@ Write the release result to a file for tracking.
|
|||
- {
|
||||
- "version": "$RELEASE_VERSION",
|
||||
- "image_id": "$IMAGE_ID",
|
||||
- "forgejo_tag_url": "$FORGE_URL/johba/disinto/src/$RELEASE_VERSION",
|
||||
- "forgejo_tag_url": "$FORGE_URL/$FORGE_REPO/src/$RELEASE_VERSION",
|
||||
- "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
||||
- "status": "success"
|
||||
- }
|
||||
|
|
|
|||
31
formulas/reproduce.toml
Normal file
31
formulas/reproduce.toml
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# formulas/reproduce.toml — Reproduce-agent formula
|
||||
#
|
||||
# Declares the reproduce-agent's runtime parameters.
|
||||
# The dispatcher reads this to configure the sidecar container.
|
||||
#
|
||||
# stack_script: path (relative to PROJECT_REPO_ROOT) of the script used to
|
||||
# restart/rebuild the project stack before reproduction. Omit (or leave
|
||||
# blank) to connect to an existing staging environment instead.
|
||||
#
|
||||
# tools: MCP servers to pass to claude via --mcp-server flags.
|
||||
#
|
||||
# timeout_minutes: hard upper bound on the Claude session.
|
||||
#
|
||||
# Exit gate logic:
|
||||
# 1. Can I reproduce it? → NO → rejected/blocked → EXIT
|
||||
# → YES → continue
|
||||
# 2. Is the cause obvious? → YES → in-progress + backlog issue → EXIT
|
||||
# → NO → in-triage → EXIT
|
||||
#
|
||||
# Turn budget: 60% on step 1 (reproduction), 40% on step 2 (cause check).
|
||||
|
||||
name = "reproduce"
|
||||
description = "Primary: reproduce the bug. Secondary: check if cause is obvious. Exit gates enforced."
|
||||
version = 1
|
||||
|
||||
# Set stack_script to the restart command for local stacks.
|
||||
# Leave empty ("") to target an existing staging environment.
|
||||
stack_script = ""
|
||||
|
||||
tools = ["playwright"]
|
||||
timeout_minutes = 15
|
||||
|
|
@ -76,6 +76,63 @@ Pre-checks (bash, zero tokens — detect problems before invoking Claude):
|
|||
6. Tech-debt promotion: list all tech-debt labeled issues — goal is to
|
||||
process them all (promote to backlog or classify as dust).
|
||||
|
||||
7. Bug-report detection: for each open unlabeled issue (no backlog, no
|
||||
bug-report, no in-progress, no blocked, no underspecified, no vision,
|
||||
no tech-debt), check whether it describes a user-facing bug with
|
||||
reproduction steps. Criteria — ALL must be true:
|
||||
a. Body describes broken behavior (something that should work but
|
||||
doesn't), NOT a feature request or enhancement
|
||||
b. Body contains steps to reproduce (numbered list, "steps to
|
||||
reproduce" heading, or clear sequence of actions that trigger the bug)
|
||||
c. Issue is not already labeled
|
||||
|
||||
If all criteria match, enrich the issue body and write the manifest actions:
|
||||
|
||||
Body enrichment (CRITICAL — turns raw reports into actionable investigation briefs):
|
||||
Before writing the add_label action, construct an enriched body by appending
|
||||
these sections to the original issue body:
|
||||
|
||||
a. ``## What was reported``
|
||||
One or two sentence summary of the user's claim. Distill the broken
|
||||
behavior concisely — what the user expected vs. what actually happened.
|
||||
|
||||
b. ``## Known context``
|
||||
What can be inferred from the codebase without running anything:
|
||||
- Which contracts/components/files are involved (use AGENTS.md layout
|
||||
and file paths mentioned in the issue or body)
|
||||
- What the expected behavior should be (from VISION.md, docs, code)
|
||||
- Any recent changes to involved components:
|
||||
git log --oneline -5 -- <paths>
|
||||
- Related issues or prior fixes (cross-reference by number if known)
|
||||
|
||||
c. ``## Reproduction plan``
|
||||
Concrete steps for a reproduce-agent or human. Be specific:
|
||||
- Which environment to use (e.g. "start fresh stack with
|
||||
\`./scripts/dev.sh restart --full\`")
|
||||
- Which transactions or actions to execute (with \`cast\` commands,
|
||||
API calls, or UI navigation steps where applicable)
|
||||
- What state to check after each step (contract reads, API queries,
|
||||
UI observations, log output)
|
||||
|
||||
d. ``## What needs verification``
|
||||
Checkboxes distinguishing known facts from unknowns:
|
||||
- ``- [ ]`` Does the reported behavior actually occur? (reproduce)
|
||||
- ``- [ ]`` Is <component X> behaving as expected? (check state)
|
||||
- ``- [ ]`` Is the data flow correct from <A> to <B>? (trace)
|
||||
Tailor these to the specific bug — three to five items covering the
|
||||
key unknowns a reproduce-agent must resolve.
|
||||
|
||||
e. Construct full new body = original body text + appended sections.
|
||||
Write an edit_body action BEFORE the add_label action:
|
||||
echo '{"action":"edit_body","issue":NNN,"body":"<full new body>"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
|
||||
|
||||
f. Write the add_label action:
|
||||
echo '{"action":"add_label","issue":NNN,"label":"bug-report"}' >> "$PROJECT_REPO_ROOT/gardener/pending-actions.jsonl"
|
||||
echo "ACTION: labeled #NNN as bug-report — <reason>" >> "$RESULT_FILE"
|
||||
|
||||
Do NOT also add the backlog label — bug-report is a separate triage
|
||||
track that feeds into reproduction automation.
|
||||
|
||||
For each issue, choose ONE action and write to result file:
|
||||
|
||||
ACTION (substantial — promote, close duplicate, add acceptance criteria):
|
||||
|
|
@ -167,8 +224,9 @@ Sibling dependency rule (CRITICAL):
|
|||
Processing order:
|
||||
1. Handle PRIORITY_blockers_starving_factory first — promote or resolve
|
||||
2. Quality gate — strip backlog from issues missing acceptance criteria or affected files
|
||||
3. Process tech-debt issues by score (impact/effort)
|
||||
4. Classify remaining items as dust or route to vault
|
||||
3. Bug-report detection — label qualifying issues before other classification
|
||||
4. Process tech-debt issues by score (impact/effort)
|
||||
5. Classify remaining items as dust or route to vault
|
||||
|
||||
Do NOT bundle dust yourself — the dust-bundling step handles accumulation,
|
||||
dedup, TTL expiry, and bundling into backlog issues.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation)
|
||||
#
|
||||
# Executed by supervisor/supervisor-run.sh via cron (every 20 minutes).
|
||||
# supervisor-run.sh creates a tmux session with Claude (sonnet) and injects
|
||||
# supervisor-run.sh runs claude -p via agent-sdk.sh and injects
|
||||
# this formula with pre-collected metrics as context.
|
||||
#
|
||||
# Steps: preflight → health-assessment → decide-actions → report → journal
|
||||
|
|
@ -137,14 +137,15 @@ For each finding from the health assessment, decide and execute an action.
|
|||
|
||||
**P3 Stale PRs (CI done >20min, no push since):**
|
||||
Do NOT read dev-poll.sh, push branches, attempt merges, or investigate pipeline code.
|
||||
Instead, nudge the dev-agent via tmux injection if a session is alive:
|
||||
# Find the dev session for this issue
|
||||
SESSION=$(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "dev-.*-${ISSUE_NUM}" | head -1)
|
||||
if [ -n "$SESSION" ]; then
|
||||
# Inject a nudge into the dev-agent session
|
||||
tmux send-keys -t "$SESSION" "# [supervisor] PR stale >20min — CI finished, please push or update" Enter
|
||||
fi
|
||||
If no active tmux session exists, note it in the journal for the next dev-poll cycle.
|
||||
Instead, file a vault item for the dev-agent to pick up:
|
||||
Write $OPS_REPO_ROOT/vault/pending/stale-pr-${ISSUE_NUM}.md:
|
||||
# Stale PR: ${PR_TITLE}
|
||||
## What
|
||||
CI finished >20min ago but no git push has been made to the PR branch.
|
||||
## Why
|
||||
P3 — Factory degraded: PRs should be pushed within 20min of CI completion.
|
||||
## Unblocks
|
||||
- Factory health: dev-agent will push the branch and continue the workflow
|
||||
Do NOT file vault items for stale PRs unless they remain stale for >3 consecutive runs.
|
||||
|
||||
### Cannot auto-fix → file vault item
|
||||
|
|
@ -251,7 +252,6 @@ knowledge file in the ops repo:
|
|||
Knowledge files: memory.md, disk.md, ci.md, forge.md, dev-agent.md,
|
||||
review-agent.md, git.md.
|
||||
|
||||
After writing the journal, write the phase signal:
|
||||
echo 'PHASE:done' > "$PHASE_FILE"
|
||||
After writing the journal, the agent session completes automatically.
|
||||
"""
|
||||
needs = ["report"]
|
||||
|
|
|
|||
267
formulas/triage.toml
Normal file
267
formulas/triage.toml
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
# formulas/triage.toml — Triage-agent formula (generic template)
|
||||
#
|
||||
# This is the base template for triage investigations.
|
||||
# Project-specific formulas (e.g. formulas/triage-harb.toml) extend this by
|
||||
# overriding the fields in the [project] section and providing stack-specific
|
||||
# step descriptions.
|
||||
#
|
||||
# Triggered by: bug-report + in-triage label combination.
|
||||
# Set by the reproduce-agent when:
|
||||
# - Bug was confirmed (reproduced)
|
||||
# - Quick log analysis did not reveal an obvious root cause
|
||||
# - Reproduce-agent documented all steps taken and logs examined
|
||||
#
|
||||
# Steps:
|
||||
# 1. read-findings — parse issue comments for prior reproduce-agent evidence
|
||||
# 2. trace-data-flow — follow symptom through UI → API → backend → data store
|
||||
# 3. instrumentation — throwaway branch, add logging, restart, observe
|
||||
# 4. decompose — file backlog issues for each root cause
|
||||
# 5. link-back — update original issue, swap in-triage → in-progress
|
||||
# 6. cleanup — delete throwaway debug branch
|
||||
#
|
||||
# Best practices:
|
||||
# - Start from reproduce-agent findings; do not repeat their work
|
||||
# - Budget: 70% tracing data flow, 30% instrumented re-runs
|
||||
# - Multiple causes: check if layered (Depends-on) or independent (Related)
|
||||
# - Always delete the throwaway debug branch before finishing
|
||||
# - If inconclusive after full turn budget: leave in-triage, post what was
|
||||
# tried, do NOT relabel — supervisor handles stale triage sessions
|
||||
#
|
||||
# Project-specific formulas extend this template by defining:
|
||||
# - stack_script: how to start/stop the project stack
|
||||
# - [project].data_flow: layer names (e.g. "chain → indexer → GraphQL → UI")
|
||||
# - [project].api_endpoints: which APIs/services to inspect
|
||||
# - [project].stack_lock: stack lock configuration
|
||||
# - Per-step description overrides with project-specific commands
|
||||
#
|
||||
# No hard timeout — runs until Claude hits its turn limit.
|
||||
# Stack lock held for full run (triage is rare; blocking CI is acceptable).
|
||||
|
||||
name = "triage"
|
||||
description = "Deep root cause analysis: trace data flow, add debug instrumentation, decompose causes into backlog issues."
|
||||
version = 2
|
||||
|
||||
# Set stack_script to the restart command for local stacks.
|
||||
# Leave empty ("") to connect to an existing staging environment.
|
||||
stack_script = ""
|
||||
|
||||
tools = ["playwright"]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Project-specific extension fields.
|
||||
# Override these in formulas/triage-<project>.toml.
|
||||
# ---------------------------------------------------------------------------
|
||||
[project]
|
||||
# Human-readable layer names for the data-flow trace (generic default).
|
||||
# Example project override: "chain → indexer → GraphQL → UI"
|
||||
data_flow = "UI → API → backend → data store"
|
||||
|
||||
# Comma-separated list of API endpoints or services to inspect.
|
||||
# Example: "GraphQL /graphql, REST /api/v1, RPC ws://localhost:8545"
|
||||
api_endpoints = ""
|
||||
|
||||
# Stack lock configuration (leave empty for default behavior).
|
||||
# Example: "full" to hold a full stack lock during triage.
|
||||
stack_lock = ""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Steps
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
[[steps]]
|
||||
id = "read-findings"
|
||||
title = "Read reproduce-agent findings"
|
||||
description = """
|
||||
Before doing anything else, parse all prior evidence from the issue comments.
|
||||
|
||||
1. Fetch the issue body and all comments:
|
||||
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/issues/${ISSUE_NUMBER}" | jq -r '.body'
|
||||
curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/issues/${ISSUE_NUMBER}/comments" | jq -r '.[].body'
|
||||
|
||||
2. Identify the reproduce-agent comment (look for sections like
|
||||
"Reproduction steps", "Logs examined", "What was tried").
|
||||
|
||||
3. Extract and note:
|
||||
- The exact symptom (error message, unexpected value, visual regression)
|
||||
- Steps that reliably trigger the bug
|
||||
- Log lines or API responses already captured
|
||||
- Any hypotheses the reproduce-agent already ruled out
|
||||
|
||||
Do NOT repeat work the reproduce-agent already did. Your job starts where
|
||||
theirs ended. If no reproduce-agent comment is found, note it and proceed
|
||||
with fresh investigation using the issue body only.
|
||||
"""
|
||||
|
||||
[[steps]]
|
||||
id = "trace-data-flow"
|
||||
title = "Trace data flow from symptom to source"
|
||||
description = """
|
||||
Systematically follow the symptom backwards through each layer of the stack.
|
||||
Spend ~70% of your total turn budget here before moving to instrumentation.
|
||||
|
||||
Generic layer traversal (adapt to the project's actual stack):
|
||||
UI → API → backend → data store
|
||||
|
||||
For each layer boundary:
|
||||
1. What does the upstream layer send?
|
||||
2. What does the downstream layer expect?
|
||||
3. Is there a mismatch? If yes — is this the root cause or a symptom?
|
||||
|
||||
Tracing checklist:
|
||||
a. Start at the layer closest to the visible symptom.
|
||||
b. Read the relevant source files — do not guess data shapes.
|
||||
c. Cross-reference API contracts: compare what the code sends vs what it
|
||||
should send according to schemas, type definitions, or documentation.
|
||||
d. Check recent git history on suspicious files:
|
||||
git log --oneline -20 -- <file>
|
||||
e. Search for related issues or TODOs in the code:
|
||||
grep -r "TODO\|FIXME\|HACK" -- <relevant directory>
|
||||
|
||||
Capture for each layer:
|
||||
- The data shape flowing in and out (field names, types, nullability)
|
||||
- Whether the layer's behavior matches its documented contract
|
||||
- Any discrepancy found
|
||||
|
||||
If a clear root cause becomes obvious during tracing, note it and continue
|
||||
checking whether additional causes exist downstream.
|
||||
"""
|
||||
needs = ["read-findings"]
|
||||
|
||||
[[steps]]
|
||||
id = "instrumentation"
|
||||
title = "Add debug instrumentation on a throwaway branch"
|
||||
description = """
|
||||
Use ~30% of your total turn budget here. Only instrument after tracing has
|
||||
identified the most likely failure points — do not instrument blindly.
|
||||
|
||||
1. Create a throwaway debug branch (NEVER commit this to main):
|
||||
cd "$PROJECT_REPO_ROOT"
|
||||
git checkout -b debug/triage-${ISSUE_NUMBER}
|
||||
|
||||
2. Add targeted logging at the layer boundaries identified during tracing:
|
||||
- Console.log / structured log statements around the suspicious code path
|
||||
- Log the actual values flowing through: inputs, outputs, intermediate state
|
||||
- Add verbose mode flags if the stack supports them
|
||||
- Keep instrumentation minimal — only what confirms or refutes the hypothesis
|
||||
|
||||
3. Restart the stack using the configured script (if set):
|
||||
${stack_script:-"# No stack_script configured — restart manually or connect to staging"}
|
||||
|
||||
4. Re-run the reproduction steps from the reproduce-agent findings.
|
||||
|
||||
5. Observe and capture new output:
|
||||
- Paste relevant log lines into your working notes
|
||||
- Note whether the observed values match or contradict the hypothesis
|
||||
|
||||
6. If the first instrumentation pass is inconclusive, iterate:
|
||||
- Narrow the scope to the next most suspicious boundary
|
||||
- Re-instrument, restart, re-run
|
||||
- Maximum 2-3 instrumentation rounds before declaring inconclusive
|
||||
|
||||
Do NOT push the debug branch. It will be deleted in the cleanup step.
|
||||
"""
|
||||
needs = ["trace-data-flow"]
|
||||
|
||||
[[steps]]
|
||||
id = "decompose"
|
||||
title = "Decompose root causes into backlog issues"
|
||||
description = """
|
||||
After tracing and instrumentation, articulate each distinct root cause.
|
||||
|
||||
For each root cause found:
|
||||
|
||||
1. Determine the relationship to other causes:
|
||||
- Layered (one causes another) → use Depends-on in the issue body
|
||||
- Independent (separate code paths fail independently) → use Related
|
||||
|
||||
2. Create a backlog issue for each root cause:
|
||||
curl -sf -X POST "${FORGE_API}/issues" \\
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{
|
||||
"title": "fix: <specific description of root cause N>",
|
||||
"body": "## Root cause\\n<exact code path, file:line>\\n\\n## Fix suggestion\\n<recommended approach>\\n\\n## Context\\nDecomposed from #${ISSUE_NUMBER} (cause N of M)\\n\\n## Dependencies\\n<#X if this depends on another cause being fixed first>",
|
||||
"labels": [{"name": "backlog"}]
|
||||
}'
|
||||
|
||||
3. Note the newly created issue numbers.
|
||||
|
||||
If only one root cause is found, still create a single backlog issue with
|
||||
the specific code location and fix suggestion.
|
||||
|
||||
If the investigation is inconclusive (no clear root cause found), skip this
|
||||
step and proceed directly to link-back with the inconclusive outcome.
|
||||
"""
|
||||
needs = ["instrumentation"]
|
||||
|
||||
[[steps]]
|
||||
id = "link-back"
|
||||
title = "Update original issue and relabel"
|
||||
description = """
|
||||
Post a summary comment on the original issue and update its labels.
|
||||
|
||||
### If root causes were found (conclusive):
|
||||
|
||||
Post a comment:
|
||||
"## Triage findings
|
||||
|
||||
Found N root cause(s):
|
||||
- #X — <one-line description> (cause 1 of N)
|
||||
- #Y — <one-line description> (cause 2 of N, depends on #X)
|
||||
|
||||
Data flow traced: <layer where the bug originates>
|
||||
Instrumentation: <key log output that confirmed the cause>
|
||||
|
||||
Next step: backlog issues above will be implemented in dependency order."
|
||||
|
||||
Then swap labels:
|
||||
- Remove: in-triage
|
||||
- Add: in-progress
|
||||
|
||||
### If investigation was inconclusive (turn budget exhausted):
|
||||
|
||||
Post a comment:
|
||||
"## Triage — inconclusive
|
||||
|
||||
Traced: <layers checked>
|
||||
Tried: <instrumentation attempts and what they showed>
|
||||
Hypothesis: <best guess at cause, if any>
|
||||
|
||||
No definitive root cause identified. Leaving in-triage for supervisor
|
||||
to handle as a stale triage session."
|
||||
|
||||
Do NOT relabel. Leave in-triage. The supervisor monitors stale triage
|
||||
sessions and will escalate or reassign.
|
||||
|
||||
**CRITICAL: Write outcome file** — Always write the outcome to the outcome file:
|
||||
- If root causes found (conclusive): echo "reproduced" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt
|
||||
- If inconclusive: echo "needs-triage" > /tmp/triage-outcome-${ISSUE_NUMBER}.txt
|
||||
"""
|
||||
needs = ["decompose"]
|
||||
|
||||
[[steps]]
|
||||
id = "cleanup"
|
||||
title = "Delete throwaway debug branch"
|
||||
description = """
|
||||
Always delete the debug branch, even if the investigation was inconclusive.
|
||||
|
||||
1. Switch back to the main branch:
|
||||
cd "$PROJECT_REPO_ROOT"
|
||||
git checkout "$PRIMARY_BRANCH"
|
||||
|
||||
2. Delete the local debug branch:
|
||||
git branch -D debug/triage-${ISSUE_NUMBER}
|
||||
|
||||
3. Confirm no remote was pushed (if accidentally pushed, delete it too):
|
||||
git push origin --delete debug/triage-${ISSUE_NUMBER} 2>/dev/null || true
|
||||
|
||||
4. Verify the worktree is clean:
|
||||
git status
|
||||
git worktree list
|
||||
|
||||
A clean repo is a prerequisite for the next dev-agent run. Never leave
|
||||
debug branches behind — they accumulate and pollute the branch list.
|
||||
"""
|
||||
needs = ["link-back"]
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Gardener Agent
|
||||
|
||||
**Role**: Backlog grooming — detect duplicate issues, missing acceptance
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
|||
# shellcheck source=../lib/pr-lifecycle.sh
|
||||
source "$FACTORY_ROOT/lib/pr-lifecycle.sh"
|
||||
|
||||
LOG_FILE="$SCRIPT_DIR/gardener.log"
|
||||
LOG_FILE="${DISINTO_LOG_DIR}/gardener/gardener.log"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
LOGFILE="$LOG_FILE"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
|
|
@ -55,20 +55,22 @@ RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt"
|
|||
GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt"
|
||||
WORKTREE="/tmp/${PROJECT_NAME}-gardener-run"
|
||||
|
||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
||||
# Override LOG_AGENT for consistent agent identification
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||
LOG_AGENT="gardener"
|
||||
|
||||
# ── Guards ────────────────────────────────────────────────────────────────
|
||||
check_active gardener
|
||||
acquire_cron_lock "/tmp/gardener-run.lock"
|
||||
check_memory 2000
|
||||
memory_guard 2000
|
||||
|
||||
log "--- Gardener run start ---"
|
||||
|
||||
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||
resolve_forge_remote
|
||||
|
||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_GARDENER_TOKEN:-}" ]; then
|
||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_GARDENER_TOKEN}" \
|
||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
||||
fi
|
||||
resolve_agent_identity || true
|
||||
|
||||
# ── Load formula + context ───────────────────────────────────────────────
|
||||
load_formula_or_profile "gardener" "$FACTORY_ROOT/formulas/run-gardener.toml" || exit 1
|
||||
|
|
@ -127,16 +129,7 @@ ${SCRATCH_INSTRUCTION}
|
|||
${PROMPT_FOOTER}"
|
||||
|
||||
# ── Create worktree ──────────────────────────────────────────────────────
|
||||
cd "$PROJECT_REPO_ROOT"
|
||||
git fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
worktree_cleanup "$WORKTREE"
|
||||
git worktree add "$WORKTREE" "origin/${PRIMARY_BRANCH}" --detach 2>/dev/null
|
||||
|
||||
cleanup() {
|
||||
worktree_cleanup "$WORKTREE"
|
||||
rm -f "$GARDENER_PR_FILE"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
formula_worktree_setup "$WORKTREE"
|
||||
|
||||
# ── Post-merge manifest execution ────────────────────────────────────────
|
||||
# Reads gardener/pending-actions.json and executes each action via API.
|
||||
|
|
@ -165,19 +158,21 @@ _gardener_execute_manifest() {
|
|||
|
||||
case "$action" in
|
||||
add_label)
|
||||
local label label_id
|
||||
local label label_id http_code resp
|
||||
label=$(jq -r ".[$i].label" "$manifest_file")
|
||||
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/labels" | jq -r --arg n "$label" \
|
||||
'.[] | select(.name == $n) | .id') || true
|
||||
if [ -n "$label_id" ]; then
|
||||
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${FORGE_API}/issues/${issue}/labels" \
|
||||
-d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then
|
||||
-d "{\"labels\":[${label_id}]}" 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
|
||||
log "manifest: add_label '${label}' to #${issue}"
|
||||
else
|
||||
log "manifest: FAILED add_label '${label}' to #${issue}"
|
||||
log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}"
|
||||
fi
|
||||
else
|
||||
log "manifest: FAILED add_label — label '${label}' not found"
|
||||
|
|
@ -185,17 +180,19 @@ _gardener_execute_manifest() {
|
|||
;;
|
||||
|
||||
remove_label)
|
||||
local label label_id
|
||||
local label label_id http_code resp
|
||||
label=$(jq -r ".[$i].label" "$manifest_file")
|
||||
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/labels" | jq -r --arg n "$label" \
|
||||
'.[] | select(.name == $n) | .id') || true
|
||||
if [ -n "$label_id" ]; then
|
||||
if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||
log "manifest: remove_label '${label}' from #${issue}"
|
||||
else
|
||||
log "manifest: FAILED remove_label '${label}' from #${issue}"
|
||||
log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}"
|
||||
fi
|
||||
else
|
||||
log "manifest: FAILED remove_label — label '${label}' not found"
|
||||
|
|
@ -203,34 +200,38 @@ _gardener_execute_manifest() {
|
|||
;;
|
||||
|
||||
close)
|
||||
local reason
|
||||
local reason http_code resp
|
||||
reason=$(jq -r ".[$i].reason // empty" "$manifest_file")
|
||||
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${FORGE_API}/issues/${issue}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1; then
|
||||
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||
log "manifest: closed #${issue} (${reason})"
|
||||
else
|
||||
log "manifest: FAILED close #${issue}"
|
||||
log "manifest: FAILED close #${issue}: HTTP ${http_code}"
|
||||
fi
|
||||
;;
|
||||
|
||||
comment)
|
||||
local body escaped_body
|
||||
local body escaped_body http_code resp
|
||||
body=$(jq -r ".[$i].body" "$manifest_file")
|
||||
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
|
||||
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${FORGE_API}/issues/${issue}/comments" \
|
||||
-d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
|
||||
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
|
||||
log "manifest: commented on #${issue}"
|
||||
else
|
||||
log "manifest: FAILED comment on #${issue}"
|
||||
log "manifest: FAILED comment on #${issue}: HTTP ${http_code}"
|
||||
fi
|
||||
;;
|
||||
|
||||
create_issue)
|
||||
local title body labels escaped_title escaped_body label_ids
|
||||
local title body labels escaped_title escaped_body label_ids http_code resp
|
||||
title=$(jq -r ".[$i].title" "$manifest_file")
|
||||
body=$(jq -r ".[$i].body" "$manifest_file")
|
||||
labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file")
|
||||
|
|
@ -250,40 +251,46 @@ _gardener_execute_manifest() {
|
|||
done <<< "$labels"
|
||||
[ -n "$ids_json" ] && label_ids="[${ids_json}]"
|
||||
fi
|
||||
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${FORGE_API}/issues" \
|
||||
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then
|
||||
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
|
||||
log "manifest: created issue '${title}'"
|
||||
else
|
||||
log "manifest: FAILED create_issue '${title}'"
|
||||
log "manifest: FAILED create_issue '${title}': HTTP ${http_code}"
|
||||
fi
|
||||
;;
|
||||
|
||||
edit_body)
|
||||
local body escaped_body
|
||||
local body escaped_body http_code resp
|
||||
body=$(jq -r ".[$i].body" "$manifest_file")
|
||||
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
|
||||
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${FORGE_API}/issues/${issue}" \
|
||||
-d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
|
||||
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||
log "manifest: edited body of #${issue}"
|
||||
else
|
||||
log "manifest: FAILED edit_body #${issue}"
|
||||
log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}"
|
||||
fi
|
||||
;;
|
||||
|
||||
close_pr)
|
||||
local pr
|
||||
local pr http_code resp
|
||||
pr=$(jq -r ".[$i].pr" "$manifest_file")
|
||||
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
"${FORGE_API}/pulls/${pr}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1; then
|
||||
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||
http_code=$(echo "$resp" | tail -1)
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||
log "manifest: closed PR #${pr}"
|
||||
else
|
||||
log "manifest: FAILED close_pr #${pr}"
|
||||
log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}"
|
||||
fi
|
||||
;;
|
||||
|
||||
|
|
@ -328,9 +335,9 @@ if [ -n "$PR_NUMBER" ]; then
|
|||
|
||||
if [ "$_PR_WALK_EXIT_REASON" = "merged" ]; then
|
||||
# Post-merge: pull primary, mirror push, execute manifest
|
||||
git -C "$PROJECT_REPO_ROOT" fetch origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
git -C "$PROJECT_REPO_ROOT" fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
git -C "$PROJECT_REPO_ROOT" checkout "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
git -C "$PROJECT_REPO_ROOT" pull --ff-only origin "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
git -C "$PROJECT_REPO_ROOT" pull --ff-only "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true
|
||||
mirror_push
|
||||
_gardener_execute_manifest
|
||||
rm -f "$SCRATCH_FILE"
|
||||
|
|
|
|||
|
|
@ -1,32 +1,7 @@
|
|||
[
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 765,
|
||||
"body": "Depends on: none\n\n## Goal\n\nThe disinto website becomes a versioned artifact: built by CI, published to Codeberg's generic package registry, deployed to staging automatically. Version visible in footer.\n\n## Files to add/change\n\n### `site/VERSION`\n```\n0.1.0\n```\n\n### `site/build.sh`\n```bash\n#!/bin/bash\nVERSION=$(cat VERSION)\nmkdir -p dist\ncp *.html *.jpg *.webp *.png *.ico *.xml robots.txt dist/\nsed -i \"s|Built from scrap, powered by a single battery.|v${VERSION} · Built from scrap, powered by a single battery.|\" dist/index.html\necho \"$VERSION\" > dist/VERSION\n```\n\n### `site/index.html`\nNo template placeholder needed — `build.sh` does the sed replacement on the existing footer text.\n\n### `.woodpecker/site.yml`\n```yaml\nwhen:\n path: \"site/**\"\n event: push\n branch: main\n\nsteps:\n - name: build\n image: alpine\n commands:\n - cd site && sh build.sh\n - VERSION=$(cat site/VERSION)\n - tar czf site-${VERSION}.tar.gz -C site/dist .\n\n - name: publish\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n --upload-file site-${VERSION}.tar.gz\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n\n - name: deploy-staging\n image: alpine\n commands:\n - apk add curl\n - VERSION=$(cat site/VERSION)\n - >-\n curl -sf --user \"johba:$$FORGE_TOKEN\"\n \"https://codeberg.org/api/packages/johba/generic/disinto-site/${VERSION}/site-${VERSION}.tar.gz\"\n -o site.tar.gz\n - rm -rf /srv/staging/*\n - tar xzf site.tar.gz -C /srv/staging/\n environment:\n FORGE_TOKEN:\n from_secret: forge_token\n volumes:\n - /home/debian/staging-site:/srv/staging\n```\n\n## Infra setup (manual, before first run)\n- `mkdir -p /home/debian/staging-site`\n- Add to Caddyfile: `staging.disinto.ai { root * /home/debian/staging-site; file_server }`\n- DNS: `staging.disinto.ai` A record → 
same IP as `disinto.ai`\n- Reload Caddy: `sudo systemctl reload caddy`\n- Add `forge_token` as Woodpecker repo secret for johba/disinto (if not already set)\n- Add `/home/debian/staging-site` to `WOODPECKER_BACKEND_DOCKER_VOLUMES`\n\n## Verification\n- [ ] Merge PR that touches `site/` → CI runs site pipeline\n- [ ] Package appears at `codeberg.org/johba/-/packages/generic/disinto-site/0.1.0`\n- [ ] `staging.disinto.ai` serves the site with `v0.1.0` in footer\n- [ ] `disinto.ai` (production) unchanged\n\n## Related\n- #764 — docker stack edge proxy + staging (future: this moves inside the stack)\n- #755 — vault-gated production promotion (production deploy comes later)\n\n## Affected files\n- `site/VERSION` — new, holds current version string\n- `site/build.sh` — new, builds dist/ with version injected into footer\n- `.woodpecker/site.yml` — new, CI pipeline for build/publish/deploy-staging"
|
||||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 764,
|
||||
"body": "Depends on: none (builds on existing docker-compose generation in `bin/disinto`)\n\n## Design\n\n`disinto init` + `disinto up` starts two additional containers as base factory infrastructure:\n\n### Edge proxy (Caddy)\n- Reverse proxies to Forgejo and Woodpecker\n- Serves staging site\n- Runs on ports 80/443\n- At bootstrap: IP-only, self-signed TLS or HTTP\n- Domain + Let's Encrypt added later via vault resource request\n\n### Staging container (Caddy)\n- Static file server for the project's staging artifacts\n- Starts with a default \"Nothing shipped yet\" page\n- CI pipelines write to a shared volume to update staging content\n- No vault approval needed — staging is the factory's sandbox\n\n### docker-compose addition\n```yaml\nservices:\n edge:\n image: caddy:alpine\n ports:\n - \"80:80\"\n - \"443:443\"\n volumes:\n - ./Caddyfile:/etc/caddy/Caddyfile\n - caddy_data:/data\n depends_on:\n - forgejo\n - woodpecker-server\n - staging\n\n staging:\n image: caddy:alpine\n volumes:\n - staging-site:/srv/site\n # Not exposed directly — edge proxies to it\n\nvolumes:\n caddy_data:\n staging-site:\n```\n\n### Caddyfile (generated by `disinto init`)\n```\n# IP-only at bootstrap, domain added later\n:80 {\n handle /forgejo/* {\n reverse_proxy forgejo:3000\n }\n handle /ci/* {\n reverse_proxy woodpecker-server:8000\n }\n handle {\n reverse_proxy staging:80\n }\n}\n```\n\n### Staging update flow\n1. CI builds artifact (site tarball, etc.)\n2. CI step writes to `staging-site` volume\n3. Staging container serves updated content immediately\n4. 
No restart needed — Caddy serves files directly\n\n### Domain lifecycle\n- Bootstrap: no domain, edge serves on IP\n- Later: factory files vault resource request for domain\n- Human buys domain, sets DNS\n- Caddyfile updated with domain, Let's Encrypt auto-provisions TLS\n\n## Affected files\n- `bin/disinto` — `generate_compose()` adds edge + staging services\n- New: default staging page (\"Nothing shipped yet\")\n- New: Caddyfile template in `docker/`\n\n## Related\n- #755 — vault-gated deployment promotion (production comes later)\n- #757 — ops repo (domain is a resource requested through vault)\n\n## Acceptance criteria\n- [ ] `disinto init` generates a `docker-compose.yml` that includes `edge` (Caddy) and `staging` containers\n- [ ] Edge proxy routes `/forgejo/*` → Forgejo, `/ci/*` → Woodpecker, default → staging container\n- [ ] Staging container serves a default \"Nothing shipped yet\" page on first boot\n- [ ] `docker/` directory contains a Caddyfile template generated by `disinto init`\n- [ ] `disinto up` starts all containers including edge and staging without manual steps"
|
||||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 761,
|
||||
"body": "Depends on: #747\n\n## Design\n\nEach agent account on the bundled Forgejo gets a `.profile` repo. This repo holds the agent's formula (copied from disinto at creation time) and its journal.\n\n### Structure\n```\n{agent-bot}/.profile/\n├── formula.toml # snapshot of the formula at agent creation time\n├── journal/ # daily logs of what the agent did\n│ ├── 2026-03-26.md\n│ └── ...\n└── knowledge/ # learned patterns, best-practices (optional, agent can evolve)\n```\n\n### Lifecycle\n1. **Create agent** — `disinto init` or `disinto spawn-agent` creates Forgejo account + `.profile` repo\n2. **Copy formula** — current `formulas/{role}.toml` from disinto repo is copied to `.profile/formula.toml`\n3. **Agent reads its own formula** — at session start, agent reads from its `.profile`, not from the disinto repo\n4. **Agent writes journal** — daily entries pushed to `.profile/journal/`\n5. **Agent can evolve knowledge** — best-practices, heuristics, patterns written to `.profile/knowledge/`\n\n### What this enables\n\n**A/B testing formulas:** Create two agents from different formula versions, run both against the same backlog, compare results (cycle time, CI pass rate, review rejection rate).\n\n**Rollback:** New formula worse? Kill agent, spawn from older formula version.\n\n**Audit:** What formula was this agent running when it produced that PR? Check its `.profile` at that git commit.\n\n**Drift tracking:** Diff what an agent learned (`.profile/knowledge/`) vs what it started with. 
Measure formula evolution over time.\n\n**Portability:** Move agent to different box — `git clone` its `.profile`.\n\n### Disinto repo becomes the template\n\n```\ndisinto repo:\n formulas/dev-agent.toml ← canonical template, evolves\n formulas/review-agent.toml\n formulas/planner.toml\n ...\n\nRunning agents:\n dev-bot-v2/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v2\n dev-bot-v3/.profile/formula.toml ← snapshot from formulas/dev-agent.toml@v3\n review-bot/.profile/formula.toml ← snapshot from formulas/review-agent.toml\n```\n\nThe formula in the disinto repo is the template. The `.profile` copy is the instance. They can diverge — that's a feature, not a bug.\n\n## Affected files\n- `bin/disinto` — agent creation copies formula to .profile\n- Agent session scripts — read formula from .profile instead of local formulas/ dir\n- Planner/supervisor — can read other agents' journals from their .profile repos\n\n## Related\n- #747 — per-agent Forgejo accounts (prerequisite)\n- #757 — ops repo (shared concerns stay there: vault, portfolio, resources)\n\n## Acceptance criteria\n- [ ] `disinto spawn-agent` (or `disinto init`) creates a Forgejo account + `.profile` repo for each agent bot\n- [ ] Current `formulas/{role}.toml` is copied to `.profile/formula.toml` at agent creation time\n- [ ] Agent session script reads its formula from `.profile/formula.toml`, not from the repo's `formulas/` directory\n- [ ] Agent writes daily journal entries to `.profile/journal/YYYY-MM-DD.md`"
|
||||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 742,
|
||||
"body": "## Problem\n\n`gardener/recipes/*.toml` (4 files: cascade-rebase, chicken-egg-ci, flaky-test, shellcheck-violations) are an older pattern predating `formulas/*.toml`. Two systems for the same thing.\n\n## Fix\n\nMigrate any unique content from recipes to the gardener formula or to new formulas. Delete the recipes directory.\n\n## Affected files\n- `gardener/recipes/*.toml` — delete after migration\n- `formulas/run-gardener.toml` — absorb relevant content\n- Gardener scripts that reference recipes/\n\n## Acceptance criteria\n- [ ] Contents of `gardener/recipes/*.toml` are diff'd against `formulas/run-gardener.toml` — any unique content is migrated\n- [ ] `gardener/recipes/` directory is deleted\n- [ ] No scripts in `gardener/` reference the `recipes/` path after migration\n- [ ] ShellCheck passes on all modified scripts"
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
"issue": 742,
|
||||
"label": "backlog"
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
"issue": 741,
|
||||
"label": "backlog"
|
||||
"issue": 356,
|
||||
"body": "## Problem\n\nThe entrypoint hardcodes `REPRODUCE_FORMULA` to `formulas/reproduce.toml` (line 26) and never checks the `DISINTO_FORMULA` environment variable passed by the dispatcher for triage runs.\n\nThe dispatcher sets `-e DISINTO_FORMULA=triage` for triage dispatch, but the entrypoint ignores it — always running the reproduce formula.\n\n## Fix\n\nAt line 26, select the formula based on `DISINTO_FORMULA`:\n\n```bash\ncase \"${DISINTO_FORMULA:-reproduce}\" in\n triage)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/triage.toml\"\n ;;\n *)\n ACTIVE_FORMULA=\"${DISINTO_DIR}/formulas/reproduce.toml\"\n ;;\nesac\n```\n\nThen use `ACTIVE_FORMULA` everywhere `REPRODUCE_FORMULA` is currently used.\n\nAlso update log messages to reflect which formula is running (\"Starting triage-agent\" vs \"Starting reproduce-agent\").\n\n## Affected files\n\n- `docker/reproduce/entrypoint-reproduce.sh` — line 26 and all references to REPRODUCE_FORMULA\n\n## Acceptance criteria\n\n- [ ] `DISINTO_FORMULA=triage` selects `formulas/triage.toml` in the entrypoint\n- [ ] `DISINTO_FORMULA=reproduce` (or unset) still runs `formulas/reproduce.toml`\n- [ ] Log messages reflect which formula is active (\"Starting triage-agent\" / \"Starting reproduce-agent\")\n- [ ] All `REPRODUCE_FORMULA` references replaced with `ACTIVE_FORMULA`\n"
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Shared Helpers (`lib/`)
|
||||
|
||||
All agents source `lib/env.sh` as their first action. Additional helpers are
|
||||
|
|
@ -6,20 +6,29 @@ sourced as needed.
|
|||
|
||||
| File | What it provides | Sourced by |
|
||||
|---|---|---|
|
||||
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. | Every agent |
|
||||
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr, supervisor-poll |
|
||||
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). | Every agent |
|
||||
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. | dev-poll, review-poll, review-pr |
|
||||
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
|
||||
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
|
||||
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
|
||||
| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `consume_escalation_reply()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `build_graph_section()`, `run_formula_and_monitor(AGENT [TIMEOUT] [CALLBACK])` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `formula_phase_callback()` handles `PHASE:escalate` (unified escalation path — kills the session). `run_formula_and_monitor` accepts an optional CALLBACK (default: `formula_phase_callback`) so callers can install custom merge-through or escalation handlers. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
|
||||
| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. `<pipeline_number> [--step <name>]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh |
|
||||
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). | env.sh (when `PROJECT_TOML` is set) |
|
||||
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll |
|
||||
| `lib/formula-session.sh` | `acquire_cron_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven cron agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
|
||||
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points |
|
||||
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh and dev/phase-handler.sh — called after every successful merge. | dev-poll.sh, phase-handler.sh |
|
||||
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh |
|
||||
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
|
||||
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
|
||||
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | (available for future use) |
|
||||
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh |
|
||||
| `lib/stack-lock.sh` | File-based lock protocol for singleton project stack access. `stack_lock_acquire(holder, project)` — polls until free, breaks stale heartbeats (>10 min old), claims lock. `stack_lock_release(project)` — deletes lock file. `stack_lock_check(project)` — inspect current lock state. `stack_lock_heartbeat(project)` — update heartbeat timestamp (callers must call every 2 min while holding). Lock files at `~/data/locks/<project>-stack.lock`. | docker/edge/dispatcher.sh, reproduce formula |
|
||||
| `lib/tea-helpers.sh` | `tea_file_issue(title, body, labels...)` — create issue via tea CLI with secret scanning; sets `FILED_ISSUE_NUM`. `tea_relabel(issue_num, labels...)` — replace labels using tea's `edit` subcommand (not `label`). `tea_comment(issue_num, body)` — add comment with secret scanning. `tea_close(issue_num)` — close issue. All use `TEA_LOGIN` and `FORGE_REPO` from env.sh. Labels by name (no ID lookup). Tea binary download verified via sha256 checksum. Sourced by env.sh when `tea` binary is available. | env.sh (conditional) |
|
||||
| `lib/worktree.sh` | Reusable git worktree management: `worktree_create(path, branch, [base_ref])` — create worktree, checkout base, fetch submodules. `worktree_recover(path, branch, [remote])` — detect existing worktree, reuse if on correct branch (sets `_WORKTREE_REUSED`), otherwise clean and recreate. `worktree_cleanup(path)` — `git worktree remove --force`, clear Claude Code project cache (`~/.claude/projects/` matching path). `worktree_cleanup_stale([max_age_hours])` — scan `/tmp` for orphaned worktrees older than threshold, skip preserved and active tmux worktrees, prune. `worktree_preserve(path, reason)` — mark worktree as preserved for debugging (writes `.worktree-preserved` marker, skipped by stale cleanup). | dev-agent.sh, supervisor-run.sh, planner-run.sh, predictor-run.sh, gardener-run.sh |
|
||||
| `lib/pr-lifecycle.sh` | Reusable PR lifecycle library: `pr_create()`, `pr_find_by_branch()`, `pr_poll_ci()`, `pr_poll_review()`, `pr_merge()`, `pr_is_merged()`, `pr_walk_to_merge()`, `build_phase_protocol_prompt()`. Requires `lib/ci-helpers.sh`. | dev-agent.sh (future) |
|
||||
| `lib/issue-lifecycle.sh` | Reusable issue lifecycle library: `issue_claim()` (add in-progress, remove backlog), `issue_release()` (remove in-progress, add backlog), `issue_block()` (post diagnostic comment with secret redaction, add blocked label), `issue_close()`, `issue_check_deps()` (parse deps, check transitive closure; sets `_ISSUE_BLOCKED_BY`, `_ISSUE_SUGGESTION`), `issue_suggest_next()` (find next unblocked backlog issue; sets `_ISSUE_NEXT`), `issue_post_refusal()` (structured refusal comment with dedup). Label IDs cached in globals on first lookup. Sources `lib/secret-scan.sh`. | dev-agent.sh (future) |
|
||||
| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `phase_file` is set, passes it to the idle stop hook (`on-idle-stop.sh`) so the hook can **nudge Claude** (up to 2 times) if Claude returns to the prompt without writing to the phase file — the hook injects a tmux reminder asking Claude to signal PHASE:done or PHASE:awaiting_ci. The PreToolUse guard hook (`on-pretooluse-guard.sh`) receives the session name as a third argument — formula agents (`gardener-*`, `planner-*`, `predictor-*`, `supervisor-*`) are identified this way and allowed to access `FACTORY_ROOT` from worktrees (they need env.sh, AGENTS.md, formulas/, lib/). 
**OAuth flock**: when `DISINTO_CONTAINER=1`, Claude CLI is wrapped in `flock -w 300 ~/.claude/session.lock` to queue concurrent token refresh attempts and prevent rotation races across agents sharing the same credentials. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `>` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or `PHASE:escalate` / other `PHASE:*` string. **Unified escalation**: `PHASE:escalate` is the signal that a session needs human input (renamed from `PHASE:needs_human`). **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh |
|
||||
| `lib/vault.sh` | **Vault PR helper** — create vault action PRs on ops repo via Forgejo API (works from containers without SSH). `vault_request <action_id> <toml_content>` validates TOML (using `validate_vault_action` from `vault/vault-env.sh`), creates branch `vault/<action-id>`, writes `vault/actions/<action-id>.toml`, creates PR targeting `main` with title `vault: <action-id>` and body from context field, returns PR number. Idempotent: if PR exists, returns existing number. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_REPO`, `FORGE_OPS_REPO`. Uses the calling agent's own token (saves/restores `FORGE_TOKEN` around sourcing `vault-env.sh`), so approval workflow respects individual agent identities. | dev-agent (vault actions), future vault dispatcher |
|
||||
| `lib/branch-protection.sh` | Branch protection helpers for Forgejo repos. `setup_vault_branch_protection()` — configures admin-only merge protection on main (require 1 approval, restrict merge to admin role, block direct pushes). `setup_profile_branch_protection()` — same protection for `.profile` repos. `verify_branch_protection()` — checks protection is correctly configured. `remove_branch_protection()` — removes protection (cleanup/testing). Handles race condition after initial push: retries with backoff if Forgejo hasn't processed the branch yet. Requires `FORGE_TOKEN`, `FORGE_URL`, `FORGE_OPS_REPO`. | bin/disinto (hire-an-agent) |
|
||||
| `lib/agent-sdk.sh` | `agent_run([--resume SESSION_ID] [--worktree DIR] PROMPT)` — one-shot `claude -p` invocation with session persistence. Saves session ID to `SID_FILE`, reads it back on resume. `agent_recover_session()` — restore previous session ID from `SID_FILE` on startup. **Nudge guard**: skips nudge injection if the worktree is clean and no push is expected, preventing spurious re-invocations. Callers must define `SID_FILE`, `LOGFILE`, and `log()` before sourcing. | formula-driven agents (dev-agent, planner-run, predictor-run, gardener-run) |
|
||||
| `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE_<BOT>_PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) |
|
||||
| `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) |
|
||||
| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence). Exports `_ACTUAL_OPS_SLUG`. | bin/disinto (init) |
|
||||
| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for project agents. `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
|
||||
| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml, `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
|
||||
| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
|
||||
| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |
|
||||
|
|
|
|||
|
|
@ -46,9 +46,23 @@ agent_run() {
|
|||
[ -n "${CLAUDE_MODEL:-}" ] && args+=(--model "$CLAUDE_MODEL")
|
||||
|
||||
local run_dir="${worktree_dir:-$(pwd)}"
|
||||
local output
|
||||
local lock_file="${HOME}/.claude/session.lock"
|
||||
mkdir -p "$(dirname "$lock_file")"
|
||||
local output rc
|
||||
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
|
||||
output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") || true
|
||||
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
|
||||
if [ "$rc" -eq 124 ]; then
|
||||
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
|
||||
elif [ "$rc" -ne 0 ]; then
|
||||
log "agent_run: claude exited with code $rc"
|
||||
# Log last 3 lines of output for diagnostics
|
||||
if [ -n "$output" ]; then
|
||||
log "agent_run: last output lines: $(echo "$output" | tail -3)"
|
||||
fi
|
||||
fi
|
||||
if [ -z "$output" ]; then
|
||||
log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)"
|
||||
fi
|
||||
|
||||
# Extract and persist session_id
|
||||
local new_sid
|
||||
|
|
@ -66,27 +80,37 @@ agent_run() {
|
|||
|
||||
# Nudge: if the model stopped without pushing, resume with encouragement.
|
||||
# Some models emit end_turn prematurely when confused. A nudge often unsticks them.
|
||||
if [ -n "$_AGENT_SESSION_ID" ]; then
|
||||
if [ -n "$_AGENT_SESSION_ID" ] && [ -n "$output" ]; then
|
||||
local has_changes
|
||||
has_changes=$(cd "$run_dir" && git status --porcelain 2>/dev/null | head -1) || true
|
||||
local has_pushed
|
||||
has_pushed=$(cd "$run_dir" && git log --oneline "${FORGE_REMOTE:-origin}/${PRIMARY_BRANCH:-main}..HEAD" 2>/dev/null | head -1) || true
|
||||
if [ -z "$has_pushed" ]; then
|
||||
local nudge="You stopped but did not push any code. "
|
||||
if [ -n "$has_changes" ]; then
|
||||
nudge+="You have uncommitted changes. Commit them and push."
|
||||
# Nudge: there are uncommitted changes
|
||||
local nudge="You stopped but did not push any code. You have uncommitted changes. Commit them and push."
|
||||
log "agent_run: nudging (uncommitted changes)"
|
||||
local nudge_rc
|
||||
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
|
||||
if [ "$nudge_rc" -eq 124 ]; then
|
||||
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
|
||||
elif [ "$nudge_rc" -ne 0 ]; then
|
||||
log "agent_run: nudge claude exited with code $nudge_rc"
|
||||
# Log last 3 lines of output for diagnostics
|
||||
if [ -n "$output" ]; then
|
||||
log "agent_run: nudge last output lines: $(echo "$output" | tail -3)"
|
||||
fi
|
||||
fi
|
||||
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
|
||||
if [ -n "$new_sid" ]; then
|
||||
_AGENT_SESSION_ID="$new_sid"
|
||||
printf '%s' "$new_sid" > "$SID_FILE"
|
||||
fi
|
||||
printf '%s' "$output" > "$diag_file" 2>/dev/null || true
|
||||
_AGENT_LAST_OUTPUT="$output"
|
||||
else
|
||||
nudge+="Complete the implementation, commit, and push your branch."
|
||||
log "agent_run: no push and no changes — skipping nudge"
|
||||
fi
|
||||
log "agent_run: nudging (no push detected)"
|
||||
output=$(cd "$run_dir" && timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") || true
|
||||
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
|
||||
if [ -n "$new_sid" ]; then
|
||||
_AGENT_SESSION_ID="$new_sid"
|
||||
printf '%s' "$new_sid" > "$SID_FILE"
|
||||
fi
|
||||
printf '%s' "$output" > "$diag_file" 2>/dev/null || true
|
||||
_AGENT_LAST_OUTPUT="$output"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,486 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# agent-session.sh — Shared tmux + Claude interactive session helpers
|
||||
#
|
||||
# Source this into agent orchestrator scripts for reusable session management.
|
||||
#
|
||||
# Functions:
|
||||
# agent_wait_for_claude_ready SESSION_NAME [TIMEOUT_SECS]
|
||||
# agent_inject_into_session SESSION_NAME TEXT
|
||||
# agent_kill_session SESSION_NAME
|
||||
# monitor_phase_loop PHASE_FILE IDLE_TIMEOUT_SECS CALLBACK_FN [SESSION_NAME]
|
||||
# session_lock_acquire [TIMEOUT_SECS]
|
||||
# session_lock_release
|
||||
|
||||
# --- Cooperative session lock (fd-based) ---
|
||||
# File descriptor for the session lock. Set by create_agent_session().
|
||||
# Callers can release/re-acquire via session_lock_release/session_lock_acquire
|
||||
# to allow other Claude sessions during idle phases (awaiting_review/awaiting_ci).
|
||||
SESSION_LOCK_FD=""
|
||||
|
||||
# Drop the cooperative session lock held on SESSION_LOCK_FD, if any.
# The file descriptor itself is left open so a later session_lock_acquire
# can re-lock it without reopening the lock file.
session_lock_release() {
  [ -z "${SESSION_LOCK_FD:-}" ] && return 0
  flock -u "$SESSION_LOCK_FD"
}
|
||||
|
||||
# Acquire (or re-acquire) the shared Claude session lock.
# Opens ~/.claude/session.lock on a fresh fd when none is open yet (so
# external callers can use this directly), then blocks up to the given
# timeout waiting for the exclusive lock.
# Args: [timeout_secs] (default 300)
# Returns: flock's status — 0 when locked, non-zero on timeout/error.
# shellcheck disable=SC2120 # timeout arg is used by external callers
session_lock_acquire() {
  local wait_secs="${1:-300}"
  local lock_path="${HOME}/.claude/session.lock"
  if [ -z "${SESSION_LOCK_FD:-}" ]; then
    mkdir -p "${lock_path%/*}"
    exec {SESSION_LOCK_FD}>>"$lock_path"
  fi
  flock -w "$wait_secs" "$SESSION_LOCK_FD"
}
|
||||
|
||||
# Poll a tmux pane until the Claude "❯" ready prompt is visible.
# The deadline is checked BEFORE each poll, so a timeout of 0 never
# captures the pane and returns 1 immediately.
# Args: session [timeout_secs] (default 120); polls every 2 seconds.
# Returns: 0 once the prompt appears, 1 if the deadline passes.
agent_wait_for_claude_ready() {
  local target="$1"
  local deadline="${2:-120}"
  local waited=0
  while [ "$waited" -lt "$deadline" ]; do
    tmux capture-pane -t "$target" -p 2>/dev/null | grep -q '❯' && return 0
    sleep 2
    waited=$((waited + 2))
  done
  return 1
}
|
||||
|
||||
# Paste TEXT into SESSION (waits for Claude to be ready first), then press Enter.
# Re-acquires the shared session lock, clears the idle marker, and delivers
# the text via a tmux paste buffer staged through a temp file.
# Args: $1 - tmux session name, $2 - text to inject
agent_inject_into_session() {
  local session="$1"
  local text="$2"
  local tmpfile
  # Re-acquire session lock before injecting — Claude will resume working
  # shellcheck disable=SC2119 # using default timeout
  session_lock_acquire || true
  # Best-effort wait: on timeout we still inject so the prompt is not lost.
  agent_wait_for_claude_ready "$session" 120 || true
  # Clear idle marker — new work incoming
  rm -f "/tmp/claude-idle-${session}.ts"
  # Stage the text in a temp file and load it into a PID-unique tmux buffer
  # so concurrent injectors do not clobber each other's buffers.
  tmpfile=$(mktemp /tmp/agent-inject-XXXXXX)
  printf '%s' "$tmpfile" >/dev/null # no-op guard removed; see below
  printf '%s' "$text" > "$tmpfile"
  tmux load-buffer -b "agent-inject-$$" "$tmpfile"
  tmux paste-buffer -t "$session" -b "agent-inject-$$"
  # Brief pause so the pasted text settles before the Enter keypress.
  sleep 0.5
  tmux send-keys -t "$session" "" Enter
  tmux delete-buffer -b "agent-inject-$$" 2>/dev/null || true
  rm -f "$tmpfile"
}
|
||||
|
||||
# _agent_install_hook — idempotently register a Claude Code hook in a
# settings.json file. Merges into existing settings (skipping when the exact
# command is already registered for that event) or creates the file fresh.
# Replaces six near-identical copy-pasted jq stanzas; uses jq dynamic object
# keys ($event) so one template serves every hook event.
# Args:
#   $1 - settings.json path
#   $2 - hook event name (Stop, PostToolUse, StopFailure, PreToolUse, ...)
#   $3 - matcher string (may be empty)
#   $4 - hook command line
_agent_install_hook() {
  local settings="$1" event="$2" matcher="$3" cmd="$4"
  if [ -f "$settings" ]; then
    jq --arg event "$event" --arg matcher "$matcher" --arg cmd "$cmd" '
      if ((.hooks[$event] // []) | any(.[]; .hooks[]?.command == $cmd))
      then .
      else .hooks[$event] = (.hooks[$event] // []) + [{
        matcher: $matcher,
        hooks: [{type: "command", command: $cmd}]
      }]
      end
    ' "$settings" > "${settings}.tmp" && mv "${settings}.tmp" "$settings"
  else
    jq -n --arg event "$event" --arg matcher "$matcher" --arg cmd "$cmd" '{
      hooks: {
        ($event): [{
          matcher: $matcher,
          hooks: [{type: "command", command: $cmd}]
        }]
      }
    }' > "$settings"
  fi
}

# Create a tmux session running Claude in the given workdir.
# Installs hooks via _agent_install_hook (each only when its script is
# executable; phase-file hooks only when a phase file is given):
#   Stop         — idle-detection timestamp marker for monitor_phase_loop
#   PostToolUse  — phase-file write detection (matcher "Bash|Write")
#   StopFailure  — immediate PHASE:failed on API/auth/billing errors
#   PreToolUse   — guard against destructive Bash operations (exit 2 denial)
#   SessionEnd   — termination marker for fast crash detection
#   SessionStart — context re-injection after compaction (matcher "compact")
# Acquires the shared session lock before launching Claude to avoid races
# on OAuth token refresh (#724); the fd stays open so callers can release
# during idle phases and re-acquire later.
# Args: session workdir [phase_file]
# Returns: 0 if the session is up and Claude reached the ready prompt, 1 otherwise.
create_agent_session() {
  local session="$1"
  local workdir="${2:-.}"
  local phase_file="${3:-}"

  # Project-local settings file that receives the hook registrations.
  mkdir -p "${workdir}/.claude"
  local settings="${workdir}/.claude/settings.json"

  # Stop hook: writes a timestamp marker when Claude finishes a response so
  # monitor_phase_loop can detect idleness without fragile pane scraping.
  local idle_marker="/tmp/claude-idle-${session}.ts"
  local hook_script="${FACTORY_ROOT}/lib/hooks/on-idle-stop.sh"
  if [ -x "$hook_script" ]; then
    local hook_cmd="${hook_script} ${idle_marker}"
    # With a phase file, also pass it and the session name so the hook can
    # nudge Claude if it returns to the prompt without signalling a phase.
    if [ -n "$phase_file" ]; then
      hook_cmd="${hook_script} ${idle_marker} ${phase_file} ${session}"
    fi
    _agent_install_hook "$settings" Stop "" "$hook_cmd"
  fi

  local phase_marker="/tmp/phase-changed-${session}.marker"
  if [ -n "$phase_file" ]; then
    # PostToolUse hook: marks phase-file writes so monitor_phase_loop reacts
    # this cycle instead of waiting for the next mtime-based poll.
    local phase_hook_script="${FACTORY_ROOT}/lib/hooks/on-phase-change.sh"
    if [ -x "$phase_hook_script" ]; then
      _agent_install_hook "$settings" PostToolUse "Bash|Write" \
        "${phase_hook_script} ${phase_file} ${phase_marker}"
      rm -f "$phase_marker"
    fi

    # StopFailure hook: on rate limit / server / billing / auth errors, write
    # PHASE:failed and touch the phase-changed marker so the monitor picks it
    # up within one poll cycle instead of waiting out the idle timeout.
    local stop_failure_hook_script="${FACTORY_ROOT}/lib/hooks/on-stop-failure.sh"
    if [ -x "$stop_failure_hook_script" ]; then
      _agent_install_hook "$settings" StopFailure \
        "rate_limit|server_error|authentication_failed|billing_error" \
        "${stop_failure_hook_script} ${phase_file} ${phase_marker}"
    fi
  fi

  # PreToolUse hook: blocks force-push to the primary branch, rm -rf outside
  # the worktree, direct API merge calls, and checkout/switch to the primary
  # branch. Claude sees the denial reason on exit 2 and can self-correct.
  local guard_hook_script="${FACTORY_ROOT}/lib/hooks/on-pretooluse-guard.sh"
  if [ -x "$guard_hook_script" ]; then
    local abs_workdir
    abs_workdir=$(cd "$workdir" 2>/dev/null && pwd) || abs_workdir="$workdir"
    _agent_install_hook "$settings" PreToolUse Bash \
      "${guard_hook_script} ${PRIMARY_BRANCH:-main} ${abs_workdir} ${session}"
  fi

  # SessionEnd hook: writes a termination marker on exit (clean or crash) so
  # monitor_phase_loop detects it faster than tmux has-session polling alone.
  local exit_marker="/tmp/claude-exited-${session}.ts"
  local session_end_hook_script="${FACTORY_ROOT}/lib/hooks/on-session-end.sh"
  if [ -x "$session_end_hook_script" ]; then
    _agent_install_hook "$settings" SessionEnd "" "${session_end_hook_script} ${exit_marker}"
  fi
  rm -f "$exit_marker"

  # SessionStart(compact) hook: after context compaction the phase-protocol
  # instructions are lost; this hook re-emits the content of the context file
  # written by write_compact_context().
  if [ -n "$phase_file" ]; then
    local compact_hook_script="${FACTORY_ROOT}/lib/hooks/on-compact-reinject.sh"
    if [ -x "$compact_hook_script" ]; then
      _agent_install_hook "$settings" SessionStart compact \
        "${compact_hook_script} ${phase_file%.phase}.context"
    fi
  fi

  # Fresh session starts with no stale idle marker.
  rm -f "$idle_marker"
  local model_flag=""
  if [ -n "${CLAUDE_MODEL:-}" ]; then
    model_flag="--model ${CLAUDE_MODEL}"
  fi

  # Serialize Claude sessions via the shared fd-based lock (see #724).
  # session_lock_acquire opens ~/.claude/session.lock (shared across
  # containers when ~/.claude is bind-mounted) and waits up to 300s —
  # identical to the previously inlined open/flock logic.
  session_lock_acquire 300 || return 1
  local claude_cmd="claude --dangerously-skip-permissions ${model_flag}"

  tmux new-session -d -s "$session" -c "$workdir" \
    "$claude_cmd" 2>/dev/null
  sleep 1
  tmux has-session -t "$session" 2>/dev/null || return 1
  agent_wait_for_claude_ready "$session" 120 || return 1
  return 0
}
|
||||
|
||||
# Inject a prompt/formula into a session (alias for agent_inject_into_session).
# Kept as a separately named entry point so orchestrator call sites read as
# intent ("inject formula") rather than mechanism.
# Args: forwarded verbatim — session text
inject_formula() {
  agent_inject_into_session "$@"
}
|
||||
|
||||
# Monitor a phase file, calling a callback on changes and handling idle timeout.
# Sets _MONITOR_LOOP_EXIT to the exit reason (idle_timeout, idle_prompt, done, crashed, PHASE:failed, PHASE:escalate).
# Sets _MONITOR_SESSION to the resolved session name (arg 4 or $SESSION_NAME).
# Callbacks should reference _MONITOR_SESSION instead of $SESSION_NAME directly.
# Args: phase_file idle_timeout_secs callback_fn [session_name]
#   session_name — tmux session to health-check; falls back to $SESSION_NAME global
# Returns: 0 on every handled exit; 1 only when the session crashed and the
# callback did not restart it.
#
# Idle detection: uses a Stop hook marker file (written by lib/hooks/on-idle-stop.sh)
# to detect when Claude finishes responding without writing a phase signal.
# If the marker exists for 3 consecutive polls with no phase written, the session
# is killed and the callback invoked with "PHASE:failed".
monitor_phase_loop() {
  local phase_file="$1"
  local idle_timeout="$2"
  local callback="$3"
  local _session="${4:-${SESSION_NAME:-}}"
  # Export resolved session name so callbacks can reference it regardless of
  # which session was passed to monitor_phase_loop (analogous to _MONITOR_LOOP_EXIT).
  export _MONITOR_SESSION="$_session"
  local poll_interval="${PHASE_POLL_INTERVAL:-10}"
  local last_mtime=0
  local idle_elapsed=0
  local idle_pane_count=0

  while true; do
    sleep "$poll_interval"
    idle_elapsed=$(( idle_elapsed + poll_interval ))

    # Session health check: SessionEnd hook marker provides fast detection,
    # tmux has-session is the fallback for unclean exits (e.g. tmux crash).
    local exit_marker="/tmp/claude-exited-${_session}.ts"
    if [ -f "$exit_marker" ] || ! tmux has-session -t "${_session}" 2>/dev/null; then
      local current_phase
      current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)
      case "$current_phase" in
        PHASE:done|PHASE:failed|PHASE:merged|PHASE:escalate)
          ;; # terminal — fall through to phase handler
        *)
          # Call callback with "crashed" — let agent-specific code handle recovery
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:crashed"
          fi
          # If callback didn't restart session, break
          if ! tmux has-session -t "${_session}" 2>/dev/null; then
            _MONITOR_LOOP_EXIT="crashed"
            return 1
          fi
          # Callback restarted the session: reset idle accounting and poll again.
          idle_elapsed=0
          idle_pane_count=0
          continue
          ;;
      esac
    fi

    # Check phase-changed marker from PostToolUse hook — if present, the hook
    # detected a phase file write so we reset last_mtime to force processing
    # this cycle instead of waiting for the next mtime change.
    local phase_marker="/tmp/phase-changed-${_session}.marker"
    if [ -f "$phase_marker" ]; then
      rm -f "$phase_marker"
      last_mtime=0
    fi

    # Check phase file for changes
    # NOTE(review): stat -c %Y is GNU coreutils; BSD/macOS would need
    # stat -f %m — confirm deployment targets are Linux-only.
    local phase_mtime
    phase_mtime=$(stat -c %Y "$phase_file" 2>/dev/null || echo 0)
    local current_phase
    current_phase=$(head -1 "$phase_file" 2>/dev/null | tr -d '[:space:]' || true)

    if [ -z "$current_phase" ] || [ "$phase_mtime" -le "$last_mtime" ]; then
      # No phase change — check idle timeout
      if [ "$idle_elapsed" -ge "$idle_timeout" ]; then
        _MONITOR_LOOP_EXIT="idle_timeout"
        agent_kill_session "${_session}"
        return 0
      fi
      # Idle detection via Stop hook: the on-idle-stop.sh hook writes a marker
      # file when Claude finishes a response. If the marker exists and no phase
      # has been written, Claude returned to the prompt without following the
      # phase protocol. 3 consecutive polls = confirmed idle (not mid-turn).
      local idle_marker="/tmp/claude-idle-${_session}.ts"
      if [ -z "$current_phase" ] && [ -f "$idle_marker" ]; then
        idle_pane_count=$(( idle_pane_count + 1 ))
        if [ "$idle_pane_count" -ge 3 ]; then
          _MONITOR_LOOP_EXIT="idle_prompt"
          # Session is killed before the callback is invoked.
          # Callbacks that handle PHASE:failed must not assume the session is alive.
          agent_kill_session "${_session}"
          if type "${callback}" &>/dev/null; then
            "$callback" "PHASE:failed"
          fi
          return 0
        fi
      else
        idle_pane_count=0
      fi
      continue
    fi

    # Phase changed
    last_mtime="$phase_mtime"
    # shellcheck disable=SC2034 # read by phase-handler.sh callback
    LAST_PHASE_MTIME="$phase_mtime"
    idle_elapsed=0
    idle_pane_count=0

    # Terminal phases
    case "$current_phase" in
      PHASE:done|PHASE:merged)
        _MONITOR_LOOP_EXIT="done"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
      PHASE:failed|PHASE:escalate)
        _MONITOR_LOOP_EXIT="$current_phase"
        if type "${callback}" &>/dev/null; then
          "$callback" "$current_phase"
        fi
        return 0
        ;;
    esac

    # Non-terminal phase — call callback
    if type "${callback}" &>/dev/null; then
      "$callback" "$current_phase"
    fi
  done
}
|
||||
|
||||
# Persist context for re-injection after Claude compacts its context window.
# The SessionStart "compact" hook reads this file and replays it to stdout.
# Args: phase_file content — writes to <phase_file minus .phase>.context
write_compact_context() {
  local phase_path="$1"
  local body="$2"
  printf '%s\n' "$body" > "${phase_path%.phase}.context"
}
|
||||
|
||||
# Kill a tmux session gracefully and remove its marker files.
# No-op if the session does not exist. Fix: an empty session name now
# returns immediately — previously the rm lines still ran and deleted
# generic "/tmp/claude-idle-.ts"-style paths unrelated to any session.
# Args: session
agent_kill_session() {
  local session="${1:-}"
  # Guard: nothing to kill, and the marker paths below would collapse to
  # shared generic filenames if we continued with an empty name.
  [ -n "$session" ] || return 0
  tmux kill-session -t "$session" 2>/dev/null || true
  rm -f "/tmp/claude-idle-${session}.ts" \
        "/tmp/phase-changed-${session}.marker" \
        "/tmp/claude-exited-${session}.ts" \
        "/tmp/claude-nudge-${session}.count"
}
|
||||
|
||||
# Return the first line of a phase file with all whitespace stripped.
# Usage: read_phase [file] — defaults to $PHASE_FILE.
# Missing/unreadable/empty files yield an empty string; always exits 0.
read_phase() {
  local phase_path="${1:-${PHASE_FILE:-}}"
  local first_line=""
  if [ -r "$phase_path" ]; then
    IFS= read -r first_line 2>/dev/null < "$phase_path" || true
  fi
  printf '%s' "${first_line//[[:space:]]/}"
}
|
||||
|
|
@ -51,14 +51,30 @@ setup_vault_branch_protection() {
|
|||
|
||||
_bp_log "Setting up branch protection for ${branch} on ${FORGE_OPS_REPO}"
|
||||
|
||||
# Check if branch exists
|
||||
local branch_exists
|
||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||
# Check if branch exists with retry loop (handles race condition after initial push)
|
||||
local branch_exists="0"
|
||||
local max_attempts=3
|
||||
local attempt=1
|
||||
|
||||
while [ "$attempt" -le "$max_attempts" ]; do
|
||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$branch_exists" = "200" ]; then
|
||||
_bp_log "Branch ${branch} exists on ${FORGE_OPS_REPO}"
|
||||
break
|
||||
fi
|
||||
|
||||
if [ "$attempt" -lt "$max_attempts" ]; then
|
||||
_bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
|
||||
sleep 2
|
||||
fi
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
|
||||
if [ "$branch_exists" != "200" ]; then
|
||||
_bp_log "ERROR: Branch ${branch} does not exist"
|
||||
_bp_log "ERROR: Branch ${branch} does not exist on ${FORGE_OPS_REPO} after ${max_attempts} attempts"
|
||||
return 1
|
||||
fi
|
||||
|
||||
|
|
@ -228,14 +244,30 @@ setup_profile_branch_protection() {
|
|||
local api_url
|
||||
api_url="${FORGE_URL}/api/v1/repos/${repo}"
|
||||
|
||||
# Check if branch exists
|
||||
local branch_exists
|
||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||
# Check if branch exists with retry loop (handles race condition after initial push)
|
||||
local branch_exists="0"
|
||||
local max_attempts=3
|
||||
local attempt=1
|
||||
|
||||
while [ "$attempt" -le "$max_attempts" ]; do
|
||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$branch_exists" = "200" ]; then
|
||||
_bp_log "Branch ${branch} exists on ${repo}"
|
||||
break
|
||||
fi
|
||||
|
||||
if [ "$attempt" -lt "$max_attempts" ]; then
|
||||
_bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
|
||||
sleep 2
|
||||
fi
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
|
||||
if [ "$branch_exists" != "200" ]; then
|
||||
_bp_log "ERROR: Branch ${branch} does not exist on ${repo}"
|
||||
_bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts"
|
||||
return 1
|
||||
fi
|
||||
|
||||
|
|
@ -379,7 +411,7 @@ remove_branch_protection() {
|
|||
# - Allow review-bot to approve PRs
|
||||
#
|
||||
# Args:
|
||||
# $1 - Repo path in format 'owner/repo' (e.g., 'johba/disinto')
|
||||
# $1 - Repo path in format 'owner/repo' (e.g., 'disinto-admin/disinto')
|
||||
# $2 - Branch to protect (default: main)
|
||||
#
|
||||
# Returns: 0 on success, 1 on failure
|
||||
|
|
@ -398,14 +430,30 @@ setup_project_branch_protection() {
|
|||
local api_url
|
||||
api_url="${FORGE_URL}/api/v1/repos/${repo}"
|
||||
|
||||
# Check if branch exists
|
||||
local branch_exists
|
||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||
# Check if branch exists with retry loop (handles race condition after initial push)
|
||||
local branch_exists="0"
|
||||
local max_attempts=3
|
||||
local attempt=1
|
||||
|
||||
while [ "$attempt" -le "$max_attempts" ]; do
|
||||
branch_exists=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${api_url}/git/branches/${branch}" 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$branch_exists" = "200" ]; then
|
||||
_bp_log "Branch ${branch} exists on ${repo}"
|
||||
break
|
||||
fi
|
||||
|
||||
if [ "$attempt" -lt "$max_attempts" ]; then
|
||||
_bp_log "Branch ${branch} not indexed yet (attempt ${attempt}/${max_attempts}), waiting 2s..."
|
||||
sleep 2
|
||||
fi
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
|
||||
if [ "$branch_exists" != "200" ]; then
|
||||
_bp_log "ERROR: Branch ${branch} does not exist on ${repo}"
|
||||
_bp_log "ERROR: Branch ${branch} does not exist on ${repo} after ${max_attempts} attempts"
|
||||
return 1
|
||||
fi
|
||||
|
||||
|
|
@ -536,7 +584,7 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
|||
echo "Required environment variables:"
|
||||
echo " FORGE_TOKEN Forgejo API token (admin user recommended)"
|
||||
echo " FORGE_URL Forgejo instance URL (e.g., https://codeberg.org)"
|
||||
echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., johba/disinto-ops)"
|
||||
echo " FORGE_OPS_REPO Ops repo in format owner/repo (e.g., disinto-admin/disinto-ops)"
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
|
|
|||
|
|
@ -7,27 +7,6 @@ set -euo pipefail
|
|||
# ci_commit_status() / ci_pipeline_number() require: woodpecker_api(), forge_api() (from env.sh)
|
||||
# classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh)
|
||||
|
||||
# ensure_blocked_label_id — look up (or create) the "blocked" label, print its ID.
# Caches the result in _BLOCKED_LABEL_ID to avoid repeated API calls.
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
# Outputs: label ID on stdout (empty string if lookup and creation both fail).
ensure_blocked_label_id() {
  # Fast path: a previous call already resolved and cached the ID.
  if [ -n "${_BLOCKED_LABEL_ID:-}" ]; then
    printf '%s' "$_BLOCKED_LABEL_ID"
    return 0
  fi
  # Look up an existing "blocked" label; empty on miss or API error.
  _BLOCKED_LABEL_ID=$(forge_api GET "/labels" 2>/dev/null \
    | jq -r '.[] | select(.name == "blocked") | .id' 2>/dev/null || true)
  # Not found — create it and capture the new label's ID.
  # NOTE(review): if creation also fails this prints an empty string;
  # callers appear expected to tolerate an empty ID — confirm.
  if [ -z "$_BLOCKED_LABEL_ID" ]; then
    _BLOCKED_LABEL_ID=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/labels" \
      -d '{"name":"blocked","color":"#e11d48"}' 2>/dev/null \
      | jq -r '.id // empty' 2>/dev/null || true)
  fi
  printf '%s' "$_BLOCKED_LABEL_ID"
}
|
||||
|
||||
# ensure_priority_label — look up (or create) the "priority" label, print its ID.
|
||||
# Caches the result in _PRIORITY_LABEL_ID to avoid repeated API calls.
|
||||
# Requires: FORGE_TOKEN, FORGE_API (from env.sh), forge_api()
|
||||
|
|
|
|||
455
lib/ci-setup.sh
Normal file
455
lib/ci-setup.sh
Normal file
|
|
@ -0,0 +1,455 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# ci-setup.sh — CI setup functions for Woodpecker and cron configuration
|
||||
#
|
||||
# Internal functions (called via _load_ci_context + _*_impl):
|
||||
# _install_cron_impl() - Install crontab entries for project agents
|
||||
# _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker
|
||||
# _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow
|
||||
# _activate_woodpecker_repo_impl() - Activate repo in Woodpecker
|
||||
#
|
||||
# Globals expected (asserted by _load_ci_context):
|
||||
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||
# FORGE_TOKEN - Forge API token
|
||||
# FACTORY_ROOT - Root of the disinto factory
|
||||
#
|
||||
# Usage:
|
||||
# source "${FACTORY_ROOT}/lib/ci-setup.sh"
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
# Assert required globals are set before using this module.
|
||||
# Assert required globals are set before using this module.
# Exits 1 with a list of the missing names; otherwise returns 0.
_load_ci_context() {
  local required=(FORGE_URL FORGE_TOKEN FACTORY_ROOT)
  local name missing=()
  # Collect every empty/unset global via indirect expansion.
  for name in "${required[@]}"; do
    [ -n "${!name:-}" ] || missing+=("$name")
  done
  if (( ${#missing[@]} > 0 )); then
    echo "Error: ci-setup.sh requires these globals to be set: ${missing[*]}" >&2
    exit 1
  fi
}
|
||||
|
||||
# Generate and optionally install cron entries for the project agents.
|
||||
# Usage: install_cron <name> <toml_path> <auto_yes> <bare>
|
||||
# Generate and optionally install cron entries for the project agents.
# Usage: install_cron <name> <toml_path> <auto_yes> <bare>
# In compose mode (bare=false) scheduling lives inside the agents container,
# so the host crontab is never touched. In bare mode the block is appended
# to the user's crontab after an optional interactive confirmation.
_install_cron_impl() {
  local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}"

  # Compose mode: the agents container runs cron internally — nothing to do.
  if [[ "$bare" == false ]]; then
    echo ""
    echo "Cron: skipped (agents container handles scheduling in compose mode)"
    return
  fi

  # Bare-metal mode requires a working crontab on the host.
  if ! command -v crontab &>/dev/null; then
    echo "Error: crontab not found (required for bare-metal mode)" >&2
    echo " Install: apt install cron / brew install cron" >&2
    exit 1
  fi

  # Cron runs from an arbitrary cwd, so reference the TOML by absolute path.
  local abs_toml
  abs_toml="$(cd "$(dirname "$toml")" && pwd)/$(basename "$toml")"

  local cron_block
  cron_block="# disinto: ${name}
2,7,12,17,22,27,32,37,42,47,52,57 * * * * ${FACTORY_ROOT}/review/review-poll.sh ${abs_toml} >/dev/null 2>&1
4,9,14,19,24,29,34,39,44,49,54,59 * * * * ${FACTORY_ROOT}/dev/dev-poll.sh ${abs_toml} >/dev/null 2>&1
0 0,6,12,18 * * * cd ${FACTORY_ROOT} && bash gardener/gardener-run.sh ${abs_toml} >/dev/null 2>&1"

  echo ""
  echo "Cron entries to install:"
  echo "$cron_block"
  echo ""

  # Idempotence: the marker comment "# disinto: <name>" means a previous run
  # already installed this project's entries.
  local installed
  installed=$(crontab -l 2>/dev/null || true)
  if grep -q "# disinto: ${name}" <<<"$installed"; then
    echo "Cron: skipped (entries for ${name} already installed)"
    return
  fi

  # Ask for confirmation only when interactive and -y was not given.
  if [[ "$auto_yes" == false && -t 0 ]]; then
    read -rp "Install these cron entries? [y/N] " confirm
    if [[ ! "$confirm" =~ ^[Yy] ]]; then
      echo "Skipped cron install. Add manually with: crontab -e"
      return
    fi
  fi

  # Append the new block to whatever crontab already exists.
  if { crontab -l 2>/dev/null || true; printf '%s\n' "$cron_block"; } | crontab -; then
    echo "Cron entries installed for ${name}"
  else
    echo "Error: failed to install cron entries" >&2
    return 1
  fi
}
|
||||
|
||||
# Set up Woodpecker CI to use Forgejo as its forge backend.
|
||||
# Creates an OAuth2 app on Forgejo for Woodpecker, activates the repo.
|
||||
# Usage: create_woodpecker_oauth <forge_url> <repo_slug>
|
||||
# Set up Woodpecker CI to use Forgejo as its forge backend.
# Creates (or reuses) an OAuth2 application on Forgejo, then persists the
# Woodpecker forge configuration into ${FACTORY_ROOT}/.env.
# Usage: create_woodpecker_oauth <forge_url> <repo_slug>
# Globals read: FORGE_TOKEN, FACTORY_ROOT.
# Returns 0 even on OAuth failure (prints a warning) so setup can continue.
_create_woodpecker_oauth_impl() {
  local forge_url="$1"
  local _repo_slug="$2" # unused but required for signature compatibility

  echo ""
  echo "── Woodpecker OAuth2 setup ────────────────────────────"

  # Create OAuth2 application on Forgejo for Woodpecker
  local oauth2_name="woodpecker-ci"
  local redirect_uri="http://localhost:8000/authorize"
  local existing_app client_id client_secret

  # Check if OAuth2 app already exists.
  # NOTE(review): Forgejo never returns a client_secret for an existing app,
  # so on the reuse path any WP_FORGEJO_SECRET already in .env is kept as-is.
  existing_app=$(curl -sf \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/user/applications/oauth2" 2>/dev/null \
    | jq -r --arg name "$oauth2_name" '.[] | select(.name == $name) | .client_id // empty' 2>/dev/null) || true

  if [ -n "$existing_app" ]; then
    echo "OAuth2: ${oauth2_name} (already exists, client_id=${existing_app})"
    client_id="$existing_app"
  else
    local oauth2_resp
    oauth2_resp=$(curl -sf -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/user/applications/oauth2" \
      -d "{\"name\":\"${oauth2_name}\",\"redirect_uris\":[\"${redirect_uri}\"],\"confidential_client\":true}" \
      2>/dev/null) || oauth2_resp=""

    if [ -z "$oauth2_resp" ]; then
      echo "Warning: failed to create OAuth2 app on Forgejo" >&2
      return
    fi

    client_id=$(printf '%s' "$oauth2_resp" | jq -r '.client_id // empty')
    client_secret=$(printf '%s' "$oauth2_resp" | jq -r '.client_secret // empty')

    if [ -z "$client_id" ]; then
      echo "Warning: OAuth2 app creation returned no client_id" >&2
      return
    fi

    echo "OAuth2: ${oauth2_name} created (client_id=${client_id})"
  fi

  # Store Woodpecker forge config in .env.
  # WP_FORGEJO_CLIENT/SECRET match the docker-compose.yml variable references;
  # WOODPECKER_HOST must be the host-accessible URL to match OAuth2 redirect_uri.
  local env_file="${FACTORY_ROOT}/.env"
  local wp_vars=(
    "WOODPECKER_FORGEJO=true"
    "WOODPECKER_FORGEJO_URL=${forge_url}"
    "WOODPECKER_HOST=http://localhost:8000"
  )
  if [ -n "${client_id:-}" ]; then
    wp_vars+=("WP_FORGEJO_CLIENT=${client_id}")
  fi
  if [ -n "${client_secret:-}" ]; then
    wp_vars+=("WP_FORGEJO_SECRET=${client_secret}")
  fi

  # Fix: declare the loop variable local (it previously leaked into the
  # caller's scope), and escape sed metacharacters in the replacement so a
  # client secret containing '\', '&' or the '|' delimiter cannot corrupt
  # the substituted .env line.
  local var_line var_name replacement
  for var_line in "${wp_vars[@]}"; do
    var_name="${var_line%%=*}"
    if grep -q "^${var_name}=" "$env_file" 2>/dev/null; then
      replacement=$(printf '%s' "$var_line" | sed -e 's/[\\&|]/\\&/g')
      sed -i "s|^${var_name}=.*|${replacement}|" "$env_file"
    else
      printf '%s\n' "$var_line" >> "$env_file"
    fi
  done
  echo "Config: Woodpecker forge vars written to .env"
}
|
||||
|
||||
# Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow.
|
||||
# Requires _FORGE_ADMIN_PASS (set by setup_forge when admin user was just created).
|
||||
# Called after compose stack is up, before activate_woodpecker_repo.
|
||||
# Usage: generate_woodpecker_token <forge_url>
|
||||
_generate_woodpecker_token_impl() {
  # Auto-generate WOODPECKER_TOKEN by driving the Forgejo OAuth2 login flow
  # with curl: web login → authorize (consent if needed) → Woodpecker
  # callback → personal access token. Persists the token to .env and exports
  # it. Returns 1 (with a warning) at any step that cannot proceed.
  # Globals read: WOODPECKER_SERVER, FACTORY_ROOT, _FORGE_ADMIN_PASS.
  local forge_url="$1"
  local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}"
  local env_file="${FACTORY_ROOT}/.env"
  local admin_user="disinto-admin"
  local admin_pass="${_FORGE_ADMIN_PASS:-}"

  # Skip if already set
  if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then
    echo "Config: WOODPECKER_TOKEN already set in .env"
    return 0
  fi

  echo ""
  echo "── Woodpecker token generation ────────────────────────"

  # The flow needs the admin web password, which only setup_forge knows
  # (and only when it just created the admin user).
  if [ -z "$admin_pass" ]; then
    echo "Warning: Forgejo admin password not available — cannot generate WOODPECKER_TOKEN" >&2
    echo " Log into Woodpecker at ${wp_server} and create a token manually" >&2
    return 1
  fi

  # Wait for Woodpecker to become ready (up to ~60s: 30 tries, 2s apart)
  echo -n "Waiting for Woodpecker"
  local retries=0
  while ! curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1; do
    retries=$((retries + 1))
    if [ "$retries" -gt 30 ]; then
      echo ""
      echo "Warning: Woodpecker not ready at ${wp_server} — skipping token generation" >&2
      return 1
    fi
    echo -n "."
    sleep 2
  done
  echo " ready"

  # Flow: Forgejo web login → OAuth2 authorize → Woodpecker callback → token
  local cookie_jar auth_body_file
  cookie_jar=$(mktemp /tmp/wp-auth-XXXXXX)
  auth_body_file=$(mktemp /tmp/wp-body-XXXXXX)

  # Step 1: Log into Forgejo web UI (session cookie needed for OAuth consent).
  # The CSRF token is scraped from the login page's hidden form field
  # (accepts either a content= or value= attribute on the _csrf element).
  local csrf
  csrf=$(curl -sf -c "$cookie_jar" "${forge_url}/user/login" 2>/dev/null \
    | grep -o 'name="_csrf"[^>]*' | head -1 \
    | grep -oE '(content|value)="[^"]*"' | head -1 \
    | cut -d'"' -f2) || csrf=""

  if [ -z "$csrf" ]; then
    echo "Warning: could not get Forgejo CSRF token — skipping token generation" >&2
    rm -f "$cookie_jar" "$auth_body_file"
    return 1
  fi

  # POST the login form; success is detected later by whether the session
  # cookie can complete the authorize step, so failures here are tolerated.
  curl -sf -b "$cookie_jar" -c "$cookie_jar" -X POST \
    -o /dev/null \
    "${forge_url}/user/login" \
    --data-urlencode "_csrf=${csrf}" \
    --data-urlencode "user_name=${admin_user}" \
    --data-urlencode "password=${admin_pass}" \
    2>/dev/null || true

  # Step 2: Start Woodpecker OAuth2 flow (captures authorize URL with state param)
  local wp_redir
  wp_redir=$(curl -sf -o /dev/null -w '%{redirect_url}' \
    "${wp_server}/authorize" 2>/dev/null) || wp_redir=""

  if [ -z "$wp_redir" ]; then
    echo "Warning: Woodpecker did not provide OAuth redirect — skipping token generation" >&2
    rm -f "$cookie_jar" "$auth_body_file"
    return 1
  fi

  # Rewrite internal Docker network URLs to host-accessible URLs.
  # Handle both plain and URL-encoded forms of the internal hostnames.
  local forge_url_enc wp_server_enc
  forge_url_enc=$(printf '%s' "$forge_url" | sed 's|:|%3A|g; s|/|%2F|g')
  wp_server_enc=$(printf '%s' "$wp_server" | sed 's|:|%3A|g; s|/|%2F|g')
  wp_redir=$(printf '%s' "$wp_redir" \
    | sed "s|http://forgejo:3000|${forge_url}|g" \
    | sed "s|http%3A%2F%2Fforgejo%3A3000|${forge_url_enc}|g" \
    | sed "s|http://woodpecker:8000|${wp_server}|g" \
    | sed "s|http%3A%2F%2Fwoodpecker%3A8000|${wp_server_enc}|g")

  # Step 3: Hit Forgejo OAuth authorize endpoint with session
  # First time: shows consent page. Already approved: redirects with code.
  local auth_headers redirect_loc auth_code
  auth_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \
    -D - -o "$auth_body_file" \
    "$wp_redir" 2>/dev/null) || auth_headers=""

  redirect_loc=$(printf '%s' "$auth_headers" \
    | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')

  if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then
    # Auto-approved: extract code from redirect
    auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/')
  else
    # Consent page: extract CSRF and all form fields, POST grant approval
    local consent_csrf form_client_id form_state form_redirect_uri
    consent_csrf=$(grep -o 'name="_csrf"[^>]*' "$auth_body_file" 2>/dev/null \
      | head -1 | grep -oE '(content|value)="[^"]*"' | head -1 \
      | cut -d'"' -f2) || consent_csrf=""
    form_client_id=$(grep 'name="client_id"' "$auth_body_file" 2>/dev/null \
      | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_client_id=""
    form_state=$(grep 'name="state"' "$auth_body_file" 2>/dev/null \
      | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_state=""
    form_redirect_uri=$(grep 'name="redirect_uri"' "$auth_body_file" 2>/dev/null \
      | grep -oE 'value="[^"]*"' | cut -d'"' -f2) || form_redirect_uri=""

    if [ -n "$consent_csrf" ]; then
      local grant_headers
      grant_headers=$(curl -sf -b "$cookie_jar" -c "$cookie_jar" \
        -D - -o /dev/null -X POST \
        "${forge_url}/login/oauth/grant" \
        --data-urlencode "_csrf=${consent_csrf}" \
        --data-urlencode "client_id=${form_client_id}" \
        --data-urlencode "state=${form_state}" \
        --data-urlencode "scope=" \
        --data-urlencode "nonce=" \
        --data-urlencode "redirect_uri=${form_redirect_uri}" \
        --data-urlencode "granted=true" \
        2>/dev/null) || grant_headers=""

      redirect_loc=$(printf '%s' "$grant_headers" \
        | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')

      if printf '%s' "${redirect_loc:-}" | grep -q 'code='; then
        auth_code=$(printf '%s' "$redirect_loc" | sed 's/.*code=\([^&]*\).*/\1/')
      fi
    fi
  fi

  rm -f "$auth_body_file"

  if [ -z "${auth_code:-}" ]; then
    echo "Warning: could not obtain OAuth2 authorization code — skipping token generation" >&2
    rm -f "$cookie_jar"
    return 1
  fi

  # Step 4: Complete Woodpecker OAuth callback (exchanges code for session)
  local state
  state=$(printf '%s' "$wp_redir" | sed -n 's/.*[&?]state=\([^&]*\).*/\1/p')

  local wp_headers wp_token
  wp_headers=$(curl -sf -c "$cookie_jar" \
    -D - -o /dev/null \
    "${wp_server}/authorize?code=${auth_code}&state=${state:-}" \
    2>/dev/null) || wp_headers=""

  # Extract token from redirect URL (Woodpecker returns ?access_token=...)
  redirect_loc=$(printf '%s' "$wp_headers" \
    | grep -i '^location:' | head -1 | tr -d '\r' | awk '{print $2}')

  wp_token=""
  if printf '%s' "${redirect_loc:-}" | grep -q 'access_token='; then
    wp_token=$(printf '%s' "$redirect_loc" | sed 's/.*access_token=\([^&]*\).*/\1/')
  fi

  # Fallback: check for user_sess cookie
  # NOTE(review): assumes the session cookie value is the last field of the
  # Netscape cookie-jar line — confirm against curl's cookie file format.
  if [ -z "$wp_token" ]; then
    wp_token=$(awk '/user_sess/{print $NF}' "$cookie_jar" 2>/dev/null) || wp_token=""
  fi

  rm -f "$cookie_jar"

  if [ -z "$wp_token" ]; then
    echo "Warning: could not obtain Woodpecker token — skipping token generation" >&2
    return 1
  fi

  # Step 5: Create persistent personal access token via Woodpecker API
  # WP v3 requires CSRF header for POST operations with session tokens.
  local wp_csrf
  wp_csrf=$(curl -sf -b "user_sess=${wp_token}" \
    "${wp_server}/web-config.js" 2>/dev/null \
    | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') || wp_csrf=""

  local pat_resp final_token
  pat_resp=$(curl -sf -X POST \
    -b "user_sess=${wp_token}" \
    ${wp_csrf:+-H "X-CSRF-Token: ${wp_csrf}"} \
    "${wp_server}/api/user/token" \
    2>/dev/null) || pat_resp=""

  final_token=""
  if [ -n "$pat_resp" ]; then
    final_token=$(printf '%s' "$pat_resp" \
      | jq -r 'if .token then .token elif .access_token then .access_token else empty end' \
      2>/dev/null) || final_token=""
  fi

  # Use persistent token if available, otherwise use session token
  final_token="${final_token:-$wp_token}"

  # Save to .env
  if grep -q '^WOODPECKER_TOKEN=' "$env_file" 2>/dev/null; then
    sed -i "s|^WOODPECKER_TOKEN=.*|WOODPECKER_TOKEN=${final_token}|" "$env_file"
  else
    printf 'WOODPECKER_TOKEN=%s\n' "$final_token" >> "$env_file"
  fi
  export WOODPECKER_TOKEN="$final_token"
  echo "Config: WOODPECKER_TOKEN generated and saved to .env"
}
|
||||
|
||||
# Activate a repo in Woodpecker CI.
|
||||
# Usage: activate_woodpecker_repo <forge_repo>
|
||||
# Activate a repo in Woodpecker CI.
# Usage: activate_woodpecker_repo <forge_repo>
# Waits for the Woodpecker API, looks the repo up, activates it when
# needed, lowers the pipeline timeout, and remembers the Woodpecker repo
# ID in _WP_REPO_ID for later TOML generation. Missing token or an
# unreachable server only produce warnings (return 0).
_activate_woodpecker_repo_impl() {
  local forge_repo="$1"
  local wp_server="${WOODPECKER_SERVER:-http://localhost:8000}"

  # Give Woodpecker up to ~20s (10 probes, 2s apart) to come up.
  local attempt
  for attempt in {1..10}; do
    curl -sf --max-time 3 "${wp_server}/api/version" >/dev/null 2>&1 && break
    sleep 2
  done

  if ! curl -sf --max-time 5 "${wp_server}/api/version" >/dev/null 2>&1; then
    echo "Woodpecker: not reachable at ${wp_server} after stack start, skipping repo activation" >&2
    return
  fi

  echo ""
  echo "── Woodpecker repo activation ─────────────────────────"

  local wp_token="${WOODPECKER_TOKEN:-}"
  if [[ -z "$wp_token" ]]; then
    echo "Warning: WOODPECKER_TOKEN not set — cannot activate repo" >&2
    echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2
    return
  fi

  # Lookup first: activation is only needed when the repo is unknown.
  local wp_repo_id
  wp_repo_id=$(curl -sf \
    -H "Authorization: Bearer ${wp_token}" \
    "${wp_server}/api/repos/lookup/${forge_repo}" 2>/dev/null \
    | jq -r '.id // empty' 2>/dev/null) || true

  if [[ -n "$wp_repo_id" && "$wp_repo_id" != "0" ]]; then
    echo "Repo: ${forge_repo} already active in Woodpecker (id=${wp_repo_id})"
  else
    # Woodpecker activates by the forge's numeric repo ID, so fetch it first.
    local forge_repo_id
    forge_repo_id=$(curl -sf \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL:-http://localhost:3000}/api/v1/repos/${forge_repo}" 2>/dev/null \
      | jq -r '.id // empty' 2>/dev/null) || forge_repo_id=""

    local activate_resp
    activate_resp=$(curl -sf -X POST \
      -H "Authorization: Bearer ${wp_token}" \
      "${wp_server}/api/repos?forge_remote_id=${forge_repo_id:-0}" \
      2>/dev/null) || activate_resp=""

    wp_repo_id=$(printf '%s' "$activate_resp" | jq -r '.id // empty' 2>/dev/null) || true

    if [[ -n "$wp_repo_id" && "$wp_repo_id" != "0" ]]; then
      echo "Repo: ${forge_repo} activated in Woodpecker (id=${wp_repo_id})"

      # Default pipeline timeout is 60 minutes; tighten it to 5.
      if curl -sf -X PATCH \
        -H "Authorization: Bearer ${wp_token}" \
        -H "Content-Type: application/json" \
        "${wp_server}/api/repos/${wp_repo_id}" \
        -d '{"timeout": 5}' >/dev/null 2>&1; then
        echo "Config: pipeline timeout set to 5 minutes"
      fi
    else
      echo "Warning: could not activate repo in Woodpecker" >&2
      echo " Activate manually: woodpecker-cli repo add ${forge_repo}" >&2
    fi
  fi

  # Store repo ID for later TOML generation
  if [[ -n "$wp_repo_id" && "$wp_repo_id" != "0" ]]; then
    _WP_REPO_ID="$wp_repo_id"
  fi
}
|
||||
55
lib/env.sh
55
lib/env.sh
|
|
@ -13,7 +13,7 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|||
if [ "${DISINTO_CONTAINER:-}" = "1" ]; then
|
||||
DISINTO_DATA_DIR="${HOME}/data"
|
||||
DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs"
|
||||
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics}
|
||||
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher}
|
||||
else
|
||||
DISINTO_LOG_DIR="${FACTORY_ROOT}"
|
||||
fi
|
||||
|
|
@ -21,14 +21,13 @@ export DISINTO_LOG_DIR
|
|||
|
||||
# Load secrets: prefer .env.enc (SOPS-encrypted), fall back to plaintext .env.
|
||||
# Always source .env — cron jobs inside the container do NOT inherit compose
|
||||
# env vars (FORGE_TOKEN, etc.). Compose-injected vars (like FORGE_URL) are
|
||||
# already set and won't be clobbered since env.sh uses ${VAR:-default} patterns
|
||||
# for derived values. FORGE_URL from .env (localhost:3000) is overridden below
|
||||
# by the compose-injected value when running via docker exec.
|
||||
# env vars (FORGE_TOKEN, etc.). Only FORGE_URL is preserved across .env
|
||||
# sourcing because compose injects http://forgejo:3000 while .env has
|
||||
# http://localhost:3000. FORGE_TOKEN is NOT preserved so that refreshed
|
||||
# tokens in .env take effect immediately in running containers.
|
||||
if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then
|
||||
set -a
|
||||
_saved_forge_url="${FORGE_URL:-}"
|
||||
_saved_forge_token="${FORGE_TOKEN:-}"
|
||||
# Use temp file + validate dotenv format before sourcing (avoids eval injection)
|
||||
# SOPS -d automatically verifies MAC/GCM authentication tag during decryption
|
||||
_tmpenv=$(mktemp) || { echo "Error: failed to create temp file for .env.enc" >&2; exit 1; }
|
||||
|
|
@ -55,17 +54,21 @@ if [ -f "$FACTORY_ROOT/.env.enc" ] && command -v sops &>/dev/null; then
|
|||
rm -f "$_tmpenv"
|
||||
set +a
|
||||
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
|
||||
[ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token"
|
||||
elif [ -f "$FACTORY_ROOT/.env" ]; then
|
||||
# Preserve compose-injected FORGE_URL (localhost in .env != forgejo in Docker)
|
||||
_saved_forge_url="${FORGE_URL:-}"
|
||||
_saved_forge_token="${FORGE_TOKEN:-}"
|
||||
set -a
|
||||
# shellcheck source=/dev/null
|
||||
source "$FACTORY_ROOT/.env"
|
||||
set +a
|
||||
[ -n "$_saved_forge_url" ] && export FORGE_URL="$_saved_forge_url"
|
||||
[ -n "$_saved_forge_token" ] && export FORGE_TOKEN="$_saved_forge_token"
|
||||
fi
|
||||
|
||||
# Allow per-container token override (#375): .env sets the default FORGE_TOKEN
|
||||
# (dev-bot), then FORGE_TOKEN_OVERRIDE replaces it for containers that need a
|
||||
# different Forgejo identity (e.g. dev-qwen).
|
||||
if [ -n "${FORGE_TOKEN_OVERRIDE:-}" ]; then
|
||||
export FORGE_TOKEN="$FORGE_TOKEN_OVERRIDE"
|
||||
fi
|
||||
|
||||
# PATH: foundry, node, system
|
||||
|
|
@ -77,16 +80,11 @@ if [ -n "${PROJECT_TOML:-}" ] && [ -f "$PROJECT_TOML" ]; then
|
|||
source "${FACTORY_ROOT}/lib/load-project.sh" "$PROJECT_TOML"
|
||||
fi
|
||||
|
||||
# Forge token: new FORGE_TOKEN > legacy CODEBERG_TOKEN
|
||||
if [ -z "${FORGE_TOKEN:-}" ]; then
|
||||
FORGE_TOKEN="${CODEBERG_TOKEN:-}"
|
||||
fi
|
||||
export FORGE_TOKEN
|
||||
export CODEBERG_TOKEN="${FORGE_TOKEN}" # backwards compat
|
||||
# Forge token
|
||||
export FORGE_TOKEN="${FORGE_TOKEN:-}"
|
||||
|
||||
# Review bot token: FORGE_REVIEW_TOKEN > legacy REVIEW_BOT_TOKEN
|
||||
# Review bot token
|
||||
export FORGE_REVIEW_TOKEN="${FORGE_REVIEW_TOKEN:-${REVIEW_BOT_TOKEN:-}}"
|
||||
export REVIEW_BOT_TOKEN="${FORGE_REVIEW_TOKEN}" # backwards compat
|
||||
|
||||
# Per-agent tokens (#747): each agent gets its own Forgejo identity.
|
||||
# Falls back to FORGE_TOKEN for backwards compat with single-token setups.
|
||||
|
|
@ -97,18 +95,14 @@ export FORGE_SUPERVISOR_TOKEN="${FORGE_SUPERVISOR_TOKEN:-${FORGE_TOKEN}}"
|
|||
export FORGE_PREDICTOR_TOKEN="${FORGE_PREDICTOR_TOKEN:-${FORGE_TOKEN}}"
|
||||
export FORGE_ARCHITECT_TOKEN="${FORGE_ARCHITECT_TOKEN:-${FORGE_TOKEN}}"
|
||||
|
||||
# Bot usernames filter: FORGE_BOT_USERNAMES > legacy CODEBERG_BOT_USERNAMES
|
||||
export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-${CODEBERG_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}}"
|
||||
export CODEBERG_BOT_USERNAMES="${FORGE_BOT_USERNAMES}" # backwards compat
|
||||
# Bot usernames filter
|
||||
export FORGE_BOT_USERNAMES="${FORGE_BOT_USERNAMES:-dev-bot,review-bot,planner-bot,gardener-bot,vault-bot,supervisor-bot,predictor-bot,architect-bot}"
|
||||
|
||||
# Project config (FORGE_* preferred, CODEBERG_* fallback)
|
||||
export FORGE_REPO="${FORGE_REPO:-${CODEBERG_REPO:-}}"
|
||||
export CODEBERG_REPO="${FORGE_REPO}" # backwards compat
|
||||
# Project config
|
||||
export FORGE_REPO="${FORGE_REPO:-}"
|
||||
export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
|
||||
export FORGE_API="${FORGE_API:-${FORGE_URL}/api/v1/repos/${FORGE_REPO}}"
|
||||
export FORGE_WEB="${FORGE_WEB:-${FORGE_URL}/${FORGE_REPO}}"
|
||||
export CODEBERG_API="${FORGE_API}" # backwards compat
|
||||
export CODEBERG_WEB="${FORGE_WEB}" # backwards compat
|
||||
# tea CLI login name: derived from FORGE_URL (codeberg vs local forgejo)
|
||||
if [ -z "${TEA_LOGIN:-}" ]; then
|
||||
case "${FORGE_URL}" in
|
||||
|
|
@ -144,8 +138,12 @@ unset CLAWHUB_TOKEN 2>/dev/null || true
|
|||
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
|
||||
|
||||
# Shared log helper
|
||||
# Usage: log "message"
|
||||
# Output: [2026-04-03T14:00:00Z] agent: message
|
||||
# Where agent is set via LOG_AGENT variable (defaults to caller's context)
|
||||
log() {
|
||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
|
||||
local agent="${LOG_AGENT:-agent}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -209,8 +207,6 @@ forge_api() {
|
|||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}${path}" "$@"
|
||||
}
|
||||
# Backwards-compat alias
|
||||
codeberg_api() { forge_api "$@"; }
|
||||
|
||||
# Paginate a Forge API GET endpoint and return all items as a merged JSON array.
|
||||
# Usage: forge_api_all /path (no existing query params)
|
||||
|
|
@ -227,7 +223,8 @@ forge_api_all() {
|
|||
page=1
|
||||
while true; do
|
||||
page_items=$(forge_api GET "${path_prefix}${sep}limit=50&page=${page}")
|
||||
count=$(printf '%s' "$page_items" | jq 'length')
|
||||
count=$(printf '%s' "$page_items" | jq 'length' 2>/dev/null) || count=0
|
||||
[ -z "$count" ] && count=0
|
||||
[ "$count" -eq 0 ] && break
|
||||
all_items=$(printf '%s\n%s' "$all_items" "$page_items" | jq -s 'add')
|
||||
[ "$count" -lt 50 ] && break
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# file-action-issue.sh — File an action issue for a formula run
|
||||
#
|
||||
# Usage: source this file, then call file_action_issue.
|
||||
# Requires: forge_api() from lib/env.sh, jq, lib/secret-scan.sh
|
||||
#
|
||||
# file_action_issue <formula_name> <title> <body>
|
||||
# Sets FILED_ISSUE_NUM on success.
|
||||
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
|
||||
|
||||
# Load secret scanner
|
||||
# shellcheck source=secret-scan.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh"
|
||||
|
||||
# file_action_issue <formula_name> <title> <body>
# File an "action" issue for a formula run, deduplicating against open
# action issues and refusing bodies that contain embedded secrets.
# Sets FILED_ISSUE_NUM on success.
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error,
#          4=secrets detected
file_action_issue() {
  local formula_name="$1" title="$2" body="$3"
  FILED_ISSUE_NUM=""

  # Never publish a body that trips the secret scanner.
  if ! scan_for_secrets "$body"; then
    echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2
    return 4
  fi

  # Dedup: an open action issue whose title matches the formula name means
  # this run was already filed.
  local open_actions existing
  open_actions=$(forge_api_all "/issues?state=open&type=issues&labels=action" 2>/dev/null || true)
  if [[ -n "$open_actions" && "$open_actions" != "null" ]]; then
    existing=$(printf '%s' "$open_actions" | \
      jq --arg f "$formula_name" '[.[] | select(.title | test($f))] | length' 2>/dev/null || echo 0)
    if [[ "${existing:-0}" -gt 0 ]]; then
      return 1
    fi
  fi

  # The 'action' label must already exist on the repo.
  local action_label_id
  action_label_id=$(forge_api GET "/labels" 2>/dev/null | \
    jq -r '.[] | select(.name == "action") | .id' 2>/dev/null || true)
  if [[ -z "$action_label_id" ]]; then
    return 2
  fi

  # Create the issue and capture its number.
  local payload result
  payload=$(jq -nc \
    --arg title "$title" \
    --arg body "$body" \
    --argjson labels "[$action_label_id]" \
    '{title: $title, body: $body, labels: $labels}')

  result=$(forge_api POST "/issues" -d "$payload" 2>/dev/null || true)
  FILED_ISSUE_NUM=$(printf '%s' "$result" | jq -r '.number // empty' 2>/dev/null || true)

  if [[ -z "$FILED_ISSUE_NUM" ]]; then
    return 3
  fi
}
|
||||
101
lib/forge-push.sh
Normal file
101
lib/forge-push.sh
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# forge-push.sh — push_to_forge() function
|
||||
#
|
||||
# Handles pushing a local clone to the Forgejo remote and verifying the push.
|
||||
#
|
||||
# Globals expected:
|
||||
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||
# FORGE_TOKEN - API token for Forge operations (used for API verification)
|
||||
# FORGE_PASS - Bot password for git HTTP push (#361: tokens rejected by Forgejo 11.x)
|
||||
# FACTORY_ROOT - Root of the disinto factory
|
||||
# PRIMARY_BRANCH - Primary branch name (e.g. main)
|
||||
#
|
||||
# Usage:
|
||||
# source "${FACTORY_ROOT}/lib/forge-push.sh"
|
||||
# push_to_forge <repo_root> <forge_url> <repo_slug>
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
# Assert required globals are set before using this module.
|
||||
# Assert required globals are set before using this module.
# Exits 1 with a list of the missing names; otherwise returns 0.
_assert_forge_push_globals() {
  local required=(FORGE_URL FORGE_PASS FORGE_TOKEN FACTORY_ROOT PRIMARY_BRANCH)
  local name missing=()
  # Collect every empty/unset global via indirect expansion.
  for name in "${required[@]}"; do
    [ -n "${!name:-}" ] || missing+=("$name")
  done
  if (( ${#missing[@]} > 0 )); then
    echo "Error: forge-push.sh requires these globals to be set: ${missing[*]}" >&2
    exit 1
  fi
}
|
||||
|
||||
# Push local clone to the Forgejo remote.
|
||||
# push_to_forge <repo_root> <forge_url> <repo_slug>
# Push the local clone (all branches + tags) to the Forgejo remote and
# verify via the API that the repo is no longer empty.
# Globals read: FORGE_PASS (git HTTP auth), FORGE_TOKEN (API verification).
# Returns 0 on success (or push skipped), 1 on push/verification failure.
push_to_forge() {
  local repo_root="$1" forge_url="$2" repo_slug="$3"

  # Forgejo 11.x rejects API tokens for git HTTP push (#361); password auth works.
  if [ -z "${FORGE_PASS:-}" ]; then
    echo "Error: FORGE_PASS not set — cannot push to Forgejo (see #361)" >&2
    return 1
  fi

  # Build authenticated remote URL: http://dev-bot:<password>@host:port/org/repo.git
  # Fix: use bash pattern substitution instead of sed so a password containing
  # sed metacharacters ('\', '&', '|') cannot corrupt the URL.
  # NOTE(review): passwords containing '@', '/' or ':' would still need
  # URL-encoding — confirm the bot password alphabet upstream.
  local auth_url="${forge_url/:\/\//:\/\/dev-bot:${FORGE_PASS}@}"
  local remote_url="${auth_url}/${repo_slug}.git"
  # Display URL without credentials
  local display_url="${forge_url}/${repo_slug}.git"

  # Always set the remote URL to ensure credentials are current
  if git -C "$repo_root" remote get-url forgejo >/dev/null 2>&1; then
    git -C "$repo_root" remote set-url forgejo "$remote_url"
  else
    git -C "$repo_root" remote add forgejo "$remote_url"
  fi
  echo "Remote: forgejo -> ${display_url}"

  # Skip push if local repo has no commits (e.g. cloned from empty Forgejo repo)
  if ! git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then
    echo "Push: skipped (local repo has no commits)"
    return 0
  fi

  # Push all branches and tags
  echo "Pushing: branches to forgejo"
  if ! git -C "$repo_root" push forgejo --all 2>&1; then
    echo "Error: failed to push branches to Forgejo" >&2
    return 1
  fi
  echo "Pushing: tags to forgejo"
  if ! git -C "$repo_root" push forgejo --tags 2>&1; then
    echo "Error: failed to push tags to Forgejo" >&2
    return 1
  fi

  # Verify the repo is no longer empty (Forgejo may need a moment to index
  # pushed refs): poll the API up to 5 times, 2s apart.
  local is_empty="true"
  local verify_attempt
  for verify_attempt in 1 2 3 4 5; do
    local repo_info
    repo_info=$(curl -sf --max-time 10 \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${forge_url}/api/v1/repos/${repo_slug}" 2>/dev/null) || repo_info=""
    if [ -z "$repo_info" ]; then
      is_empty="skipped"
      break # API unreachable, skip verification
    fi
    is_empty=$(printf '%s' "$repo_info" | jq -r '.empty // "unknown"')
    if [ "$is_empty" != "true" ]; then
      echo "Verify: repo is not empty (push confirmed)"
      break
    fi
    if [ "$verify_attempt" -lt 5 ]; then
      sleep 2
    fi
  done
  if [ "$is_empty" = "true" ]; then
    echo "Warning: Forgejo repo still reports empty after push" >&2
    return 1
  fi
}
|
||||
518
lib/forge-setup.sh
Normal file
518
lib/forge-setup.sh
Normal file
|
|
@ -0,0 +1,518 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# forge-setup.sh — setup_forge() and helpers for Forgejo provisioning
|
||||
#
|
||||
# Handles admin user creation, bot user creation, token generation,
|
||||
# password resets, repo creation, and collaborator setup.
|
||||
#
|
||||
# Globals expected (asserted by _load_init_context):
|
||||
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||
# FACTORY_ROOT - Root of the disinto factory
|
||||
# PRIMARY_BRANCH - Primary branch name (e.g. main)
|
||||
#
|
||||
# Usage:
|
||||
# source "${FACTORY_ROOT}/lib/forge-setup.sh"
|
||||
# setup_forge <forge_url> <repo_slug>
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
# Assert required globals are set before using this module.
|
||||
# _load_init_context
# Assert that the globals this module depends on are set and non-empty.
# Exits 1 with a message naming every missing variable; returns 0 otherwise.
_load_init_context() {
  local var missing=()
  # Collect every required global that is unset or empty (checked in order).
  for var in FORGE_URL FACTORY_ROOT PRIMARY_BRANCH; do
    [ -z "${!var:-}" ] && missing+=("$var")
  done
  if [ "${#missing[@]}" -gt 0 ]; then
    echo "Error: forge-setup.sh requires these globals to be set: ${missing[*]}" >&2
    exit 1
  fi
}
|
||||
|
||||
# Execute a command in the Forgejo container (for admin operations)
|
||||
# _forgejo_exec CMD [ARGS...]
# Run a command inside the Forgejo container as the git user.
# Bare-metal deployments (DISINTO_BARE=true) use a standalone container named
# disinto-forgejo; otherwise the compose service "forgejo" is used.
_forgejo_exec() {
  if [ "${DISINTO_BARE:-false}" = true ]; then
    docker exec -u git disinto-forgejo "$@"
  else
    docker compose -f "${FACTORY_ROOT}/docker-compose.yml" exec -T -u git forgejo "$@"
  fi
}
|
||||
|
||||
# Provision or connect to a local Forgejo instance.
|
||||
# Creates admin + bot users, generates API tokens, stores in .env.
|
||||
# When $DISINTO_BARE is set, uses standalone docker run; otherwise uses compose.
|
||||
# setup_forge FORGE_URL REPO_SLUG
# End-to-end Forgejo provisioning:
#   1. start Forgejo (docker run in bare mode, docker compose otherwise) if the
#      API is unreachable, then wait for HTTP and DB readiness,
#   2. create/repair the admin user, the human user, and all bot users,
#   3. mint fresh API tokens (the sha1 is only returned at creation time, so
#      stale tokens are deleted first) and persist tokens + passwords to .env,
#   4. create the org/repo on Forgejo and grant per-bot collaborator perms.
# Exits non-zero on any unrecoverable failure. Mutates ${FACTORY_ROOT}/.env
# and exports FORGE_TOKEN-family variables into the current shell.
setup_forge() {
  local forge_url="$1"
  local repo_slug="$2"
  local use_bare="${DISINTO_BARE:-false}"

  echo ""
  echo "── Forge setup ────────────────────────────────────────"

  # Check if Forgejo is already running
  if curl -sf --max-time 5 "${forge_url}/api/v1/version" >/dev/null 2>&1; then
    echo "Forgejo: ${forge_url} (already running)"
  else
    echo "Forgejo not reachable at ${forge_url}"
    echo "Starting Forgejo via Docker..."

    if ! command -v docker &>/dev/null; then
      echo "Error: docker not found — needed to provision Forgejo" >&2
      echo " Install Docker or start Forgejo manually at ${forge_url}" >&2
      exit 1
    fi

    # Extract port from forge_url (defaults to 3000 when no port is present)
    local forge_port
    forge_port=$(printf '%s' "$forge_url" | sed -E 's|.*:([0-9]+)/?$|\1|')
    forge_port="${forge_port:-3000}"

    if [ "$use_bare" = true ]; then
      # Bare-metal mode: standalone docker run
      mkdir -p "${FORGEJO_DATA_DIR}"

      # Reuse an existing (stopped) container if one exists
      if docker ps -a --format '{{.Names}}' | grep -q '^disinto-forgejo$'; then
        docker start disinto-forgejo >/dev/null 2>&1 || true
      else
        docker run -d \
          --name disinto-forgejo \
          --restart unless-stopped \
          -p "${forge_port}:3000" \
          -p 2222:22 \
          -v "${FORGEJO_DATA_DIR}:/data" \
          -e "FORGEJO__database__DB_TYPE=sqlite3" \
          -e "FORGEJO__server__ROOT_URL=${forge_url}/" \
          -e "FORGEJO__server__HTTP_PORT=3000" \
          -e "FORGEJO__service__DISABLE_REGISTRATION=true" \
          codeberg.org/forgejo/forgejo:11.0
      fi
    else
      # Compose mode: start Forgejo via docker compose
      docker compose -f "${FACTORY_ROOT}/docker-compose.yml" up -d forgejo
    fi

    # Wait for Forgejo to become healthy (up to ~60s, polling once per second)
    echo -n "Waiting for Forgejo to start"
    local retries=0
    while ! curl -sf --max-time 3 "${forge_url}/api/v1/version" >/dev/null 2>&1; do
      retries=$((retries + 1))
      if [ "$retries" -gt 60 ]; then
        echo ""
        echo "Error: Forgejo did not become ready within 60s" >&2
        exit 1
      fi
      echo -n "."
      sleep 1
    done
    echo " ready"
  fi

  # Wait for Forgejo database to accept writes (API may be ready before DB is)
  echo -n "Waiting for Forgejo database"
  local db_ready=false
  for _i in $(seq 1 30); do
    if _forgejo_exec forgejo admin user list >/dev/null 2>&1; then
      db_ready=true
      break
    fi
    echo -n "."
    sleep 1
  done
  echo ""
  if [ "$db_ready" != true ]; then
    echo "Error: Forgejo database not ready after 30s" >&2
    exit 1
  fi

  # Create admin user if it doesn't exist
  local admin_user="disinto-admin"
  local admin_pass
  local env_file="${FACTORY_ROOT}/.env"

  # Re-read persisted admin password if available (#158)
  if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
    admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
  fi
  # Generate a fresh password only when none was persisted
  if [ -z "${admin_pass:-}" ]; then
    admin_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
  fi

  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
    echo "Creating admin user: ${admin_user}"
    local create_output
    if ! create_output=$(_forgejo_exec forgejo admin user create \
      --admin \
      --username "${admin_user}" \
      --password "${admin_pass}" \
      --email "admin@disinto.local" \
      --must-change-password=false 2>&1); then
      echo "Error: failed to create admin user '${admin_user}':" >&2
      echo " ${create_output}" >&2
      exit 1
    fi
    # Forgejo 11.x ignores --must-change-password=false on create;
    # explicitly clear the flag so basic-auth token creation works.
    _forgejo_exec forgejo admin user change-password \
      --username "${admin_user}" \
      --password "${admin_pass}" \
      --must-change-password=false

    # Verify admin user was actually created
    if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${admin_user}" >/dev/null 2>&1; then
      echo "Error: admin user '${admin_user}' not found after creation" >&2
      exit 1
    fi

    # Persist admin password to .env for idempotent re-runs (#158)
    if grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
      sed -i "s|^FORGE_ADMIN_PASS=.*|FORGE_ADMIN_PASS=${admin_pass}|" "$env_file"
    else
      printf 'FORGE_ADMIN_PASS=%s\n' "$admin_pass" >> "$env_file"
    fi
  else
    echo "Admin user: ${admin_user} (already exists)"
    # Only reset password if basic auth fails (#158, #267)
    # Forgejo 11.x may ignore --must-change-password=false, blocking token creation
    if ! curl -sf --max-time 5 -u "${admin_user}:${admin_pass}" \
      "${forge_url}/api/v1/user" >/dev/null 2>&1; then
      _forgejo_exec forgejo admin user change-password \
        --username "${admin_user}" \
        --password "${admin_pass}" \
        --must-change-password=false
    fi
  fi
  # Preserve password for Woodpecker OAuth2 token generation (#779)
  _FORGE_ADMIN_PASS="$admin_pass"

  # Create human user (disinto-admin) as site admin if it doesn't exist
  # NOTE(review): human_user equals admin_user ("disinto-admin"), so when the
  # admin block above ran, this branch always takes the already-exists path
  # and human_pass never matches the account — confirm the intended username.
  local human_user="disinto-admin"
  local human_pass
  human_pass="admin-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"

  if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
    echo "Creating human user: ${human_user}"
    local create_output
    if ! create_output=$(_forgejo_exec forgejo admin user create \
      --admin \
      --username "${human_user}" \
      --password "${human_pass}" \
      --email "admin@disinto.local" \
      --must-change-password=false 2>&1); then
      echo "Error: failed to create human user '${human_user}':" >&2
      echo " ${create_output}" >&2
      exit 1
    fi
    # Forgejo 11.x ignores --must-change-password=false on create;
    # explicitly clear the flag so basic-auth token creation works.
    _forgejo_exec forgejo admin user change-password \
      --username "${human_user}" \
      --password "${human_pass}" \
      --must-change-password=false

    # Verify human user was actually created
    if ! curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
      echo "Error: human user '${human_user}' not found after creation" >&2
      exit 1
    fi
    echo " Human user '${human_user}' created as site admin"
  else
    echo "Human user: ${human_user} (already exists)"
  fi

  # Delete existing admin token if present (token sha1 is only returned at creation time)
  local existing_token_id
  existing_token_id=$(curl -sf \
    -u "${admin_user}:${admin_pass}" \
    "${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
    | jq -r '.[] | select(.name == "disinto-admin-token") | .id') || existing_token_id=""
  if [ -n "$existing_token_id" ]; then
    curl -sf -X DELETE \
      -u "${admin_user}:${admin_pass}" \
      "${forge_url}/api/v1/users/${admin_user}/tokens/${existing_token_id}" >/dev/null 2>&1 || true
  fi

  # Create admin token (fresh, so sha1 is returned)
  local admin_token
  admin_token=$(curl -sf -X POST \
    -u "${admin_user}:${admin_pass}" \
    -H "Content-Type: application/json" \
    "${forge_url}/api/v1/users/${admin_user}/tokens" \
    -d '{"name":"disinto-admin-token","scopes":["all"]}' 2>/dev/null \
    | jq -r '.sha1 // empty') || admin_token=""

  if [ -z "$admin_token" ]; then
    echo "Error: failed to obtain admin API token" >&2
    exit 1
  fi

  # Get or create human user token
  local human_token
  if curl -sf --max-time 5 "${forge_url}/api/v1/users/${human_user}" >/dev/null 2>&1; then
    # Delete existing human token if present (token sha1 is only returned at creation time)
    local existing_human_token_id
    existing_human_token_id=$(curl -sf \
      -u "${human_user}:${human_pass}" \
      "${forge_url}/api/v1/users/${human_user}/tokens" 2>/dev/null \
      | jq -r '.[] | select(.name == "disinto-human-token") | .id') || existing_human_token_id=""
    if [ -n "$existing_human_token_id" ]; then
      curl -sf -X DELETE \
        -u "${human_user}:${human_pass}" \
        "${forge_url}/api/v1/users/${human_user}/tokens/${existing_human_token_id}" >/dev/null 2>&1 || true
    fi

    # Create human token (fresh, so sha1 is returned)
    human_token=$(curl -sf -X POST \
      -u "${human_user}:${human_pass}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/users/${human_user}/tokens" \
      -d '{"name":"disinto-human-token","scopes":["all"]}' 2>/dev/null \
      | jq -r '.sha1 // empty') || human_token=""

    # Token creation is best-effort: a failure here is silently tolerated.
    if [ -n "$human_token" ]; then
      # Store human token in .env
      if grep -q '^HUMAN_TOKEN=' "$env_file" 2>/dev/null; then
        sed -i "s|^HUMAN_TOKEN=.*|HUMAN_TOKEN=${human_token}|" "$env_file"
      else
        printf 'HUMAN_TOKEN=%s\n' "$human_token" >> "$env_file"
      fi
      export HUMAN_TOKEN="$human_token"
      echo " Human token saved (HUMAN_TOKEN)"
    fi
  fi

  # Create bot users and tokens
  # Each agent gets its own Forgejo account for identity and audit trail (#747).
  # Map: bot-username -> env-var-name for the token
  local -A bot_token_vars=(
    [dev-bot]="FORGE_TOKEN"
    [review-bot]="FORGE_REVIEW_TOKEN"
    [planner-bot]="FORGE_PLANNER_TOKEN"
    [gardener-bot]="FORGE_GARDENER_TOKEN"
    [vault-bot]="FORGE_VAULT_TOKEN"
    [supervisor-bot]="FORGE_SUPERVISOR_TOKEN"
    [predictor-bot]="FORGE_PREDICTOR_TOKEN"
    [architect-bot]="FORGE_ARCHITECT_TOKEN"
  )
  # Map: bot-username -> env-var-name for the password
  # Forgejo 11.x API tokens don't work for git HTTP push (#361).
  # Store passwords so agents can use password auth for git operations.
  local -A bot_pass_vars=(
    [dev-bot]="FORGE_PASS"
    [review-bot]="FORGE_REVIEW_PASS"
    [planner-bot]="FORGE_PLANNER_PASS"
    [gardener-bot]="FORGE_GARDENER_PASS"
    [vault-bot]="FORGE_VAULT_PASS"
    [supervisor-bot]="FORGE_SUPERVISOR_PASS"
    [predictor-bot]="FORGE_PREDICTOR_PASS"
    [architect-bot]="FORGE_ARCHITECT_PASS"
  )

  local bot_user bot_pass token token_var pass_var

  for bot_user in dev-bot review-bot planner-bot gardener-bot vault-bot supervisor-bot predictor-bot architect-bot; do
    # Fresh random password each run; it is re-applied to the account below.
    bot_pass="bot-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
    token_var="${bot_token_vars[$bot_user]}"

    # Check if bot user exists
    local user_exists=false
    if curl -sf --max-time 5 \
      -H "Authorization: token ${admin_token}" \
      "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
      user_exists=true
    fi

    if [ "$user_exists" = false ]; then
      echo "Creating bot user: ${bot_user}"
      local create_output
      if ! create_output=$(_forgejo_exec forgejo admin user create \
        --username "${bot_user}" \
        --password "${bot_pass}" \
        --email "${bot_user}@disinto.local" \
        --must-change-password=false 2>&1); then
        echo "Error: failed to create bot user '${bot_user}':" >&2
        echo " ${create_output}" >&2
        exit 1
      fi
      # Forgejo 11.x ignores --must-change-password=false on create;
      # explicitly clear the flag so basic-auth token creation works.
      _forgejo_exec forgejo admin user change-password \
        --username "${bot_user}" \
        --password "${bot_pass}" \
        --must-change-password=false

      # Verify bot user was actually created
      if ! curl -sf --max-time 5 \
        -H "Authorization: token ${admin_token}" \
        "${forge_url}/api/v1/users/${bot_user}" >/dev/null 2>&1; then
        echo "Error: bot user '${bot_user}' not found after creation" >&2
        exit 1
      fi
      echo " ${bot_user} user created"
    else
      echo " ${bot_user} user exists (resetting password for token generation)"
      # User exists but may not have a known password.
      # Use admin API to reset the password so we can generate a new token.
      _forgejo_exec forgejo admin user change-password \
        --username "${bot_user}" \
        --password "${bot_pass}" \
        --must-change-password=false || {
        echo "Error: failed to reset password for existing bot user '${bot_user}'" >&2
        exit 1
      }
    fi

    # Generate token via API (basic auth as the bot user — Forgejo requires
    # basic auth on POST /users/{username}/tokens, token auth is rejected)
    # First, try to delete existing tokens to avoid name collision
    # Use bot user's own Basic Auth (we just set the password above)
    local existing_token_ids
    existing_token_ids=$(curl -sf \
      -u "${bot_user}:${bot_pass}" \
      "${forge_url}/api/v1/users/${bot_user}/tokens" 2>/dev/null \
      | jq -r '.[].id // empty' 2>/dev/null) || existing_token_ids=""

    # Delete any existing tokens for this user
    if [ -n "$existing_token_ids" ]; then
      while IFS= read -r tid; do
        [ -n "$tid" ] && curl -sf -X DELETE \
          -u "${bot_user}:${bot_pass}" \
          "${forge_url}/api/v1/users/${bot_user}/tokens/${tid}" >/dev/null 2>&1 || true
      done <<< "$existing_token_ids"
    fi

    token=$(curl -sf -X POST \
      -u "${bot_user}:${bot_pass}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/users/${bot_user}/tokens" \
      -d "{\"name\":\"disinto-${bot_user}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
      | jq -r '.sha1 // empty') || token=""

    if [ -z "$token" ]; then
      echo "Error: failed to create API token for '${bot_user}'" >&2
      exit 1
    fi

    # Store token in .env under the per-agent variable name
    if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${token_var}=.*|${token_var}=${token}|" "$env_file"
    else
      printf '%s=%s\n' "$token_var" "$token" >> "$env_file"
    fi
    export "${token_var}=${token}"
    echo " ${bot_user} token generated and saved (${token_var})"

    # Store password in .env for git HTTP push (#361)
    # Forgejo 11.x API tokens don't work for git push; password auth does.
    pass_var="${bot_pass_vars[$bot_user]}"
    if grep -q "^${pass_var}=" "$env_file" 2>/dev/null; then
      sed -i "s|^${pass_var}=.*|${pass_var}=${bot_pass}|" "$env_file"
    else
      printf '%s=%s\n' "$pass_var" "$bot_pass" >> "$env_file"
    fi
    export "${pass_var}=${bot_pass}"
    echo " ${bot_user} password saved (${pass_var})"

    # Backwards-compat aliases for dev-bot and review-bot
    if [ "$bot_user" = "dev-bot" ]; then
      export CODEBERG_TOKEN="$token"
    elif [ "$bot_user" = "review-bot" ]; then
      export REVIEW_BOT_TOKEN="$token"
    fi
  done

  # Store FORGE_URL in .env if not already present
  if ! grep -q '^FORGE_URL=' "$env_file" 2>/dev/null; then
    printf 'FORGE_URL=%s\n' "$forge_url" >> "$env_file"
  fi

  # Create the repo on Forgejo if it doesn't exist
  local org_name="${repo_slug%%/*}"
  local repo_name="${repo_slug##*/}"

  # Check if repo already exists
  if ! curl -sf --max-time 5 \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${forge_url}/api/v1/repos/${repo_slug}" >/dev/null 2>&1; then

    # Try creating org first (ignore if exists)
    curl -sf -X POST \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs" \
      -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true

    # Create repo under org
    if ! curl -sf -X POST \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs/${org_name}/repos" \
      -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
      # Fallback: create under the human user namespace using admin endpoint
      if [ -n "${admin_token:-}" ]; then
        if ! curl -sf -X POST \
          -H "Authorization: token ${admin_token}" \
          -H "Content-Type: application/json" \
          "${forge_url}/api/v1/admin/users/${org_name}/repos" \
          -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
          echo "Error: failed to create repo '${repo_slug}' on Forgejo (admin endpoint)" >&2
          exit 1
        fi
      elif [ -n "${HUMAN_TOKEN:-}" ]; then
        if ! curl -sf -X POST \
          -H "Authorization: token ${HUMAN_TOKEN}" \
          -H "Content-Type: application/json" \
          "${forge_url}/api/v1/user/repos" \
          -d "{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}" >/dev/null 2>&1; then
          echo "Error: failed to create repo '${repo_slug}' on Forgejo (user endpoint)" >&2
          exit 1
        fi
      else
        echo "Error: failed to create repo '${repo_slug}' — no admin or human token available" >&2
        exit 1
      fi
    fi

    # Add all bot users as collaborators with appropriate permissions
    # dev-bot: write (PR creation via lib/vault.sh)
    # review-bot: read (PR review)
    # planner-bot: write (prerequisites.md, memory)
    # gardener-bot: write (backlog grooming)
    # vault-bot: write (vault items)
    # supervisor-bot: read (health monitoring)
    # predictor-bot: read (pattern detection)
    # architect-bot: write (sprint PRs)
    local bot_perm
    declare -A bot_permissions=(
      [dev-bot]="write"
      [review-bot]="read"
      [planner-bot]="write"
      [gardener-bot]="write"
      [vault-bot]="write"
      [supervisor-bot]="read"
      [predictor-bot]="read"
      [architect-bot]="write"
    )
    # Collaborator grants are best-effort; failures are ignored.
    for bot_user in "${!bot_permissions[@]}"; do
      bot_perm="${bot_permissions[$bot_user]}"
      curl -sf -X PUT \
        -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/repos/${repo_slug}/collaborators/${bot_user}" \
        -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1 || true
    done

    # Add disinto-admin as admin collaborator
    curl -sf -X PUT \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/repos/${repo_slug}/collaborators/disinto-admin" \
      -d '{"permission":"admin"}' >/dev/null 2>&1 || true

    echo "Repo: ${repo_slug} created on Forgejo"
  else
    echo "Repo: ${repo_slug} (already exists on Forgejo)"
  fi

  echo "Forge: ${forge_url} (ready)"
}
|
||||
|
|
@ -1,24 +1,34 @@
|
|||
#!/usr/bin/env bash
|
||||
# formula-session.sh — Shared helpers for formula-driven cron agents
|
||||
#
|
||||
# Provides reusable functions for the common cron-wrapper + tmux-session
|
||||
# pattern used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh.
|
||||
# Provides reusable utility functions for the common cron-wrapper pattern
|
||||
# used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh.
|
||||
#
|
||||
# Functions:
|
||||
# acquire_cron_lock LOCK_FILE — PID lock with stale cleanup
|
||||
# check_memory [MIN_MB] — skip if available RAM too low
|
||||
# load_formula FORMULA_FILE — sets FORMULA_CONTENT
|
||||
# build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK
|
||||
# start_formula_session SESSION WORKDIR PHASE_FILE — create tmux + claude
|
||||
# build_prompt_footer [EXTRA_API] — sets PROMPT_FOOTER (API ref + env + phase)
|
||||
# run_formula_and_monitor AGENT [TIMEOUT] [CALLBACK] — session start, inject, monitor, log
|
||||
# formula_phase_callback PHASE — standard crash-recovery callback
|
||||
# build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env)
|
||||
# build_sdk_prompt_footer [EXTRA_API] — omits phase protocol (SDK mode)
|
||||
# formula_worktree_setup WORKTREE — isolated worktree for formula execution
|
||||
# formula_prepare_profile_context — load lessons from .profile repo (pre-session)
|
||||
# formula_lessons_block — return lessons block for prompt
|
||||
# profile_write_journal ISSUE_NUM TITLE OUTCOME [FILES] — post-session journal
|
||||
# profile_load_lessons — load lessons-learned.md into LESSONS_CONTEXT
|
||||
# ensure_profile_repo [AGENT_IDENTITY] — clone/pull .profile repo
|
||||
# _profile_has_repo — check if agent has .profile repo
|
||||
# _count_undigested_journals — count journal entries to digest
|
||||
# _profile_digest_journals — digest journals into lessons
|
||||
# _profile_commit_and_push MESSAGE [FILES] — commit/push to .profile repo
|
||||
# resolve_agent_identity — resolve agent user login from FORGE_TOKEN
|
||||
# build_graph_section — run build-graph.py and set GRAPH_SECTION
|
||||
# build_scratch_instruction SCRATCH_FILE — return context scratch instruction
|
||||
# read_scratch_context SCRATCH_FILE — return scratch file content block
|
||||
# ensure_ops_repo — clone/pull ops repo
|
||||
# ops_commit_and_push MESSAGE [FILES] — commit/push to ops repo
|
||||
# cleanup_stale_crashed_worktrees [HOURS] — thin wrapper around worktree_cleanup_stale
|
||||
#
|
||||
# Requires: lib/agent-session.sh sourced first (for create_agent_session,
|
||||
# agent_kill_session, agent_inject_into_session).
|
||||
# Globals used by formula_phase_callback: SESSION_NAME, PHASE_FILE,
|
||||
# PROJECT_REPO_ROOT, PROMPT (set by the calling script).
|
||||
# Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers.
|
||||
|
||||
# ── Cron guards ──────────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -40,18 +50,6 @@ acquire_cron_lock() {
|
|||
trap 'rm -f "$_CRON_LOCK_FILE"' EXIT
|
||||
}
|
||||
|
||||
# check_memory [MIN_MB]
|
||||
# Exits 0 (skip) if available memory is below MIN_MB (default 2000).
|
||||
check_memory() {
|
||||
local min_mb="${1:-2000}"
|
||||
local avail_mb
|
||||
avail_mb=$(free -m | awk '/Mem:/{print $7}')
|
||||
if [ "${avail_mb:-0}" -lt "$min_mb" ]; then
|
||||
log "run: skipping — only ${avail_mb}MB available (need ${min_mb})"
|
||||
exit 0
|
||||
fi
|
||||
}
|
||||
|
||||
# ── Agent identity resolution ────────────────────────────────────────────
|
||||
|
||||
# resolve_agent_identity
|
||||
|
|
@ -75,6 +73,24 @@ resolve_agent_identity() {
|
|||
return 0
|
||||
}
|
||||
|
||||
# ── Forge remote resolution ──────────────────────────────────────────────
|
||||
|
||||
# resolve_forge_remote
# Resolves FORGE_REMOTE by matching the FORGE_URL hostname against the push
# URLs of the configured git remotes. Falls back to "origin" if no match.
# Requires: FORGE_URL set, cwd inside a git repo with remotes configured.
# Exports: FORGE_REMOTE (always set).
resolve_forge_remote() {
  # Fix: declare the scratch variable local so it no longer leaks into the
  # caller's environment on every invocation.
  local _forge_host
  # Extract hostname from FORGE_URL (e.g., https://codeberg.org/user/repo -> codeberg.org)
  _forge_host=$(printf '%s' "$FORGE_URL" | sed 's|https\?://||; s|/.*||; s|:.*||')
  # Find the first git remote whose push URL matches the forge host
  FORGE_REMOTE=$(git remote -v | awk -v host="$_forge_host" '$2 ~ host && /\(push\)/ {print $1; exit}')
  # Fallback to origin if no match found
  FORGE_REMOTE="${FORGE_REMOTE:-origin}"
  export FORGE_REMOTE
  log "forge remote: ${FORGE_REMOTE}"
}
|
||||
|
||||
# ── .profile repo management ──────────────────────────────────────────────
|
||||
|
||||
# ensure_profile_repo [AGENT_IDENTITY]
|
||||
|
|
@ -134,7 +150,7 @@ ensure_profile_repo() {
|
|||
# Checks if the agent has a .profile repo by querying Forgejo API.
|
||||
# Returns 0 if repo exists, 1 otherwise.
|
||||
_profile_has_repo() {
|
||||
local agent_identity="${1:-${AGENT_IDENTITY:-}}"
|
||||
local agent_identity="${AGENT_IDENTITY:-}"
|
||||
|
||||
if [ -z "$agent_identity" ]; then
|
||||
if ! resolve_agent_identity; then
|
||||
|
|
@ -170,8 +186,8 @@ _count_undigested_journals() {
|
|||
# Runs a claude -p one-shot to digest undigested journals into lessons-learned.md
|
||||
# Returns 0 on success, 1 on failure.
|
||||
_profile_digest_journals() {
|
||||
local agent_identity="${1:-${AGENT_IDENTITY:-}}"
|
||||
local model="${2:-${CLAUDE_MODEL:-opus}}"
|
||||
local agent_identity="${AGENT_IDENTITY:-}"
|
||||
local model="${CLAUDE_MODEL:-opus}"
|
||||
|
||||
if [ -z "$agent_identity" ]; then
|
||||
if ! resolve_agent_identity; then
|
||||
|
|
@ -237,7 +253,6 @@ Write the complete, rewritten lessons-learned.md content below. No preamble, no
|
|||
output=$(claude -p "$digest_prompt" \
|
||||
--output-format json \
|
||||
--dangerously-skip-permissions \
|
||||
--max-tokens 1000 \
|
||||
${model:+--model "$model"} \
|
||||
2>>"$LOGFILE" || echo '{"result":"error"}')
|
||||
|
||||
|
|
@ -432,7 +447,6 @@ Write the journal entry below. Use markdown format."
|
|||
output=$(claude -p "$reflection_prompt" \
|
||||
--output-format json \
|
||||
--dangerously-skip-permissions \
|
||||
--max-tokens 500 \
|
||||
${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} \
|
||||
2>>"$LOGFILE" || echo '{"result":"error"}')
|
||||
|
||||
|
|
@ -557,7 +571,7 @@ $(cat "$ctx_path")
|
|||
done
|
||||
}
|
||||
|
||||
# ── Ops repo helpers ─────────────────────────────────────────────────
|
||||
# ── Ops repo helpers ────────────────────────────────────────────────────
|
||||
|
||||
# ensure_ops_repo
|
||||
# Clones or pulls the ops repo so agents can read/write operational data.
|
||||
|
|
@ -620,90 +634,6 @@ ops_commit_and_push() {
|
|||
)
|
||||
}
|
||||
|
||||
# ── Session management ───────────────────────────────────────────────────
|
||||
|
||||
# start_formula_session SESSION WORKDIR PHASE_FILE
# Kills any stale session, resets the phase file, creates a per-agent git
# worktree under /tmp for session isolation, and launches a fresh tmux +
# claude session inside it. On worktree failure falls back to WORKDIR.
# Sets _FORMULA_SESSION_WORKDIR to the directory the session runs in;
# callers must clean up via remove_formula_worktree afterwards.
# Returns 0 on success, 1 if the tmux session could not be created.
start_formula_session() {
  local sess="$1" base_dir="$2" phase="$3"

  agent_kill_session "$sess"
  rm -f "$phase"

  # Isolated CWD per agent so Claude Code treats each formula run as its
  # own project — avoids resume collisions between sequential runs.
  _FORMULA_SESSION_WORKDIR="/tmp/disinto-${sess}"
  # Drop any leftover worktree from an earlier run before re-adding.
  git -C "$base_dir" worktree remove "$_FORMULA_SESSION_WORKDIR" --force 2>/dev/null || true
  if ! git -C "$base_dir" worktree add "$_FORMULA_SESSION_WORKDIR" HEAD --detach 2>/dev/null; then
    log "WARNING: worktree creation failed — falling back to ${base_dir}"
    _FORMULA_SESSION_WORKDIR="$base_dir"
  else
    log "Created worktree: ${_FORMULA_SESSION_WORKDIR}"
  fi

  log "Creating tmux session: ${sess}"
  create_agent_session "$sess" "$_FORMULA_SESSION_WORKDIR" "$phase" && return 0
  log "ERROR: failed to create tmux session ${sess}"
  return 1
}
|
||||
|
||||
# remove_formula_worktree
# Removes the worktree created by start_formula_session, unless it is the
# main checkout (PROJECT_REPO_ROOT). Idempotent: safe to call repeatedly,
# and a no-op when no worktree was ever created.
remove_formula_worktree() {
  local wt="${_FORMULA_SESSION_WORKDIR:-}"
  # Guard: nothing to remove, or the "worktree" IS the primary checkout.
  if [ -z "$wt" ] || [ "$wt" = "${PROJECT_REPO_ROOT:-}" ]; then
    return 0
  fi
  git -C "$PROJECT_REPO_ROOT" worktree remove "$wt" --force 2>/dev/null || true
  log "Removed worktree: ${_FORMULA_SESSION_WORKDIR}"
}
|
||||
|
||||
# formula_phase_callback PHASE
# Standard crash-recovery phase callback for formula sessions, invoked by the
# monitor with phase strings such as "PHASE:crashed" or "PHASE:done".
# On a crash it attempts exactly one recovery (re-create the session and
# re-inject the prompt); on terminal phases it kills the tmux session.
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT.
# Uses _FORMULA_CRASH_COUNT (auto-initialized) for the single-retry limit.
# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
formula_phase_callback() {
  local phase="$1"
  log "phase: ${phase}"
  case "$phase" in
    PHASE:crashed)
      # Second crash after a recovery attempt: stop retrying (return 0 so
      # the monitor loop keeps its normal termination handling).
      if [ "${_FORMULA_CRASH_COUNT:-0}" -gt 0 ]; then
        log "ERROR: session crashed again after recovery — giving up"
        return 0
      fi
      _FORMULA_CRASH_COUNT=$(( ${_FORMULA_CRASH_COUNT:-0} + 1 ))
      log "WARNING: tmux session died unexpectedly — attempting recovery"
      # Restart in the same workdir (worktree if one was created, else the
      # main repo root) and re-inject the original prompt.
      if create_agent_session "${_MONITOR_SESSION:-$SESSION_NAME}" "${_FORMULA_SESSION_WORKDIR:-$PROJECT_REPO_ROOT}" "$PHASE_FILE" 2>/dev/null; then
        agent_inject_into_session "${_MONITOR_SESSION:-$SESSION_NAME}" "$PROMPT"
        log "Recovery session started"
      else
        log "ERROR: could not restart session after crash"
      fi
      ;;
    PHASE:done|PHASE:failed|PHASE:escalate|PHASE:merged)
      # Terminal phases: tear down the tmux session.
      agent_kill_session "${_MONITOR_SESSION:-$SESSION_NAME}"
      ;;
  esac
}
|
||||
|
||||
# ── Stale crashed worktree cleanup ─────────────────────────────────────────
|
||||
|
||||
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
# Backwards-compatible shim around worktree_cleanup_stale() from
# lib/worktree.sh (must be sourced first). Default max age: 24 hours.
cleanup_stale_crashed_worktrees() {
  local max_age_hours="${1:-24}"
  worktree_cleanup_stale "$max_age_hours"
}
|
||||
|
||||
# ── Scratch file helpers (compaction survival) ────────────────────────────
|
||||
|
||||
# build_scratch_instruction SCRATCH_FILE
|
||||
|
|
@ -779,25 +709,26 @@ build_sdk_prompt_footer() {
|
|||
# formula_worktree_setup WORKTREE
# Creates an isolated git worktree for synchronous formula execution.
# Fetches the primary branch from the forge remote (best-effort), removes
# any stale worktree at the same path, creates a fresh detached worktree,
# and registers an EXIT trap that cleans it up again.
# Requires globals: PROJECT_REPO_ROOT, PRIMARY_BRANCH, FORGE_REMOTE.
# Ensure resolve_forge_remote() is called before this function.
# Returns non-zero if cd into the repo or the worktree creation fails.
formula_worktree_setup() {
    local worktree="$1"
    cd "$PROJECT_REPO_ROOT" || return
    # Best-effort fetch: offline runs fall back to the last-known remote ref.
    git fetch "${FORGE_REMOTE}" "$PRIMARY_BRANCH" 2>/dev/null || true
    worktree_cleanup "$worktree"
    # Previously a failed `git worktree add` was silenced (stderr dropped,
    # status unchecked) while the trap was still installed — under set -e the
    # script died with no diagnostic. Fail loudly and skip the trap instead.
    if ! git worktree add "$worktree" "${FORGE_REMOTE}/${PRIMARY_BRANCH}" --detach 2>/dev/null; then
        printf 'Error: failed to create worktree at %s\n' "$worktree" >&2
        return 1
    fi
    # shellcheck disable=SC2064 # expand worktree now, not at trap time
    trap "worktree_cleanup '$worktree'" EXIT
}
|
||||
|
||||
# ── Prompt + monitor helpers ──────────────────────────────────────────────
|
||||
# ── Prompt helpers ──────────────────────────────────────────────────────
|
||||
|
||||
# build_prompt_footer [EXTRA_API_LINES]
|
||||
# Assembles the common forge API reference + environment + phase protocol
|
||||
# block for formula prompts. Sets PROMPT_FOOTER.
|
||||
# Assembles the common forge API reference + environment block for formula prompts.
|
||||
# Sets PROMPT_FOOTER.
|
||||
# Pass additional API endpoint lines (pre-formatted, newline-prefixed) via $1.
|
||||
# Requires globals: FORGE_API, FACTORY_ROOT, PROJECT_REPO_ROOT,
|
||||
# PRIMARY_BRANCH, PHASE_FILE.
|
||||
# PRIMARY_BRANCH.
|
||||
build_prompt_footer() {
|
||||
local extra_api="${1:-}"
|
||||
# shellcheck disable=SC2034 # consumed by the calling script's PROMPT
|
||||
|
|
@ -813,66 +744,15 @@ NEVER echo or include the actual token value in output — always reference \${F
|
|||
FACTORY_ROOT=${FACTORY_ROOT}
|
||||
PROJECT_REPO_ROOT=${PROJECT_REPO_ROOT}
|
||||
OPS_REPO_ROOT=${OPS_REPO_ROOT}
|
||||
PRIMARY_BRANCH=${PRIMARY_BRANCH}
|
||||
PHASE_FILE=${PHASE_FILE}
|
||||
|
||||
## Phase protocol (REQUIRED)
|
||||
When all work is done:
|
||||
echo 'PHASE:done' > '${PHASE_FILE}'
|
||||
On unrecoverable error:
|
||||
printf 'PHASE:failed\nReason: %s\n' 'describe error' > '${PHASE_FILE}'"
|
||||
PRIMARY_BRANCH=${PRIMARY_BRANCH}"
|
||||
}
|
||||
|
||||
# run_formula_and_monitor AGENT_NAME [TIMEOUT]
|
||||
# Starts the formula session, injects PROMPT, monitors phase, and logs result.
|
||||
# Requires globals: SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT,
|
||||
# FORGE_REPO, CLAUDE_MODEL (exported).
|
||||
# shellcheck disable=SC2154 # SESSION_NAME, PHASE_FILE, PROJECT_REPO_ROOT, PROMPT set by caller
|
||||
run_formula_and_monitor() {
|
||||
local agent_name="$1"
|
||||
local timeout="${2:-7200}"
|
||||
local callback="${3:-formula_phase_callback}"
|
||||
# ── Stale crashed worktree cleanup ────────────────────────────────────────
|
||||
|
||||
if ! start_formula_session "$SESSION_NAME" "$PROJECT_REPO_ROOT" "$PHASE_FILE"; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Write phase protocol to context file for compaction survival
|
||||
if [ -n "${PROMPT_FOOTER:-}" ]; then
|
||||
write_compact_context "$PHASE_FILE" "$PROMPT_FOOTER"
|
||||
fi
|
||||
|
||||
agent_inject_into_session "$SESSION_NAME" "$PROMPT"
|
||||
log "Prompt sent to tmux session"
|
||||
|
||||
log "Monitoring phase file: ${PHASE_FILE}"
|
||||
_FORMULA_CRASH_COUNT=0
|
||||
|
||||
monitor_phase_loop "$PHASE_FILE" "$timeout" "$callback"
|
||||
|
||||
FINAL_PHASE=$(read_phase "$PHASE_FILE")
|
||||
log "Final phase: ${FINAL_PHASE:-none}"
|
||||
|
||||
if [ "$FINAL_PHASE" != "PHASE:done" ]; then
|
||||
case "${_MONITOR_LOOP_EXIT:-}" in
|
||||
idle_prompt)
|
||||
log "${agent_name}: Claude returned to prompt without writing phase signal"
|
||||
;;
|
||||
idle_timeout)
|
||||
log "${agent_name}: timed out with no phase signal"
|
||||
;;
|
||||
*)
|
||||
log "${agent_name} finished without PHASE:done (phase: ${FINAL_PHASE:-none}, exit: ${_MONITOR_LOOP_EXIT:-})"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Preserve worktree on crash for debugging; clean up on success
|
||||
if [ "${_MONITOR_LOOP_EXIT:-}" = "crashed" ]; then
|
||||
worktree_preserve "${_FORMULA_SESSION_WORKDIR:-}" "crashed (agent=${agent_name})"
|
||||
else
|
||||
remove_formula_worktree
|
||||
fi
|
||||
|
||||
log "--- ${agent_name^} run done ---"
|
||||
# cleanup_stale_crashed_worktrees [MAX_AGE_HOURS]
# Compatibility wrapper: forwards to worktree_cleanup_stale() from
# lib/worktree.sh so existing call sites need not change.
# Requires: lib/worktree.sh sourced.
cleanup_stale_crashed_worktrees() {
    local age="${1:-24}"
    worktree_cleanup_stale "$age"
}
|
||||
|
|
|
|||
432
lib/generators.sh
Normal file
432
lib/generators.sh
Normal file
|
|
@ -0,0 +1,432 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# generators — template generation functions for disinto init
|
||||
#
|
||||
# Generates docker-compose.yml, Dockerfile, Caddyfile, staging index, and
|
||||
# deployment pipeline configs.
|
||||
#
|
||||
# Globals expected (must be set before sourcing):
|
||||
# FACTORY_ROOT - Root of the disinto factory
|
||||
# PROJECT_NAME - Project name for the project repo (defaults to 'project')
|
||||
# PRIMARY_BRANCH - Primary branch name (defaults to 'main')
|
||||
#
|
||||
# Usage:
|
||||
# source "${FACTORY_ROOT}/lib/generators.sh"
|
||||
# generate_compose "$forge_port"
|
||||
# generate_caddyfile
|
||||
# generate_staging_index
|
||||
# generate_deploy_pipelines "$repo_root" "$project_name"
|
||||
# =============================================================================
|
||||
set -euo pipefail

# Hard requirement: every generator below writes under FACTORY_ROOT.
: "${FACTORY_ROOT:?FACTORY_ROOT must be set}"
# Optional globals with safe defaults. env.sh may already have set both
# (PROJECT_NAME derived from FORGE_REPO, PRIMARY_BRANCH possibly 'master').
PROJECT_NAME="${PROJECT_NAME:-project}"
PRIMARY_BRANCH="${PRIMARY_BRANCH:-main}"
|
||||
|
||||
# Generate docker-compose.yml in the factory root.
# Writes the full service stack (Forgejo, Woodpecker server + agent, the
# agents runtime, runner, edge proxy, staging) from a literal template, then
# patches in the host-resolved Claude CLI path and a Forgejo host port.
# Arguments: $1 - host port for Forgejo (default 3000)
# Globals:   FACTORY_ROOT (read)
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0 (also when the file already exists and is left untouched)
_generate_compose_impl() {
  local forge_port="${1:-3000}"
  local compose_file="${FACTORY_ROOT}/docker-compose.yml"

  # Idempotent: never clobber an existing (possibly hand-edited) file.
  if [ -f "$compose_file" ]; then
    echo "Compose: ${compose_file} (already exists, skipping)"
    return 0
  fi

  # Quoted delimiter: ${...} placeholders below are resolved by docker
  # compose at runtime, not by this script.
  cat > "$compose_file" <<'COMPOSEEOF'
# docker-compose.yml — generated by disinto init
# Brings up Forgejo, Woodpecker, and the agent runtime.

services:
  forgejo:
    image: codeberg.org/forgejo/forgejo:1
    container_name: disinto-forgejo
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - forgejo-data:/data
    environment:
      FORGEJO__database__DB_TYPE: sqlite3
      FORGEJO__server__ROOT_URL: http://forgejo:3000/
      FORGEJO__server__HTTP_PORT: "3000"
      FORGEJO__security__INSTALL_LOCK: "true"
      FORGEJO__service__DISABLE_REGISTRATION: "true"
      FORGEJO__webhook__ALLOWED_HOST_LIST: "private"
    networks:
      - disinto-net

  woodpecker:
    image: woodpeckerci/woodpecker-server:v3
    container_name: disinto-woodpecker
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    ports:
      - "8000:8000"
      - "9000:9000"
    volumes:
      - woodpecker-data:/var/lib/woodpecker
    environment:
      WOODPECKER_FORGEJO: "true"
      WOODPECKER_FORGEJO_URL: http://forgejo:3000
      WOODPECKER_FORGEJO_CLIENT: ${WP_FORGEJO_CLIENT:-}
      WOODPECKER_FORGEJO_SECRET: ${WP_FORGEJO_SECRET:-}
      WOODPECKER_HOST: ${WOODPECKER_HOST:-http://woodpecker:8000}
      WOODPECKER_OPEN: "true"
      WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
      WOODPECKER_DATABASE_DRIVER: sqlite3
      WOODPECKER_DATABASE_DATASOURCE: /var/lib/woodpecker/woodpecker.sqlite
      WOODPECKER_ENVIRONMENT: "FORGE_TOKEN:${FORGE_TOKEN}"
    depends_on:
      - forgejo
    networks:
      - disinto-net

  woodpecker-agent:
    image: woodpeckerci/woodpecker-agent:v3
    container_name: disinto-woodpecker-agent
    restart: unless-stopped
    network_mode: host
    privileged: true
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      WOODPECKER_SERVER: localhost:9000
      WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
      WOODPECKER_GRPC_SECURE: "false"
      WOODPECKER_HEALTHCHECK_ADDR: ":3333"
      WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
      WOODPECKER_MAX_WORKFLOWS: 1
    depends_on:
      - woodpecker

  agents:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    container_name: disinto-agents
    restart: unless-stopped
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
      - project-repos:/home/agent/repos
      - ${HOME}/.claude:/home/agent/.claude
      - ${HOME}/.claude.json:/home/agent/.claude.json:ro
      - CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
      - ${HOME}/.ssh:/home/agent/.ssh:ro
      - ${HOME}/.config/sops/age:/home/agent/.config/sops/age:ro
      - woodpecker-data:/woodpecker-data:ro
    environment:
      FORGE_URL: http://forgejo:3000
      WOODPECKER_SERVER: http://woodpecker:8000
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
      WOODPECKER_DATA_DIR: /woodpecker-data
    env_file:
      - .env
    # IMPORTANT: agents get .env only (forge tokens, CI tokens, config).
    # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
    # .env.vault.enc and are NEVER injected here — only the runner
    # container receives them at fire time (AD-006, #745).
    depends_on:
      - forgejo
      - woodpecker
    networks:
      - disinto-net

  runner:
    build:
      context: .
      dockerfile: docker/agents/Dockerfile
    profiles: ["vault"]
    security_opt:
      - apparmor=unconfined
    volumes:
      - agent-data:/home/agent/data
    environment:
      FORGE_URL: http://forgejo:3000
      DISINTO_CONTAINER: "1"
      PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
    # Vault redesign in progress (PR-based approval, see #73-#77)
    # This container is being replaced — entrypoint will be updated in follow-up
    networks:
      - disinto-net

  # Edge proxy — reverse proxy to Forgejo, Woodpecker, and staging
  # Serves on ports 80/443, routes based on path
  edge:
    build: ./docker/edge
    container_name: disinto-edge
    ports:
      - "80:80"
      - "443:443"
    environment:
      - DISINTO_VERSION=${DISINTO_VERSION:-main}
      - FORGE_URL=http://forgejo:3000
      - FORGE_REPO=${FORGE_REPO:-disinto-admin/disinto}
      - FORGE_OPS_REPO=${FORGE_OPS_REPO:-disinto-admin/disinto-ops}
      - FORGE_TOKEN=${FORGE_TOKEN:-}
      - FORGE_ADMIN_USERS=${FORGE_ADMIN_USERS:-disinto-admin}
      - FORGE_ADMIN_TOKEN=${FORGE_ADMIN_TOKEN:-}
      - OPS_REPO_ROOT=/opt/disinto-ops
      - PROJECT_REPO_ROOT=/opt/disinto
      - PRIMARY_BRANCH=main
    volumes:
      - ./docker/Caddyfile:/etc/caddy/Caddyfile
      - caddy_data:/data
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      - forgejo
      - woodpecker
      - staging
    networks:
      - disinto-net

  # Staging container — static file server for staging artifacts
  # Edge proxy routes to this container for default requests
  staging:
    image: caddy:alpine
    command: ["caddy", "file-server", "--root", "/srv/site"]
    volumes:
      - ./docker:/srv/site:ro
    networks:
      - disinto-net

  # Staging deployment slot — activated by Woodpecker staging pipeline (#755).
  # Profile-gated: only starts when explicitly targeted by deploy commands.
  # Customize image/ports/volumes for your project after init.
  staging-deploy:
    image: alpine:3
    profiles: ["staging"]
    security_opt:
      - apparmor=unconfined
    environment:
      DEPLOY_ENV: staging
    networks:
      - disinto-net
    command: ["echo", "staging slot — replace with project image"]

volumes:
  forgejo-data:
  woodpecker-data:
  agent-data:
  project-repos:
  caddy_data:

networks:
  disinto-net:
    driver: bridge
COMPOSEEOF

  # Patch the Claude CLI binary path — resolve from host PATH at init time.
  local claude_bin
  claude_bin="$(command -v claude 2>/dev/null || true)"
  if [ -n "$claude_bin" ]; then
    # Resolve symlinks to get the real binary path
    claude_bin="$(readlink -f "$claude_bin")"
    sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$compose_file"
  else
    echo "Warning: claude CLI not found in PATH — update docker-compose.yml volumes manually" >&2
    sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$compose_file"
  fi

  # Add a host port mapping to the forgejo service so it is reachable from
  # the host during init. The former default/non-default branches were
  # identical except for the port value (the else arm hard-coded 3000:3000,
  # which equals ${forge_port}:3000 in that branch), so one sed covers both.
  sed -i "/image: codeberg\.org\/forgejo\/forgejo:1/a\\    ports:\\n      - \"${forge_port}:3000\"" "$compose_file"

  echo "Created: ${compose_file}"
}
|
||||
|
||||
# Ensure docker/agents/ exists and warn when the expected build files are
# absent. The Dockerfile and entrypoint.sh themselves ship with the repo;
# this only creates the directory and surfaces missing pieces on stderr.
_generate_agent_docker_impl() {
  local agents_dir="${FACTORY_ROOT}/docker/agents"
  mkdir -p "$agents_dir"

  local expected
  for expected in Dockerfile entrypoint.sh; do
    if [ ! -f "${agents_dir}/${expected}" ]; then
      echo "Warning: docker/agents/${expected} not found — expected in repo" >&2
    fi
  done
}
|
||||
|
||||
# Generate docker/Caddyfile template for the edge proxy.
# Fix: create the target directory first — previously the cat redirection
# failed whenever this ran before _generate_agent_docker_impl had created
# ${FACTORY_ROOT}/docker (an implicit ordering dependency).
# Globals: FACTORY_ROOT (read)
# Outputs: progress message on stdout
_generate_caddyfile_impl() {
  local docker_dir="${FACTORY_ROOT}/docker"
  local caddyfile="${docker_dir}/Caddyfile"

  # Idempotent: keep an existing (possibly customized) Caddyfile.
  if [ -f "$caddyfile" ]; then
    echo "Caddyfile: ${caddyfile} (already exists, skipping)"
    return
  fi

  # Removes the ordering dependency on _generate_agent_docker_impl.
  mkdir -p "$docker_dir"

  cat > "$caddyfile" <<'CADDYFILEEOF'
# Caddyfile — edge proxy configuration
# IP-only binding at bootstrap; domain + TLS added later via vault resource request

:80 {
	# Reverse proxy to Forgejo
	handle /forgejo/* {
		reverse_proxy forgejo:3000
	}

	# Reverse proxy to Woodpecker CI
	handle /ci/* {
		reverse_proxy woodpecker:8000
	}

	# Default: proxy to staging container
	handle {
		reverse_proxy staging:80
	}
}
CADDYFILEEOF

  echo "Created: ${caddyfile}"
}
|
||||
|
||||
# Generate docker/index.html default page served by the staging container.
# Fix: create the target directory first — previously the cat redirection
# failed whenever this ran before any function had created
# ${FACTORY_ROOT}/docker (an implicit ordering dependency).
# Globals: FACTORY_ROOT (read)
# Outputs: progress message on stdout
_generate_staging_index_impl() {
  local docker_dir="${FACTORY_ROOT}/docker"
  local index_file="${docker_dir}/index.html"

  # Idempotent: CI pipelines overwrite this page later; never clobber it.
  if [ -f "$index_file" ]; then
    echo "Staging: ${index_file} (already exists, skipping)"
    return
  fi

  # Removes the ordering dependency on the other generators.
  mkdir -p "$docker_dir"

  cat > "$index_file" <<'INDEXEOF'
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Nothing shipped yet</title>
  <style>
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
      display: flex;
      align-items: center;
      justify-content: center;
      min-height: 100vh;
      margin: 0;
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      color: white;
    }
    .container {
      text-align: center;
      padding: 2rem;
    }
    h1 {
      font-size: 3rem;
      margin: 0 0 1rem 0;
    }
    p {
      font-size: 1.25rem;
      opacity: 0.9;
    }
  </style>
</head>
<body>
  <div class="container">
    <h1>Nothing shipped yet</h1>
    <p>CI pipelines will update this page with your staging artifacts.</p>
  </div>
</body>
</html>
INDEXEOF

  echo "Created: ${index_file}"
}
|
||||
|
||||
# Generate template .woodpecker/ deployment pipeline configs in a project repo.
# Creates staging.yml and production.yml alongside the project's existing CI
# config. Both pipelines trigger on Woodpecker's deployment event, filtered
# by environment, and only the missing file(s) are written.
# Arguments: $1 - project repo root, $2 - project name (reserved for templates)
_generate_deploy_pipelines_impl() {
  local repo="$1"
  local project_name="$2"
  : "${project_name// /}" # Silence SC2034 - variable used in heredoc
  local pipelines="${repo}/.woodpecker"

  mkdir -p "$pipelines"

  # Fast path: both deploy pipelines already present — nothing to write.
  if [ -f "${pipelines}/staging.yml" ] && [ -f "${pipelines}/production.yml" ]; then
    echo "Deploy: .woodpecker/{staging,production}.yml (already exist)"
    return
  fi

  if [ ! -f "${pipelines}/staging.yml" ]; then
    cat > "${pipelines}/staging.yml" <<'STAGINGEOF'
# .woodpecker/staging.yml — Staging deployment pipeline
# Triggered by runner via Woodpecker promote API.
# Human approves promotion in vault → runner calls promote → this runs.

when:
  event: deployment
  environment: staging

steps:
  - name: deploy-staging
    image: docker:27
    commands:
      - echo "Deploying to staging environment..."
      - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from CI #${CI_PIPELINE_PARENT}"
      # Pull the image built by CI and deploy to staging
      # Customize these commands for your project:
      # - docker compose -f docker-compose.yml --profile staging up -d
      - echo "Staging deployment complete"

  - name: verify-staging
    image: alpine:3
    commands:
      - echo "Verifying staging deployment..."
      # Add health checks, smoke tests, or integration tests here:
      # - curl -sf http://staging:8080/health || exit 1
      - echo "Staging verification complete"
STAGINGEOF
    echo "Created: ${pipelines}/staging.yml"
  fi

  if [ ! -f "${pipelines}/production.yml" ]; then
    cat > "${pipelines}/production.yml" <<'PRODUCTIONEOF'
# .woodpecker/production.yml — Production deployment pipeline
# Triggered by runner via Woodpecker promote API.
# Human approves promotion in vault → runner calls promote → this runs.

when:
  event: deployment
  environment: production

steps:
  - name: deploy-production
    image: docker:27
    commands:
      - echo "Deploying to production environment..."
      - echo "Pipeline ${CI_PIPELINE_NUMBER} promoted from staging"
      # Pull the verified image and deploy to production
      # Customize these commands for your project:
      # - docker compose -f docker-compose.yml up -d
      - echo "Production deployment complete"

  - name: verify-production
    image: alpine:3
    commands:
      - echo "Verifying production deployment..."
      # Add production health checks here:
      # - curl -sf http://production:8080/health || exit 1
      - echo "Production verification complete"
PRODUCTIONEOF
    echo "Created: ${pipelines}/production.yml"
  fi
}
|
||||
464
lib/hire-agent.sh
Normal file
464
lib/hire-agent.sh
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# hire-agent — disinto_hire_an_agent() function
|
||||
#
|
||||
# Handles user creation, .profile repo setup, formula copying, branch protection,
|
||||
# and state marker creation for hiring a new agent.
|
||||
#
|
||||
# Globals expected:
|
||||
# FORGE_URL - Forge instance URL
|
||||
# FORGE_TOKEN - Admin token for Forge operations
|
||||
# FACTORY_ROOT - Root of the disinto factory
|
||||
# PROJECT_NAME - Project name for email/domain generation
|
||||
#
|
||||
# Usage:
|
||||
# source "${FACTORY_ROOT}/lib/hire-agent.sh"
|
||||
# disinto_hire_an_agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--poll-interval <seconds>]
|
||||
# =============================================================================
|
||||
# Fail fast: abort on command errors (-e), unset variables (-u), and
# failures in any pipeline stage (pipefail).
set -euo pipefail
|
||||
|
||||
disinto_hire_an_agent() {
|
||||
local agent_name="${1:-}"
|
||||
local role="${2:-}"
|
||||
local formula_path=""
|
||||
local local_model=""
|
||||
local poll_interval=""
|
||||
|
||||
if [ -z "$agent_name" ] || [ -z "$role" ]; then
|
||||
echo "Error: agent-name and role required" >&2
|
||||
echo "Usage: disinto hire-an-agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--poll-interval <seconds>]" >&2
|
||||
exit 1
|
||||
fi
|
||||
shift 2
|
||||
|
||||
# Parse flags
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--formula)
|
||||
formula_path="$2"
|
||||
shift 2
|
||||
;;
|
||||
--local-model)
|
||||
local_model="$2"
|
||||
shift 2
|
||||
;;
|
||||
--poll-interval)
|
||||
poll_interval="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Default formula path — try both naming conventions
|
||||
if [ -z "$formula_path" ]; then
|
||||
formula_path="${FACTORY_ROOT}/formulas/${role}.toml"
|
||||
if [ ! -f "$formula_path" ]; then
|
||||
formula_path="${FACTORY_ROOT}/formulas/run-${role}.toml"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Validate formula exists
|
||||
if [ ! -f "$formula_path" ]; then
|
||||
echo "Error: formula not found at ${formula_path}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "── Hiring agent: ${agent_name} (${role}) ───────────────────────"
|
||||
echo "Formula: ${formula_path}"
|
||||
if [ -n "$local_model" ]; then
|
||||
echo "Local model: ${local_model}"
|
||||
echo "Poll interval: ${poll_interval:-300}s"
|
||||
fi
|
||||
|
||||
# Ensure FORGE_TOKEN is set
|
||||
if [ -z "${FORGE_TOKEN:-}" ]; then
|
||||
echo "Error: FORGE_TOKEN not set" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get Forge URL
|
||||
local forge_url="${FORGE_URL:-http://localhost:3000}"
|
||||
echo "Forge: ${forge_url}"
|
||||
|
||||
# Step 1: Create user via API (skip if exists)
|
||||
echo ""
|
||||
echo "Step 1: Creating user '${agent_name}' (if not exists)..."
|
||||
|
||||
local user_pass=""
|
||||
local admin_pass=""
|
||||
|
||||
# Read admin password from .env for standalone runs (#184)
|
||||
local env_file="${FACTORY_ROOT}/.env"
|
||||
if [ -f "$env_file" ] && grep -q '^FORGE_ADMIN_PASS=' "$env_file" 2>/dev/null; then
|
||||
admin_pass=$(grep '^FORGE_ADMIN_PASS=' "$env_file" | head -1 | cut -d= -f2-)
|
||||
fi
|
||||
|
||||
# Get admin token early (needed for both user creation and password reset)
|
||||
local admin_user="disinto-admin"
|
||||
admin_pass="${admin_pass:-admin}"
|
||||
local admin_token=""
|
||||
local admin_token_name
|
||||
admin_token_name="temp-token-$(date +%s)"
|
||||
admin_token=$(curl -sf -X POST \
|
||||
-u "${admin_user}:${admin_pass}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${forge_url}/api/v1/users/${admin_user}/tokens" \
|
||||
-d "{\"name\":\"${admin_token_name}\",\"scopes\":[\"all\"]}" 2>/dev/null \
|
||||
| jq -r '.sha1 // empty') || admin_token=""
|
||||
if [ -z "$admin_token" ]; then
|
||||
# Token might already exist — try listing
|
||||
admin_token=$(curl -sf \
|
||||
-u "${admin_user}:${admin_pass}" \
|
||||
"${forge_url}/api/v1/users/${admin_user}/tokens" 2>/dev/null \
|
||||
| jq -r '.[0].sha1 // empty') || admin_token=""
|
||||
fi
|
||||
if [ -z "$admin_token" ]; then
|
||||
echo "Error: failed to obtain admin API token" >&2
|
||||
echo " Cannot proceed without admin privileges" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then
|
||||
echo " User '${agent_name}' already exists"
|
||||
# Reset user password so we can get a token (#184)
|
||||
user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||
# Use Forgejo CLI to reset password (API PATCH ignores must_change_password in Forgejo 11.x)
|
||||
if _forgejo_exec forgejo admin user change-password \
|
||||
--username "${agent_name}" \
|
||||
--password "${user_pass}" \
|
||||
--must-change-password=false >/dev/null 2>&1; then
|
||||
echo " Reset password for existing user '${agent_name}'"
|
||||
else
|
||||
echo " Warning: could not reset password for existing user" >&2
|
||||
fi
|
||||
else
|
||||
# Create user using basic auth (admin token fallback would poison subsequent calls)
|
||||
# Create the user
|
||||
user_pass="agent-$(head -c 16 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9' | head -c 20)"
|
||||
if curl -sf -X POST \
|
||||
-u "${admin_user}:${admin_pass}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${forge_url}/api/v1/admin/users" \
|
||||
-d "{\"username\":\"${agent_name}\",\"password\":\"${user_pass}\",\"email\":\"${agent_name}@${PROJECT_NAME:-disinto}.local\",\"full_name\":\"${agent_name}\",\"active\":true,\"admin\":false,\"must_change_password\":false}" >/dev/null 2>&1; then
|
||||
echo " Created user '${agent_name}'"
|
||||
else
|
||||
echo " Warning: failed to create user via admin API" >&2
|
||||
# Try alternative: user might already exist
|
||||
if curl -sf --max-time 5 "${forge_url}/api/v1/users/${agent_name}" >/dev/null 2>&1; then
|
||||
echo " User '${agent_name}' exists (confirmed)"
|
||||
else
|
||||
echo " Error: failed to create user '${agent_name}'" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 1.5: Generate Forge token for the new/existing user
|
||||
echo ""
|
||||
echo "Step 1.5: Generating Forge token for '${agent_name}'..."
|
||||
|
||||
# Convert role to uppercase token variable name (e.g., architect -> FORGE_ARCHITECT_TOKEN)
|
||||
local role_upper
|
||||
role_upper=$(echo "$role" | tr '[:lower:]' '[:upper:]')
|
||||
local token_var="FORGE_${role_upper}_TOKEN"
|
||||
|
||||
# Generate token using the user's password (basic auth)
|
||||
local agent_token=""
|
||||
agent_token=$(curl -sf -X POST \
|
||||
-u "${agent_name}:${user_pass}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${forge_url}/api/v1/users/${agent_name}/tokens" \
|
||||
-d "{\"name\":\"disinto-${agent_name}-token\",\"scopes\":[\"all\"]}" 2>/dev/null \
|
||||
| jq -r '.sha1 // empty') || agent_token=""
|
||||
|
||||
if [ -z "$agent_token" ]; then
|
||||
# Token name collision — create with timestamp suffix
|
||||
agent_token=$(curl -sf -X POST \
|
||||
-u "${agent_name}:${user_pass}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${forge_url}/api/v1/users/${agent_name}/tokens" \
|
||||
-d "{\"name\":\"disinto-${agent_name}-$(date +%s)\",\"scopes\":[\"all\"]}" 2>/dev/null \
|
||||
| jq -r '.sha1 // empty') || agent_token=""
|
||||
fi
|
||||
|
||||
if [ -z "$agent_token" ]; then
|
||||
echo " Warning: failed to create API token for '${agent_name}'" >&2
|
||||
else
|
||||
# Store token in .env under the role-specific variable name
|
||||
if grep -q "^${token_var}=" "$env_file" 2>/dev/null; then
|
||||
# Use sed with alternative delimiter and proper escaping for special chars in token
|
||||
local escaped_token
|
||||
escaped_token=$(printf '%s\n' "$agent_token" | sed 's/[&/\]/\\&/g')
|
||||
sed -i "s|^${token_var}=.*|${token_var}=${escaped_token}|" "$env_file"
|
||||
echo " ${agent_name} token updated (${token_var})"
|
||||
else
|
||||
printf '%s=%s\n' "$token_var" "$agent_token" >> "$env_file"
|
||||
echo " ${agent_name} token saved (${token_var})"
|
||||
fi
|
||||
export "${token_var}=${agent_token}"
|
||||
fi
|
||||
|
||||
# Step 2: Create .profile repo on Forgejo
|
||||
echo ""
|
||||
echo "Step 2: Creating '${agent_name}/.profile' repo (if not exists)..."
|
||||
|
||||
if curl -sf --max-time 5 "${forge_url}/api/v1/repos/${agent_name}/.profile" >/dev/null 2>&1; then
|
||||
echo " Repo '${agent_name}/.profile' already exists"
|
||||
else
|
||||
# Create the repo using the admin API to ensure it's created in the agent's namespace.
|
||||
# Using POST /api/v1/user/repos with a user token would create the repo under the
|
||||
# authenticated user, which could be wrong if the token belongs to a different user.
|
||||
# The admin API POST /api/v1/admin/users/{username}/repos explicitly creates in the
|
||||
# specified user's namespace.
|
||||
local create_output
|
||||
create_output=$(curl -sf -X POST \
|
||||
-u "${admin_user}:${admin_pass}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${forge_url}/api/v1/admin/users/${agent_name}/repos" \
|
||||
-d "{\"name\":\".profile\",\"description\":\"${agent_name}'s .profile repo\",\"private\":true,\"auto_init\":false}" 2>&1) || true
|
||||
|
||||
if echo "$create_output" | grep -q '"id":\|[0-9]'; then
|
||||
echo " Created repo '${agent_name}/.profile' (via admin API)"
|
||||
else
|
||||
echo " Error: failed to create repo '${agent_name}/.profile'" >&2
|
||||
echo " Response: ${create_output}" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 3: Clone repo and create initial commit
|
||||
echo ""
|
||||
echo "Step 3: Cloning repo and creating initial commit..."
|
||||
|
||||
local clone_dir="/tmp/.profile-clone-${agent_name}"
|
||||
rm -rf "$clone_dir"
|
||||
mkdir -p "$clone_dir"
|
||||
|
||||
# Build authenticated clone URL using basic auth (user_pass is always set in Step 1)
|
||||
if [ -z "${user_pass:-}" ]; then
|
||||
echo " Error: no user password available for cloning" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local auth_url
|
||||
auth_url=$(printf '%s' "$forge_url" | sed "s|://|://${agent_name}:${user_pass}@|")
|
||||
auth_url="${auth_url}/${agent_name}/.profile.git"
|
||||
|
||||
# Display unauthenticated URL (auth token only in actual git clone command)
|
||||
echo " Cloning: ${forge_url}/${agent_name}/.profile.git"
|
||||
|
||||
# Try authenticated clone first (required for private repos)
|
||||
if ! git clone --quiet "$auth_url" "$clone_dir" 2>/dev/null; then
|
||||
echo " Error: failed to clone repo with authentication" >&2
|
||||
echo " Note: Ensure the user has a valid API token with repository access" >&2
|
||||
rm -rf "$clone_dir"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure git
|
||||
git -C "$clone_dir" config user.name "disinto-admin"
|
||||
git -C "$clone_dir" config user.email "disinto-admin@localhost"
|
||||
|
||||
# Create directory structure
|
||||
echo " Creating directory structure..."
|
||||
mkdir -p "${clone_dir}/journal"
|
||||
mkdir -p "${clone_dir}/knowledge"
|
||||
touch "${clone_dir}/journal/.gitkeep"
|
||||
touch "${clone_dir}/knowledge/.gitkeep"
|
||||
|
||||
# Copy formula
|
||||
echo " Copying formula..."
|
||||
cp "$formula_path" "${clone_dir}/formula.toml"
|
||||
|
||||
# Create README
|
||||
if [ ! -f "${clone_dir}/README.md" ]; then
|
||||
cat > "${clone_dir}/README.md" <<EOF
|
||||
# ${agent_name}'s .profile
|
||||
|
||||
Agent profile repository for ${agent_name}.
|
||||
|
||||
## Structure
|
||||
|
||||
\`\`\`
|
||||
${agent_name}/.profile/
|
||||
├── formula.toml # Agent's role formula
|
||||
├── journal/ # Issue-by-issue log files (journal branch)
|
||||
│ └── .gitkeep
|
||||
├── knowledge/ # Shared knowledge and best practices
|
||||
│ └── .gitkeep
|
||||
└── README.md
|
||||
\`\`\`
|
||||
|
||||
## Branches
|
||||
|
||||
- \`main\` — Admin-only merge for formula changes (requires 1 approval)
|
||||
- \`journal\` — Agent branch for direct journal entries
|
||||
- Agent can push directly to this branch
|
||||
- Formula changes must go through PR to \`main\`
|
||||
|
||||
## Branch protection
|
||||
|
||||
- \`main\`: Protected — requires 1 admin approval for merges
|
||||
- \`journal\`: Unprotected — agent can push directly
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Commit and push
|
||||
echo " Committing and pushing..."
|
||||
git -C "$clone_dir" add -A
|
||||
if ! git -C "$clone_dir" diff --cached --quiet 2>/dev/null; then
|
||||
git -C "$clone_dir" commit -m "chore: initial .profile setup" -q
|
||||
git -C "$clone_dir" push origin main >/dev/null 2>&1 || \
|
||||
git -C "$clone_dir" push origin master >/dev/null 2>&1 || true
|
||||
echo " Committed: initial .profile setup"
|
||||
else
|
||||
echo " No changes to commit"
|
||||
fi
|
||||
|
||||
rm -rf "$clone_dir"
|
||||
|
||||
# Step 4: Set up branch protection
|
||||
echo ""
|
||||
echo "Step 4: Setting up branch protection..."
|
||||
|
||||
# Source branch-protection.sh helper
|
||||
local bp_script="${FACTORY_ROOT}/lib/branch-protection.sh"
|
||||
if [ -f "$bp_script" ]; then
|
||||
# Source required environment
|
||||
if [ -f "${FACTORY_ROOT}/lib/env.sh" ]; then
|
||||
source "${FACTORY_ROOT}/lib/env.sh"
|
||||
fi
|
||||
|
||||
# Set up branch protection for .profile repo
|
||||
if source "$bp_script" 2>/dev/null && setup_profile_branch_protection "${agent_name}/.profile" "main"; then
|
||||
echo " Branch protection configured for main branch"
|
||||
echo " - Requires 1 approval before merge"
|
||||
echo " - Admin-only merge enforcement"
|
||||
echo " - Journal branch created for direct agent pushes"
|
||||
else
|
||||
echo " Warning: could not configure branch protection (Forgejo API may not be available)"
|
||||
echo " Note: Branch protection can be set up manually later"
|
||||
fi
|
||||
else
|
||||
echo " Warning: branch-protection.sh not found at ${bp_script}"
|
||||
fi
|
||||
|
||||
# Step 5: Create state marker
|
||||
echo ""
|
||||
echo "Step 5: Creating state marker..."
|
||||
|
||||
local state_dir="${FACTORY_ROOT}/state"
|
||||
mkdir -p "$state_dir"
|
||||
local state_file="${state_dir}/.${role}-active"
|
||||
|
||||
if [ ! -f "$state_file" ]; then
|
||||
touch "$state_file"
|
||||
echo " Created: ${state_file}"
|
||||
else
|
||||
echo " State marker already exists: ${state_file}"
|
||||
fi
|
||||
|
||||
# Step 6: Set up local model agent (if --local-model specified)
|
||||
if [ -n "$local_model" ]; then
|
||||
echo ""
|
||||
echo "Step 6: Configuring local model agent..."
|
||||
|
||||
local override_file="${FACTORY_ROOT}/docker-compose.override.yml"
|
||||
local override_dir
|
||||
override_dir=$(dirname "$override_file")
|
||||
mkdir -p "$override_dir"
|
||||
|
||||
# Validate model endpoint is reachable
|
||||
echo " Validating model endpoint: ${local_model}"
|
||||
if ! curl -sf --max-time 10 "${local_model}/health" >/dev/null 2>&1; then
|
||||
# Try /v1/chat/completions as fallback endpoint check
|
||||
if ! curl -sf --max-time 10 "${local_model}/v1/chat/completions" >/dev/null 2>&1; then
|
||||
echo " Warning: model endpoint may not be reachable at ${local_model}"
|
||||
echo " Continuing with configuration..."
|
||||
fi
|
||||
else
|
||||
echo " Model endpoint is reachable"
|
||||
fi
|
||||
|
||||
# Generate service name from agent name (lowercase)
|
||||
local service_name="agents-${agent_name}"
|
||||
service_name=$(echo "$service_name" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
# Set default poll interval
|
||||
local interval="${poll_interval:-300}"
|
||||
|
||||
# Generate the override compose file
|
||||
# Bash expands ${service_name}, ${local_model}, ${interval}, ${PROJECT_NAME} at generation time
|
||||
# \$HOME, \$FORGE_TOKEN become ${HOME}, ${FORGE_TOKEN} in the file for docker-compose runtime expansion
|
||||
cat > "$override_file" <<OVERRIDEOF
|
||||
# docker-compose.override.yml — auto-generated by disinto hire-an-agent
|
||||
# Local model agent configuration for ${agent_name}
|
||||
|
||||
services:
|
||||
${service_name}:
|
||||
image: disinto-agents:latest
|
||||
profiles: ["local-model"]
|
||||
restart: unless-stopped
|
||||
security_opt:
|
||||
- apparmor=unconfined
|
||||
volumes:
|
||||
- agent-data-llama:/home/agent/data
|
||||
- project-repos-llama:/home/agent/repos
|
||||
- \$HOME/.claude:/home/agent/.claude
|
||||
- \$HOME/.claude.json:/home/agent/.claude.json:ro
|
||||
- CLAUDE_BIN_PLACEHOLDER:/usr/local/bin/claude:ro
|
||||
- \$HOME/.ssh:/home/agent/.ssh:ro
|
||||
- \$HOME/.config/sops/age:/home/agent/.config/sops/age:ro
|
||||
environment:
|
||||
FORGE_URL: http://forgejo:3000
|
||||
WOODPECKER_SERVER: http://woodpecker:8000
|
||||
DISINTO_CONTAINER: "1"
|
||||
PROJECT_REPO_ROOT: /home/agent/repos/${PROJECT_NAME:-project}
|
||||
WOODPECKER_DATA_DIR: /woodpecker-data
|
||||
ANTHROPIC_BASE_URL: ${local_model}
|
||||
ANTHROPIC_API_KEY: sk-no-key-required
|
||||
FORGE_TOKEN_OVERRIDE: \$FORGE_TOKEN
|
||||
CLAUDE_CONFIG_DIR: /home/agent/.claude
|
||||
POLL_INTERVAL: ${interval}
|
||||
env_file:
|
||||
- .env
|
||||
depends_on:
|
||||
- forgejo
|
||||
- woodpecker
|
||||
entrypoint: ["/home/agent/entrypoint-llama.sh"]
|
||||
|
||||
volumes:
|
||||
agent-data-llama:
|
||||
project-repos-llama:
|
||||
OVERRIDEOF
|
||||
|
||||
# Patch the Claude CLI binary path
|
||||
local claude_bin
|
||||
claude_bin="$(command -v claude 2>/dev/null || true)"
|
||||
if [ -n "$claude_bin" ]; then
|
||||
claude_bin="$(readlink -f "$claude_bin")"
|
||||
sed -i "s|CLAUDE_BIN_PLACEHOLDER|${claude_bin}|" "$override_file"
|
||||
else
|
||||
echo " Warning: claude CLI not found — update override file manually"
|
||||
sed -i "s|CLAUDE_BIN_PLACEHOLDER|/usr/local/bin/claude|" "$override_file"
|
||||
fi
|
||||
|
||||
echo " Created: ${override_file}"
|
||||
echo " Service name: ${service_name}"
|
||||
echo " Poll interval: ${interval}s"
|
||||
echo " Model endpoint: ${local_model}"
|
||||
echo ""
|
||||
echo " To start the agent, run:"
|
||||
echo " docker compose --profile local-model up -d ${service_name}"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Done! Agent '${agent_name}' hired for role '${role}'."
|
||||
echo " User: ${forge_url}/${agent_name}"
|
||||
echo " Repo: ${forge_url}/${agent_name}/.profile"
|
||||
echo " Formula: ${role}.toml"
|
||||
}
|
||||
|
|
@ -43,7 +43,6 @@ _ilc_log() {
|
|||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Label ID caching — lookup once per name, cache in globals.
|
||||
# Pattern follows ci-helpers.sh (ensure_blocked_label_id).
|
||||
# ---------------------------------------------------------------------------
|
||||
declare -A _ILC_LABEL_IDS
|
||||
_ILC_LABEL_IDS["backlog"]=""
|
||||
|
|
@ -161,6 +160,27 @@ issue_release() {
|
|||
_ilc_log "released issue #${issue}"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# _ilc_post_comment — Post a comment to an issue (internal helper)
# Args: issue_number body_text
# Globals read: FORGE_TOKEN, FORGE_API
# Uses a temp file to avoid large inline strings / argv limits.
# Best-effort: API failures are swallowed so callers keep going, and the
# temp files are always removed — even when jq or curl fails, which under
# `set -e` previously aborted the script and leaked both files.
# ---------------------------------------------------------------------------
_ilc_post_comment() {
  local issue="$1" body="$2"

  local tmpfile tmpjson
  # No suffix after the X's: BSD/macOS mktemp requires the template to end
  # in X's (the previous "XXXXXX.md" form was GNU-only).
  tmpfile=$(mktemp /tmp/ilc-comment-XXXXXX) || return 0
  tmpjson="${tmpfile}.json"
  printf '%s' "$body" > "$tmpfile"
  # jq -Rs: slurp raw text into a single JSON string -> {"body": "..."}
  if jq -Rs '{body:.}' < "$tmpfile" > "$tmpjson" 2>/dev/null; then
    curl -sf -o /dev/null -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${FORGE_API}/issues/${issue}/comments" \
      --data-binary @"$tmpjson" 2>/dev/null || true
  fi
  rm -f "$tmpfile" "$tmpjson"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# issue_block — add "blocked" label, post diagnostic comment, remove in-progress.
|
||||
# Args: issue_number reason [result_text]
|
||||
|
|
@ -187,14 +207,9 @@ issue_block() {
|
|||
fi
|
||||
} > "$tmpfile"
|
||||
|
||||
# Post comment
|
||||
jq -Rs '{body:.}' < "$tmpfile" > "${tmpfile}.json"
|
||||
curl -sf -o /dev/null -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/issues/${issue}/comments" \
|
||||
--data-binary @"${tmpfile}.json" 2>/dev/null || true
|
||||
rm -f "$tmpfile" "${tmpfile}.json"
|
||||
# Post comment using shared helper
|
||||
_ilc_post_comment "$issue" "$(cat "$tmpfile")"
|
||||
rm -f "$tmpfile"
|
||||
|
||||
# Remove in-progress, add blocked
|
||||
local ip_id bk_id
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@
|
|||
# PROJECT_CONTAINERS, CHECK_PRS, CHECK_DEV_AGENT,
|
||||
# CHECK_PIPELINE_STALL, CI_STALE_MINUTES,
|
||||
# MIRROR_NAMES, MIRROR_URLS, MIRROR_<NAME> (per configured mirror)
|
||||
# (plus backwards-compat aliases: CODEBERG_REPO, CODEBERG_API, CODEBERG_WEB)
|
||||
#
|
||||
# If no argument given, does nothing (allows poll scripts to work with
|
||||
# plain .env fallback for backwards compatibility).
|
||||
|
|
@ -83,7 +82,7 @@ if mirrors:
|
|||
# Export parsed variables.
|
||||
# Inside the agents container (DISINTO_CONTAINER=1), compose already sets the
|
||||
# correct FORGE_URL (http://forgejo:3000) and path vars for the container
|
||||
# environment. The TOML carries host-perspective values (localhost, /home/johba/…)
|
||||
# environment. The TOML carries host-perspective values (localhost, /home/admin/…)
|
||||
# that would break container API calls and path resolution. Skip overriding
|
||||
# any env var that is already set when running inside the container.
|
||||
while IFS='=' read -r _key _val; do
|
||||
|
|
@ -100,11 +99,9 @@ export FORGE_URL="${FORGE_URL:-http://localhost:3000}"
|
|||
if [ -n "$FORGE_REPO" ]; then
|
||||
export FORGE_API="${FORGE_URL}/api/v1/repos/${FORGE_REPO}"
|
||||
export FORGE_WEB="${FORGE_URL}/${FORGE_REPO}"
|
||||
# Extract repo owner (first path segment of owner/repo)
|
||||
export FORGE_REPO_OWNER="${FORGE_REPO%%/*}"
|
||||
fi
|
||||
# Backwards-compat aliases
|
||||
export CODEBERG_REPO="${FORGE_REPO}"
|
||||
export CODEBERG_API="${FORGE_API:-}"
|
||||
export CODEBERG_WEB="${FORGE_WEB:-}"
|
||||
|
||||
# Derive PROJECT_REPO_ROOT if not explicitly set
|
||||
if [ -z "${PROJECT_REPO_ROOT:-}" ] && [ -n "${PROJECT_NAME:-}" ]; then
|
||||
|
|
|
|||
225
lib/ops-setup.sh
Normal file
225
lib/ops-setup.sh
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
#!/usr/bin/env bash
|
||||
# ops-setup.sh — Setup ops repository (disinto-ops)
|
||||
#
|
||||
# Source from bin/disinto:
|
||||
# source "$(dirname "$0")/../lib/ops-setup.sh"
|
||||
#
|
||||
# Required globals: FORGE_URL, FORGE_TOKEN, FACTORY_ROOT
|
||||
# Optional: admin_token (falls back to FORGE_TOKEN for admin operations)
|
||||
#
|
||||
# Functions:
|
||||
# setup_ops_repo <forge_url> <ops_slug> <ops_root> [primary_branch]
|
||||
# - Create ops repo on Forgejo if it doesn't exist
|
||||
# - Configure bot collaborators with appropriate permissions
|
||||
# - Clone or initialize ops repo locally
|
||||
# - Seed directory structure (vault, knowledge, evidence)
|
||||
# - Export _ACTUAL_OPS_SLUG for caller to use
|
||||
#
|
||||
# Globals modified:
|
||||
# _ACTUAL_OPS_SLUG - resolved ops repo slug after function completes
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
#######################################
# Set up the ops repository (create on Forgejo, add collaborators, clone or
# initialize locally, seed directory structure, commit + push the seed).
# Globals:
#   FORGE_TOKEN (read), admin_token (read, optional — falls back to FORGE_TOKEN)
#   _ACTUAL_OPS_SLUG (written — resolved ops repo slug for the caller)
# Arguments:
#   $1 - forge_url       Forge base URL (e.g. http://localhost:3000)
#   $2 - ops_slug        Configured ops repo slug (org/name)
#   $3 - ops_root        Local checkout path for the ops repo
#   $4 - primary_branch  Default branch (optional, defaults to "main")
# Returns:
#   0 on success, 1 if the ops repo could not be created.
#######################################
setup_ops_repo() {

  local forge_url="$1" ops_slug="$2" ops_root="$3" primary_branch="${4:-main}"
  local org_name="${ops_slug%%/*}"
  local ops_name="${ops_slug##*/}"

  echo ""
  echo "── Ops repo setup ─────────────────────────────────────"

  # Determine the actual ops repo location by searching across possible
  # namespaces. This handles cases where the repo was created under a
  # different namespace due to past bugs (e.g. dev-bot/disinto-ops instead
  # of disinto-admin/disinto-ops).
  local actual_ops_slug=""
  local -a possible_namespaces=( "$org_name" "dev-bot" "disinto-admin" )
  local http_code
  # Fix: ns/slug were unscoped and leaked into the sourcing shell.
  local ns slug

  for ns in "${possible_namespaces[@]}"; do
    slug="${ns}/${ops_name}"
    if curl -sf --max-time 5 \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${forge_url}/api/v1/repos/${slug}" >/dev/null 2>&1; then
      actual_ops_slug="$slug"
      # Fix: message previously printed the slug twice ("… (found at ${slug})").
      echo "Ops repo: ${slug} (found in '${ns}' namespace)"
      break
    fi
  done

  # If not found, try to create it in the configured namespace
  if [ -z "$actual_ops_slug" ]; then
    echo "Creating ops repo in namespace: ${org_name}"
    # Create org if it doesn't exist (best-effort: ignored if it already does)
    curl -sf -X POST \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs" \
      -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" >/dev/null 2>&1 || true
    if curl -sf -X POST \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/orgs/${org_name}/repos" \
      -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" >/dev/null 2>&1; then
      actual_ops_slug="${org_name}/${ops_name}"
      echo "Ops repo: ${actual_ops_slug} created on Forgejo"
    else
      # Fallback: the namespace may be a plain user rather than an org —
      # use the admin API to create the repo under the target namespace.
      http_code=$(curl -s -o /dev/null -w "%{http_code}" \
        -X POST \
        -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
        -H "Content-Type: application/json" \
        "${forge_url}/api/v1/admin/users/${org_name}/repos" \
        -d "{\"name\":\"${ops_name}\",\"auto_init\":true,\"default_branch\":\"${primary_branch}\",\"description\":\"Operational data for ${org_name}/${ops_name%-ops}\"}" 2>/dev/null || echo "0")
      if [ "$http_code" = "201" ]; then
        actual_ops_slug="${org_name}/${ops_name}"
        echo "Ops repo: ${actual_ops_slug} created on Forgejo (via admin API)"
      else
        echo "Error: failed to create ops repo '${org_name}/${ops_name}' (HTTP ${http_code})" >&2
        return 1
      fi
    fi
  fi

  # Configure collaborators on the ops repo.
  # NOTE: 'declare' inside a function is already local (no -g flag).
  local bot_user bot_perm
  declare -A bot_permissions=(
    [dev-bot]="write"
    [review-bot]="read"
    [planner-bot]="write"
    [gardener-bot]="write"
    [vault-bot]="write"
    [supervisor-bot]="read"
    [predictor-bot]="read"
    [architect-bot]="write"
  )

  # Add all bot users as collaborators with appropriate permissions
  # vault branch protection (#77) requires:
  #   - Admin-only merge to main (enforced by admin_enforced: true)
  #   - Bots can push branches and create PRs, but cannot merge
  for bot_user in "${!bot_permissions[@]}"; do
    bot_perm="${bot_permissions[$bot_user]}"
    if curl -sf -X PUT \
      -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
      -H "Content-Type: application/json" \
      "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/${bot_user}" \
      -d "{\"permission\":\"${bot_perm}\"}" >/dev/null 2>&1; then
      echo " + ${bot_user} = ${bot_perm} collaborator"
    else
      echo " ! ${bot_user} = ${bot_perm} (already set or failed)"
    fi
  done

  # Add disinto-admin as admin collaborator
  if curl -sf -X PUT \
    -H "Authorization: token ${admin_token:-${FORGE_TOKEN}}" \
    -H "Content-Type: application/json" \
    "${forge_url}/api/v1/repos/${actual_ops_slug}/collaborators/disinto-admin" \
    -d '{"permission":"admin"}' >/dev/null 2>&1; then
    echo " + disinto-admin = admin collaborator"
  else
    echo " ! disinto-admin = admin (already set or failed)"
  fi

  # Clone ops repo locally if not present
  if [ ! -d "${ops_root}/.git" ]; then
    local auth_url
    # Embed basic-auth credentials into the URL for the clone only.
    auth_url=$(printf '%s' "$forge_url" | sed "s|://|://dev-bot:${FORGE_TOKEN}@|")
    local clone_url="${auth_url}/${actual_ops_slug}.git"
    echo "Cloning: ops repo -> ${ops_root}"
    if git clone --quiet "$clone_url" "$ops_root" 2>/dev/null; then
      echo "Ops repo: ${actual_ops_slug} cloned successfully"
    else
      # Clone failed (e.g. repo is empty or unreachable): initialize locally
      # and point origin at the resolved slug. --initial-branch needs git >= 2.28.
      echo "Initializing: ops repo at ${ops_root}"
      mkdir -p "$ops_root"
      git -C "$ops_root" init --initial-branch="${primary_branch}" -q
      git -C "$ops_root" remote add origin "${forge_url}/${actual_ops_slug}.git"
      echo "Ops repo: ${actual_ops_slug} initialized locally"
    fi
  else
    echo "Ops repo: ${ops_root} (already exists locally)"
    # Verify remote is correct (the repo may have moved namespaces)
    local current_remote
    current_remote=$(git -C "$ops_root" remote get-url origin 2>/dev/null || true)
    local expected_remote="${forge_url}/${actual_ops_slug}.git"
    if [ -n "$current_remote" ] && [ "$current_remote" != "$expected_remote" ]; then
      echo " Fixing: remote URL from ${current_remote} to ${expected_remote}"
      git -C "$ops_root" remote set-url origin "$expected_remote"
    fi
  fi

  # Seed directory structure (vault lifecycle dirs, knowledge, evidence)
  local seeded=false
  mkdir -p "${ops_root}/vault/pending"
  mkdir -p "${ops_root}/vault/approved"
  mkdir -p "${ops_root}/vault/fired"
  mkdir -p "${ops_root}/vault/rejected"
  mkdir -p "${ops_root}/knowledge"
  mkdir -p "${ops_root}/evidence/engagement"

  if [ ! -f "${ops_root}/README.md" ]; then
    cat > "${ops_root}/README.md" <<OPSEOF
# ${ops_name}

Operational data for the ${ops_name%-ops} project.

## Structure

\`\`\`
${ops_name}/
├── vault/
│   ├── pending/      # vault items awaiting approval
│   ├── approved/     # approved vault items
│   ├── fired/        # executed vault items
│   └── rejected/     # rejected vault items
├── knowledge/        # shared agent knowledge and best practices
├── evidence/         # engagement data, experiment results
├── portfolio.md      # addressables + observables
├── prerequisites.md  # dependency graph
└── RESOURCES.md      # accounts, tokens (refs), infra inventory
\`\`\`

> **Note:** Journal directories (journal/planner/ and journal/supervisor/) have been removed from the ops repo. Agent journals are now stored in each agent's .profile repo on Forgejo.

## Branch protection

- \`main\`: 2 reviewers required for vault items
- Journal/evidence commits may use lighter rules
OPSEOF
    seeded=true
  fi

  # Create stub files if they don't exist
  [ -f "${ops_root}/portfolio.md" ] || { echo "# Portfolio" > "${ops_root}/portfolio.md"; seeded=true; }
  [ -f "${ops_root}/prerequisites.md" ] || { echo "# Prerequisite Tree" > "${ops_root}/prerequisites.md"; seeded=true; }
  [ -f "${ops_root}/RESOURCES.md" ] || { echo "# Resources" > "${ops_root}/RESOURCES.md"; seeded=true; }

  # Commit and push seed content
  if [ "$seeded" = true ] && [ -d "${ops_root}/.git" ]; then
    # Auto-configure repo-local git identity if missing (#778)
    if [ -z "$(git -C "$ops_root" config user.name 2>/dev/null)" ]; then
      git -C "$ops_root" config user.name "disinto-admin"
    fi
    if [ -z "$(git -C "$ops_root" config user.email 2>/dev/null)" ]; then
      git -C "$ops_root" config user.email "disinto-admin@localhost"
    fi

    git -C "$ops_root" add -A
    if ! git -C "$ops_root" diff --cached --quiet 2>/dev/null; then
      git -C "$ops_root" commit -m "chore: seed ops repo structure" -q
      # Push only if a remote exists (init-only repos may have none reachable)
      if git -C "$ops_root" remote get-url origin >/dev/null 2>&1; then
        if git -C "$ops_root" push origin "${primary_branch}" -q 2>/dev/null; then
          echo "Seeded: ops repo with initial structure"
        else
          echo "Warning: failed to push seed content to ops repo" >&2
        fi
      fi
    fi
  fi

  # Expose resolved slug for the caller to write back to the project TOML
  _ACTUAL_OPS_SLUG="${actual_ops_slug}"
}
|
||||
|
|
@ -357,11 +357,18 @@ pr_close() {
|
|||
local pr_num="$1"
|
||||
|
||||
_prl_log "closing PR #${pr_num}"
|
||||
curl -sf -X PATCH \
|
||||
local resp http_code
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/pulls/${pr_num}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||
http_code=$(printf '%s\n' "$resp" | tail -1)
|
||||
if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then
|
||||
_prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}"
|
||||
return 1
|
||||
fi
|
||||
_prl_log "PR #${pr_num} closed"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -398,11 +405,18 @@ pr_walk_to_merge() {
|
|||
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
|
||||
ci_retry_count=$((ci_retry_count + 1))
|
||||
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
|
||||
local rebase_output rebase_rc
|
||||
( cd "$worktree" && \
|
||||
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
|
||||
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
|
||||
git rebase "${remote}/${PRIMARY_BRANCH}" && \
|
||||
git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true
|
||||
git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1
|
||||
rebase_rc=$?
|
||||
rebase_output=$(cat /tmp/rebase-output-$$)
|
||||
rm -f /tmp/rebase-output-$$
|
||||
if [ "$rebase_rc" -ne 0 ]; then
|
||||
_prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)"
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
|
||||
|
|
@ -474,11 +488,7 @@ Fix the issue, run tests, commit, rebase on ${PRIMARY_BRANCH}, and push:
|
|||
_PR_WALK_EXIT_REASON="merged"
|
||||
return 0
|
||||
fi
|
||||
if [ "$rc" -eq 2 ]; then
|
||||
_PR_WALK_EXIT_REASON="merge_blocked"
|
||||
return 1
|
||||
fi
|
||||
# Merge failed (conflict) — ask agent to rebase
|
||||
# Merge failed (conflict or HTTP 405) — ask agent to rebase
|
||||
_prl_log "merge failed — invoking agent to rebase"
|
||||
agent_run --resume "$session_id" --worktree "$worktree" \
|
||||
"PR #${pr_num} approved but merge failed: ${_PR_MERGE_ERROR:-unknown}
|
||||
|
|
@ -524,8 +534,7 @@ Commit, rebase on ${PRIMARY_BRANCH}, and push:
|
|||
# build_phase_protocol_prompt — Generate push/commit instructions for Claude.
|
||||
#
|
||||
# For the synchronous agent_run architecture: tells Claude how to commit and
|
||||
# push (no phase files). For the tmux session architecture, use the
|
||||
# build_phase_protocol_prompt in dev/phase-handler.sh instead.
|
||||
# push (no phase files).
|
||||
#
|
||||
# Args: branch [remote]
|
||||
# Stdout: instruction text
|
||||
|
|
|
|||
210
lib/profile.sh
210
lib/profile.sh
|
|
@ -1,210 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# profile.sh — Helpers for agent .profile repo management
|
||||
#
|
||||
# Source after lib/env.sh and lib/formula-session.sh:
|
||||
# source "$(dirname "$0")/../lib/env.sh"
|
||||
# source "$(dirname "$0")/lib/formula-session.sh"
|
||||
# source "$(dirname "$0")/lib/profile.sh"
|
||||
#
|
||||
# Required globals: FORGE_TOKEN, FORGE_URL, AGENT_IDENTITY, PROFILE_REPO_PATH
|
||||
#
|
||||
# Functions:
|
||||
# profile_propose_formula NEW_FORMULA CONTENT REASON — create PR to update formula.toml
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Internal log helper.
# Prefixes every message with "profile: " and routes it through the
# caller-provided log() function when one is defined; otherwise falls back
# to a UTC-timestamped line on stderr.
_profile_log() {
  local msg="profile: $*"
  if ! declare -f log >/dev/null 2>&1; then
    # No host logger available — self-contained stderr fallback.
    printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$msg" >&2
    return 0
  fi
  log "$msg"
}
||||
|
||||
# -----------------------------------------------------------------------------
# profile_propose_formula — Propose a formula change via PR
#
# Creates a branch, writes updated formula.toml, opens a PR, and returns PR number.
# Branch is protected (requires admin approval per #87).
#
# Args:
#   $1 - NEW_FORMULA_CONTENT: The complete new formula.toml content
#   $2 - REASON: Human-readable explanation of what changed and why
#
# Globals read: AGENT_IDENTITY, PROFILE_REPO_PATH, FORGE_TOKEN, FORGE_URL,
#               FORGE_REMOTE (optional, defaults to "origin")
#
# Returns:
#   0 on success, prints PR number to stdout
#   1 on failure
#
# Example:
#   source "$(dirname "$0")/../lib/env.sh"
#   source "$(dirname "$0")/lib/formula-session.sh"
#   source "$(dirname "$0")/lib/profile.sh"
#   AGENT_IDENTITY="dev-bot"
#   ensure_profile_repo "$AGENT_IDENTITY"
#   profile_propose_formula "$new_formula" "Added new prompt pattern for code review"
# -----------------------------------------------------------------------------
profile_propose_formula() {
  local new_formula="$1"
  local reason="$2"

  # Fail fast on each required global with a distinct, greppable error.
  if [ -z "${AGENT_IDENTITY:-}" ]; then
    _profile_log "ERROR: AGENT_IDENTITY not set"
    return 1
  fi

  if [ -z "${PROFILE_REPO_PATH:-}" ]; then
    _profile_log "ERROR: PROFILE_REPO_PATH not set — ensure_profile_repo not called"
    return 1
  fi

  if [ -z "${FORGE_TOKEN:-}" ]; then
    _profile_log "ERROR: FORGE_TOKEN not set"
    return 1
  fi

  if [ -z "${FORGE_URL:-}" ]; then
    _profile_log "ERROR: FORGE_URL not set"
    return 1
  fi

  # Generate short description from reason for branch name:
  # lowercase -> strip non-alphanumerics -> collapse/trim spaces -> cap at 40
  # chars -> spaces to dashes.
  # NOTE(review): the ' */ /' sed pattern also matches empty strings; the
  # intent looks like collapsing runs of spaces ('  */ /') — confirm against
  # the original file (whitespace may have been mangled in transit).
  local short_desc
  short_desc=$(printf '%s' "$reason" | \
    tr '[:upper:]' '[:lower:]' | \
    sed 's/[^a-z0-9 ]//g' | \
    sed 's/ */ /g' | \
    sed 's/^ *//;s/ *$//' | \
    cut -c1-40 | \
    tr ' ' '-')

  # Reason contained no usable characters — fall back to a generic name.
  if [ -z "$short_desc" ]; then
    short_desc="formula-update"
  fi

  local branch_name="formula/${short_desc}"
  local formula_path="${PROFILE_REPO_PATH}/formula.toml"

  _profile_log "Proposing formula change: ${branch_name}"
  _profile_log "Reason: ${reason}"

  # Ensure we're on main branch and up-to-date.
  # The whole git + API flow runs in a subshell so the `cd` does not leak;
  # `return` inside the subshell exits only the subshell, and the final
  # `return $?` below propagates its status to the caller.
  _profile_log "Fetching .profile repo"
  (
    cd "$PROFILE_REPO_PATH" || return 1

    # Best-effort fetch of either primary branch name.
    git fetch origin main --quiet 2>/dev/null || \
      git fetch origin master --quiet 2>/dev/null || true

    # Reset to main/master (fast-forward only; divergence is tolerated).
    if git checkout main --quiet 2>/dev/null; then
      git pull --ff-only origin main --quiet 2>/dev/null || true
    elif git checkout master --quiet 2>/dev/null; then
      git pull --ff-only origin master --quiet 2>/dev/null || true
    else
      _profile_log "ERROR: Failed to checkout main/master branch"
      return 1
    fi

    # Create and checkout new branch (reuse it if a prior run created it).
    git checkout -b "$branch_name" 2>/dev/null || {
      _profile_log "Branch ${branch_name} may already exist"
      git checkout "$branch_name" 2>/dev/null || return 1
    }

    # Write formula.toml (full replacement of the file contents).
    printf '%s' "$new_formula" > "$formula_path"

    # Commit the change under the agent's identity (repo-local config).
    git config user.name "${AGENT_IDENTITY}" || true
    git config user.email "${AGENT_IDENTITY}@users.noreply.codeberg.org" || true

    git add "$formula_path"
    # --no-verify skips hooks; a failed commit usually means "no diff",
    # which is fine as long as the branch already has commits to PR from.
    git commit -m "formula: ${reason}" --no-verify || {
      _profile_log "No changes to commit (formula unchanged)"
      # Check if branch has any commits
      if git rev-parse HEAD >/dev/null 2>&1; then
        : # branch has commits, continue
      else
        _profile_log "ERROR: Failed to create commit"
        return 1
      fi
    }

    # Push branch
    local remote="${FORGE_REMOTE:-origin}"
    git push --set-upstream "$remote" "$branch_name" --quiet 2>/dev/null || {
      _profile_log "ERROR: Failed to push branch"
      return 1
    }

    _profile_log "Branch pushed: ${branch_name}"

    # Create PR against the agent's own .profile repo.
    local forge_url="${FORGE_URL%/}"
    local api_url="${forge_url}/api/v1/repos/${AGENT_IDENTITY}/.profile"
    local primary_branch="main"

    # Check if main or master is the primary branch.
    # -w still emits the HTTP code even when -f makes curl exit non-zero,
    # so grep sees "200" only when the 'main' branch really exists.
    if ! curl -sf -o /dev/null -w "%{http_code}" \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${api_url}/git/branches/main" 2>/dev/null | grep -q "200"; then
      primary_branch="master"
    fi

    local pr_title="formula: ${reason}"
    local pr_body="# Formula Update

**Reason:** ${reason}

---
*This PR was auto-generated by ${AGENT_IDENTITY}.*
"

    local pr_response http_code
    local pr_json
    # jq -n --arg builds the payload with proper JSON escaping of ${reason}.
    pr_json=$(jq -n \
      --arg t "$pr_title" \
      --arg b "$pr_body" \
      --arg h "$branch_name" \
      --arg base "$primary_branch" \
      '{title:$t, body:$b, head:$h, base:$base}') || {
      _profile_log "ERROR: Failed to build PR JSON"
      return 1
    }

    # -w "\n%{http_code}" appends the status on its own line; split it off
    # below with tail/sed.
    pr_response=$(curl -s -w "\n%{http_code}" -X POST \
      -H "Authorization: token ${FORGE_TOKEN}" \
      -H "Content-Type: application/json" \
      "${api_url}/pulls" \
      -d "$pr_json" || true)

    http_code=$(printf '%s\n' "$pr_response" | tail -1)
    pr_response=$(printf '%s\n' "$pr_response" | sed '$d')

    if [ "$http_code" = "201" ] || [ "$http_code" = "200" ]; then
      local pr_num
      pr_num=$(printf '%s' "$pr_response" | jq -r '.number')
      _profile_log "PR created: #${pr_num}"
      # PR number on stdout is the function's "return value".
      printf '%s' "$pr_num"
      return 0
    else
      # Check if PR already exists (409 conflict) and reuse it.
      if [ "$http_code" = "409" ]; then
        local existing_pr
        existing_pr=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
          "${api_url}/pulls?state=open&head=${AGENT_IDENTITY}:formula/${short_desc}" 2>/dev/null | \
          jq -r '.[0].number // empty') || true
        if [ -n "$existing_pr" ]; then
          _profile_log "PR already exists: #${existing_pr}"
          printf '%s' "$existing_pr"
          return 0
        fi
      fi
      _profile_log "ERROR: Failed to create PR (HTTP ${http_code})"
      return 1
    fi
  )

  # Propagate the subshell's exit status as the function's return code.
  return $?
}
|
||||
178
lib/release.sh
Normal file
178
lib/release.sh
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# release.sh — disinto_release() function
|
||||
#
|
||||
# Handles vault TOML creation, branch setup on ops repo, PR creation,
|
||||
# and auto-merge request for a versioned release.
|
||||
#
|
||||
# Globals expected:
|
||||
# FORGE_URL - Forge instance URL (e.g. http://localhost:3000)
|
||||
# FORGE_TOKEN - API token for Forge operations
|
||||
# FORGE_OPS_REPO - Ops repo slug (e.g. disinto-admin/myproject-ops)
|
||||
# FACTORY_ROOT - Root of the disinto factory
|
||||
# PRIMARY_BRANCH - Primary branch name (e.g. main)
|
||||
#
|
||||
# Usage:
|
||||
# source "${FACTORY_ROOT}/lib/release.sh"
|
||||
# disinto_release <version>
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
# Source vault.sh for _vault_log helper
|
||||
source "${FACTORY_ROOT}/lib/vault.sh"
|
||||
|
||||
# Assert required globals are set before using this module.
|
||||
# _assert_release_globals
# Verify every global this module depends on is set and non-empty.
# Prints the full list of missing names to stderr and exits 1 if any
# are absent, so a misconfigured environment fails with one clear message.
_assert_release_globals() {
  local -a absent=()
  local var
  for var in FORGE_URL FORGE_TOKEN FORGE_OPS_REPO FACTORY_ROOT PRIMARY_BRANCH; do
    if [ -z "${!var:-}" ]; then
      absent+=("$var")
    fi
  done
  if [ "${#absent[@]}" -gt 0 ]; then
    echo "Error: release.sh requires these globals to be set: ${absent[*]}" >&2
    exit 1
  fi
}
|
||||
|
||||
# disinto_release <version>
# Create a release vault item in the ops repo, push it on a release branch,
# open a PR against PRIMARY_BRANCH, and request auto-merge so the release
# proceeds once the PR is approved.
#
# Arguments:
#   $1 - version in the form v<major>.<minor>.<patch> (e.g. v1.2.0)
# Globals (read): FORGE_URL FORGE_TOKEN FORGE_OPS_REPO FACTORY_ROOT
#                 PRIMARY_BRANCH, and optionally PROJECT_NAME
# Exits non-zero on any validation, git, or API failure. Uses exit rather
# than return: intended to be invoked from the disinto CLI entry point.
disinto_release() {
  _assert_release_globals

  local version="${1:-}"
  local formula_path="${FACTORY_ROOT}/formulas/release.toml"

  if [ -z "$version" ]; then
    echo "Error: version required" >&2
    echo "Usage: disinto release <version>" >&2
    echo "Example: disinto release v1.2.0" >&2
    exit 1
  fi

  # Validate version format (must start with 'v' followed by semver)
  if ! echo "$version" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then
    echo "Error: version must be in format v1.2.3 (semver with 'v' prefix)" >&2
    exit 1
  fi

  # Load project config to get FORGE_OPS_REPO
  if [ -z "${PROJECT_NAME:-}" ]; then
    # PROJECT_NAME is unset - detect project TOML from projects/ directory
    local found_toml
    found_toml=$(find "${FACTORY_ROOT}/projects" -maxdepth 1 -name "*.toml" ! -name "*.example" 2>/dev/null | head -1)
    if [ -n "$found_toml" ]; then
      source "${FACTORY_ROOT}/lib/load-project.sh" "$found_toml"
    fi
  else
    local project_toml="${FACTORY_ROOT}/projects/${PROJECT_NAME}.toml"
    if [ -f "$project_toml" ]; then
      source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml"
    fi
  fi

  # Check formula exists
  if [ ! -f "$formula_path" ]; then
    echo "Error: release formula not found at ${formula_path}" >&2
    exit 1
  fi

  # Get the ops repo root
  local ops_root="${FACTORY_ROOT}/../disinto-ops"
  if [ ! -d "${ops_root}/.git" ]; then
    echo "Error: ops repo not found at ${ops_root}" >&2
    echo "  Run 'disinto init' to set up the ops repo first" >&2
    exit 1
  fi

  # Generate a unique ID for the vault item (dots stripped: v1.2.0 -> v120)
  local id="release-${version//./}"
  local vault_toml="${ops_root}/vault/actions/${id}.toml"

  # Create vault TOML with the specific version
  cat > "$vault_toml" <<EOF
# vault/actions/${id}.toml
# Release vault item for ${version}
# Auto-generated by disinto release

id = "${id}"
formula = "release"
context = "Release ${version}"
secrets = []
EOF

  echo "Created vault item: ${vault_toml}"

  # Create a PR to submit the vault item to the ops repo
  local branch_name="release/${version//./}"
  local pr_title="release: ${version}"
  local pr_body="Release ${version}

This PR creates a vault item for the release of version ${version}.

## Changes
- Added vault item: ${id}.toml

## Next Steps
1. Review this PR
2. Approve and merge
3. The vault runner will execute the release formula
"

  # Create branch from clean primary branch. Runs in a subshell so the cd
  # does not leak; a failed push exits the subshell non-zero, which aborts
  # the function under set -e.
  (
    cd "$ops_root"
    git checkout "$PRIMARY_BRANCH"
    git pull origin "$PRIMARY_BRANCH"
    git checkout -B "$branch_name" "$PRIMARY_BRANCH"

    # Add and commit only the vault TOML file
    git add "vault/actions/${id}.toml"
    git commit -m "$pr_title" -m "$pr_body" 2>/dev/null || true

    # Push branch
    git push -u origin "$branch_name" 2>/dev/null || {
      echo "Error: failed to push branch" >&2
      exit 1
    }
  )

  # Build the PR payload with jq so quotes/newlines in the body are escaped
  # correctly. (The previous hand-rolled string interpolation + GNU-only
  # 'sed :a;N;...' newline escaping produced invalid JSON whenever the body
  # contained a double quote, and was non-portable to BSD sed.)
  local pr_json
  pr_json=$(jq -n \
    --arg t "$pr_title" \
    --arg h "$branch_name" \
    --arg base "$PRIMARY_BRANCH" \
    --arg b "$pr_body" \
    '{title:$t, head:$h, base:$base, body:$b}') || {
    echo "Error: failed to build PR JSON payload" >&2
    exit 1
  }

  # Create PR
  local pr_response
  pr_response=$(curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls" \
    -d "$pr_json" 2>/dev/null) || {
    # curl -f suppresses the body on HTTP errors, so there is no response
    # text worth printing here.
    echo "Error: failed to create PR" >&2
    exit 1
  }

  local pr_number
  pr_number=$(echo "$pr_response" | jq -r '.number')

  local pr_url="${FORGE_URL}/${FORGE_OPS_REPO}/pulls/${pr_number}"

  # Enable auto-merge on the PR — Forgejo will auto-merge after approval
  _vault_log "Enabling auto-merge for PR #${pr_number}"
  curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_URL}/api/v1/repos/${FORGE_OPS_REPO}/pulls/${pr_number}/merge" \
    -d '{"Do":"merge","merge_when_checks_succeed":true}' >/dev/null 2>&1 || {
    echo "Warning: failed to enable auto-merge (may already be enabled or not supported)" >&2
  }

  echo ""
  echo "Release PR created: ${pr_url}"
  echo ""
  echo "Next steps:"
  echo "  1. Review the PR"
  echo "  2. Approve the PR (auto-merge will trigger after approval)"
  echo "  3. The vault runner will execute the release formula"
  echo ""
  echo "After merge, the release will:"
  echo "  1. Tag Forgejo main with ${version}"
  echo "  2. Push tag to mirrors (Codeberg, GitHub)"
  echo "  3. Build and tag the agents Docker image"
  echo "  4. Restart agent containers"
}
|
||||
197
lib/stack-lock.sh
Normal file
197
lib/stack-lock.sh
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
#!/usr/bin/env bash
|
||||
# stack-lock.sh — File-based lock protocol for singleton project stack access
|
||||
#
|
||||
# Prevents CI pipelines and the reproduce-agent from stepping on each other
|
||||
# when sharing a single project stack (e.g. harb docker compose).
|
||||
#
|
||||
# Lock file: /home/agent/data/locks/<project>-stack.lock
|
||||
# Contents: {"holder": "reproduce-agent-42", "since": "...", "heartbeat": "..."}
|
||||
#
|
||||
# Protocol:
|
||||
# 1. stack_lock_check — inspect current lock state
|
||||
# 2. stack_lock_acquire — wait until lock is free, then claim it
|
||||
# 3. stack_lock_release — delete lock file when done
|
||||
#
|
||||
# Heartbeat: callers must update the heartbeat every 2 minutes while holding
|
||||
# the lock by calling stack_lock_heartbeat. A heartbeat older than 10 minutes
|
||||
# is considered stale — the next acquire will break it.
|
||||
#
|
||||
# Usage:
|
||||
# source "$(dirname "$0")/../lib/stack-lock.sh"
|
||||
# stack_lock_acquire "ci-pipeline-$BUILD_NUMBER" "myproject"
|
||||
# trap 'stack_lock_release "myproject"' EXIT
|
||||
# # ... do work ...
|
||||
# stack_lock_release "myproject"
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
STACK_LOCK_DIR="${HOME}/data/locks"
|
||||
STACK_LOCK_POLL_INTERVAL=30 # seconds between retry polls
|
||||
STACK_LOCK_STALE_SECONDS=600 # 10 minutes — heartbeat older than this = stale
|
||||
STACK_LOCK_MAX_WAIT=3600 # 1 hour — give up after this many seconds
|
||||
|
||||
# _stack_lock_path <project>
|
||||
# Print the path of the lock file for the given project.
|
||||
# _stack_lock_path <project>
# Emit the lock-file path used for <project> on stdout.
_stack_lock_path() {
  printf '%s/%s-stack.lock\n' "$STACK_LOCK_DIR" "$1"
}
|
||||
|
||||
# _stack_lock_now
|
||||
# Print current UTC timestamp in ISO-8601 format.
|
||||
# _stack_lock_now
# Emit the current UTC time as an ISO-8601 timestamp with a 'Z' suffix.
_stack_lock_now() {
  date -u '+%Y-%m-%dT%H:%M:%SZ'
}
|
||||
|
||||
# _stack_lock_epoch <iso_timestamp>
|
||||
# Convert an ISO-8601 UTC timestamp to a Unix epoch integer.
|
||||
# _stack_lock_epoch <iso_timestamp>
# Convert an ISO-8601 UTC timestamp (trailing 'Z' optional) to Unix epoch
# seconds. Tries GNU `date -d` first, then falls back to the BSD/macOS
# `date -j -f` form; errors from the unavailable variant are suppressed.
_stack_lock_epoch() {
  local stamp="${1%Z}"
  date -u -d "$stamp" +%s 2>/dev/null \
    || date -u -j -f "%Y-%m-%dT%H:%M:%S" "$stamp" +%s 2>/dev/null
}
|
||||
|
||||
# stack_lock_check <project>
|
||||
# Print lock status to stdout: "free", "held:<holder>", or "stale:<holder>".
|
||||
# Returns 0 in all cases (status is in stdout).
|
||||
# stack_lock_check <project>
# Report the lock state for <project> on stdout:
#   "free"           - no lock file exists
#   "held:<holder>"  - lock present with a fresh heartbeat
#   "stale:<holder>" - heartbeat missing, unparseable, or older than
#                      STACK_LOCK_STALE_SECONDS
# Always returns 0; callers branch on the printed status string.
stack_lock_check() {
  local lock_file
  lock_file="$(_stack_lock_path "$1")"

  if [ ! -f "$lock_file" ]; then
    echo "free"
    return 0
  fi

  # Parse holder + heartbeat from the JSON lock file; a corrupt file
  # degrades to holder "unknown" / empty heartbeat rather than failing.
  local owner hb
  owner=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder","unknown"))' "$lock_file" 2>/dev/null || echo "unknown")
  hb=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("heartbeat",""))' "$lock_file" 2>/dev/null || echo "")

  # A lock with no heartbeat at all is treated as stale immediately.
  if [ -z "$hb" ]; then
    echo "stale:${owner}"
    return 0
  fi

  # An unparseable heartbeat yields epoch 0, i.e. maximally stale.
  local hb_epoch
  hb_epoch=$(_stack_lock_epoch "$hb" 2>/dev/null || echo "0")
  if [ $(( $(date -u +%s) - hb_epoch )) -gt "$STACK_LOCK_STALE_SECONDS" ]; then
    echo "stale:${owner}"
  else
    echo "held:${owner}"
  fi
}
|
||||
|
||||
# stack_lock_acquire <holder_id> <project> [max_wait_seconds]
|
||||
# Acquire the lock for <project> on behalf of <holder_id>.
|
||||
# Polls every STACK_LOCK_POLL_INTERVAL seconds.
|
||||
# Breaks stale locks automatically.
|
||||
# Exits non-zero if the lock cannot be acquired within max_wait_seconds.
|
||||
# stack_lock_acquire <holder_id> <project> [max_wait_seconds]
# Acquire the lock for <project> on behalf of <holder_id>.
# Polls every STACK_LOCK_POLL_INTERVAL seconds and breaks stale locks
# automatically. Returns 1 if the lock cannot be acquired within
# max_wait_seconds (default STACK_LOCK_MAX_WAIT).
#
# The claim itself is atomic: the lock JSON is written to a temp file and
# hard-linked into place with `ln`, which fails if the lock file already
# exists. The previous `mv`-based claim silently overwrote a lock created
# by a concurrent acquirer between our "free" check and the claim, letting
# two processes both believe they held the lock (TOCTOU race).
stack_lock_acquire() {
  local holder="$1"
  local project="$2"
  local max_wait="${3:-$STACK_LOCK_MAX_WAIT}"
  local lock_file
  lock_file="$(_stack_lock_path "$project")"
  local deadline
  deadline=$(( $(date -u +%s) + max_wait ))

  mkdir -p "$STACK_LOCK_DIR"

  while true; do
    local status
    status=$(stack_lock_check "$project")

    case "$status" in
      free)
        # Write the record to a temp file, then link it into place.
        # ln is atomic and refuses to clobber, so of N racing acquirers
        # exactly one wins; the losers fall through and re-check.
        local tmp_lock now
        tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX")
        now=$(_stack_lock_now)
        printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \
          "$holder" "$now" "$now" > "$tmp_lock"
        if ln "$tmp_lock" "$lock_file" 2>/dev/null; then
          rm -f "$tmp_lock"
          echo "[stack-lock] acquired lock for ${project} as ${holder}" >&2
          return 0
        fi
        # Lost the race — another process claimed the lock first.
        rm -f "$tmp_lock"
        ;;
      stale:*)
        local stale_holder="${status#stale:}"
        echo "[stack-lock] breaking stale lock held by ${stale_holder} for ${project}" >&2
        rm -f "$lock_file"
        # Loop back immediately to re-check and claim
        ;;
      held:*)
        local cur_holder="${status#held:}"
        local remaining
        remaining=$(( deadline - $(date -u +%s) ))
        if [ "$remaining" -le 0 ]; then
          echo "[stack-lock] timed out waiting for lock on ${project} (held by ${cur_holder})" >&2
          return 1
        fi
        echo "[stack-lock] ${project} locked by ${cur_holder}, waiting ${STACK_LOCK_POLL_INTERVAL}s (${remaining}s left)..." >&2
        sleep "$STACK_LOCK_POLL_INTERVAL"
        ;;
      *)
        echo "[stack-lock] unexpected status '${status}' for ${project}" >&2
        return 1
        ;;
    esac
  done
}
|
||||
|
||||
# stack_lock_heartbeat <holder_id> <project>
|
||||
# Update the heartbeat timestamp in the lock file.
|
||||
# Should be called every 2 minutes while holding the lock.
|
||||
# No-op if the lock file is absent or held by a different holder.
|
||||
# stack_lock_heartbeat <holder_id> <project>
# Refresh the heartbeat timestamp in <project>'s lock file. Holders should
# call this every ~2 minutes while working. Silently a no-op when the lock
# file is absent or currently owned by a different holder.
stack_lock_heartbeat() {
  local holder="$1" project="$2"
  local lock_file
  lock_file="$(_stack_lock_path "$project")"

  [ -f "$lock_file" ] || return 0

  # Only the current owner may refresh the heartbeat.
  local current_holder
  current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "")
  [ "$current_holder" = "$holder" ] || return 0

  # Preserve the original "since" value; only the heartbeat advances.
  local since now
  since=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("since",""))' "$lock_file" 2>/dev/null || echo "")
  now=$(_stack_lock_now)

  # Rewrite via temp file + rename so readers never see a partial JSON blob.
  local tmp_lock
  tmp_lock=$(mktemp "${STACK_LOCK_DIR}/.lock-tmp-XXXXXX")
  printf '{"holder": "%s", "since": "%s", "heartbeat": "%s"}\n' \
    "$holder" "$since" "$now" > "$tmp_lock"
  mv "$tmp_lock" "$lock_file"
}
|
||||
|
||||
# stack_lock_release <project> [holder_id]
|
||||
# Release the lock for <project>.
|
||||
# If holder_id is provided, only releases if the lock is held by that holder
|
||||
# (prevents accidentally releasing someone else's lock).
|
||||
# stack_lock_release <project> [holder_id]
# Remove the lock file for <project>. When holder_id is supplied, the
# release is refused (returns 1) unless that holder currently owns the
# lock — this prevents accidentally releasing someone else's lock.
stack_lock_release() {
  local project="$1"
  local expected="${2:-}"
  local lock_file
  lock_file="$(_stack_lock_path "$project")"

  [ -f "$lock_file" ] || return 0

  if [ -n "$expected" ]; then
    local current_holder
    current_holder=$(python3 -c 'import sys,json; d=json.load(open(sys.argv[1])); print(d.get("holder",""))' "$lock_file" 2>/dev/null || echo "")
    if [ "$current_holder" != "$expected" ]; then
      echo "[stack-lock] refusing to release: lock held by '${current_holder}', not '${expected}'" >&2
      return 1
    fi
  fi

  rm -f "$lock_file"
  echo "[stack-lock] released lock for ${project}" >&2
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Planner Agent
|
||||
|
||||
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
|
||||
|
|
@ -65,7 +65,7 @@ component, not work.
|
|||
tree, humans steer by editing VISION.md. Tree grows organically as the
|
||||
planner discovers new prerequisites during runs
|
||||
- `$OPS_REPO_ROOT/knowledge/planner-memory.md` — Persistent memory across runs (in ops repo)
|
||||
- `$OPS_REPO_ROOT/journal/planner/*.md` — Daily raw logs from each planner run (in ops repo)
|
||||
|
||||
|
||||
**Constraint focus**: The planner uses Theory of Constraints to avoid premature
|
||||
issue filing. Only the top 3 unresolved prerequisites that block the most
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
|||
# shellcheck source=../lib/agent-sdk.sh
|
||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||
|
||||
LOG_FILE="$SCRIPT_DIR/planner.log"
|
||||
LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
LOGFILE="$LOG_FILE"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
|
|
@ -43,20 +43,29 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid"
|
|||
SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md"
|
||||
WORKTREE="/tmp/${PROJECT_NAME}-planner-run"
|
||||
|
||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
||||
# Override LOG_AGENT for consistent agent identification
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||
LOG_AGENT="planner"
|
||||
|
||||
# Override log() to append to planner-specific log file
|
||||
# shellcheck disable=SC2034
|
||||
log() {
|
||||
local agent="${LOG_AGENT:-planner}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||
}
|
||||
|
||||
# ── Guards ────────────────────────────────────────────────────────────────
|
||||
check_active planner
|
||||
acquire_cron_lock "/tmp/planner-run.lock"
|
||||
check_memory 2000
|
||||
memory_guard 2000
|
||||
|
||||
log "--- Planner run start ---"
|
||||
|
||||
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||
resolve_forge_remote
|
||||
|
||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PLANNER_TOKEN:-}" ]; then
|
||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PLANNER_TOKEN}" \
|
||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
||||
fi
|
||||
resolve_agent_identity || true
|
||||
|
||||
# ── Load formula + context ───────────────────────────────────────────────
|
||||
load_formula_or_profile "planner" "$FACTORY_ROOT/formulas/run-planner.toml" || exit 1
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Predictor Agent
|
||||
|
||||
**Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
|||
# shellcheck source=../lib/agent-sdk.sh
|
||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||
|
||||
LOG_FILE="$SCRIPT_DIR/predictor.log"
|
||||
LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
LOGFILE="$LOG_FILE"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
|
|
@ -44,20 +44,29 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid"
|
|||
SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md"
|
||||
WORKTREE="/tmp/${PROJECT_NAME}-predictor-run"
|
||||
|
||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
||||
# Override LOG_AGENT for consistent agent identification
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||
LOG_AGENT="predictor"
|
||||
|
||||
# Override log() to append to predictor-specific log file
|
||||
# shellcheck disable=SC2034
|
||||
log() {
|
||||
local agent="${LOG_AGENT:-predictor}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||
}
|
||||
|
||||
# ── Guards ────────────────────────────────────────────────────────────────
|
||||
check_active predictor
|
||||
acquire_cron_lock "/tmp/predictor-run.lock"
|
||||
check_memory 2000
|
||||
memory_guard 2000
|
||||
|
||||
log "--- Predictor run start ---"
|
||||
|
||||
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||
resolve_forge_remote
|
||||
|
||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_PREDICTOR_TOKEN:-}" ]; then
|
||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_PREDICTOR_TOKEN}" \
|
||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
||||
fi
|
||||
resolve_agent_identity || true
|
||||
|
||||
# ── Load formula + context ───────────────────────────────────────────────
|
||||
load_formula_or_profile "predictor" "$FACTORY_ROOT/formulas/run-predictor.toml" || exit 1
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
name = "disinto"
|
||||
repo = "johba/disinto"
|
||||
ops_repo = "johba/disinto-ops"
|
||||
ops_repo = "disinto-admin/disinto-ops"
|
||||
forge_url = "http://localhost:3000"
|
||||
repo_root = "/home/YOU/dark-factory"
|
||||
ops_repo_root = "/home/YOU/disinto-ops"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Review Agent
|
||||
|
||||
**Role**: AI-powered PR review — post structured findings and formal
|
||||
|
|
@ -9,8 +9,8 @@ whose CI has passed and that lack a review for the current HEAD SHA, then
|
|||
spawns `review-pr.sh <pr-number>`.
|
||||
|
||||
**Key files**:
|
||||
- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent.
|
||||
- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Before starting the session, runs `lib/build-graph.py --changed-files <PR files>` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it.
|
||||
- `review/review-poll.sh` — Cron scheduler: finds unreviewed PRs with passing CI. Sources `lib/guard.sh` and calls `check_active reviewer` — skips if `$FACTORY_ROOT/state/.reviewer-active` is absent. **Circuit breaker**: counts existing `<!-- review-error: <sha> -->` comments; skips a PR if ≥3 consecutive errors for the same HEAD SHA (prevents flooding on repeated review failures).
|
||||
- `review/review-pr.sh` — Creates/reuses a tmux session (`review-{project}-{pr}`), injects PR diff, waits for Claude to write structured JSON output, posts markdown review + formal forge review, auto-creates follow-up issues for pre-existing tech debt. Calls `resolve_forge_remote()` at startup to determine the correct git remote name (avoids hardcoded 'origin'). Before starting the session, runs `lib/build-graph.py --changed-files <PR files>` and appends the JSON structural analysis (affected objectives, orphaned prerequisites, thin evidence) to the review prompt. Graph failures are non-fatal — review proceeds without it.
|
||||
|
||||
**Environment variables consumed**:
|
||||
- `FORGE_TOKEN` — Dev-agent token (must not be the same account as FORGE_REVIEW_TOKEN)
|
||||
|
|
|
|||
|
|
@ -23,8 +23,15 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log"
|
|||
MAX_REVIEWS=3
|
||||
REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle
|
||||
|
||||
# Override LOG_AGENT for consistent agent identification
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||
LOG_AGENT="review"
|
||||
|
||||
# Override log() to append to review-specific log file
|
||||
# shellcheck disable=SC2034
|
||||
log() {
|
||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
|
||||
local agent="${LOG_AGENT:-review}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOGFILE"
|
||||
}
|
||||
|
||||
# Log rotation
|
||||
|
|
@ -126,10 +133,11 @@ if [ -n "$REVIEW_SIDS" ]; then
|
|||
|
||||
log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}→${current_sha:0:7})"
|
||||
|
||||
if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then
|
||||
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$?
|
||||
if [ "$review_rc" -eq 0 ]; then
|
||||
REVIEWED=$((REVIEWED + 1))
|
||||
else
|
||||
log " #${pr_num} re-review failed"
|
||||
log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)"
|
||||
fi
|
||||
|
||||
[ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break
|
||||
|
|
@ -180,10 +188,11 @@ while IFS= read -r line; do
|
|||
|
||||
log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}"
|
||||
|
||||
if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then
|
||||
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$?
|
||||
if [ "$review_rc" -eq 0 ]; then
|
||||
REVIEWED=$((REVIEWED + 1))
|
||||
else
|
||||
log " #${PR_NUM} review failed"
|
||||
log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)"
|
||||
fi
|
||||
|
||||
if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then
|
||||
|
|
|
|||
|
|
@ -58,13 +58,15 @@ if [ -f "$LOGFILE" ] && [ "$(stat -c%s "$LOGFILE" 2>/dev/null || echo 0)" -gt 10
|
|||
mv "$LOGFILE" "$LOGFILE.old"
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# RESOLVE FORGE REMOTE FOR GIT OPERATIONS
|
||||
# =============================================================================
|
||||
resolve_forge_remote
|
||||
|
||||
# =============================================================================
|
||||
# RESOLVE AGENT IDENTITY FOR .PROFILE REPO
|
||||
# =============================================================================
|
||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_TOKEN:-}" ]; then
|
||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
||||
fi
|
||||
resolve_agent_identity || true
|
||||
|
||||
# =============================================================================
|
||||
# MEMORY GUARD
|
||||
|
|
@ -131,7 +133,7 @@ PREV_REV=$(printf '%s' "$ALL_COMMENTS" | jq -r --arg s "$PR_SHA" \
|
|||
if [ -n "$PREV_REV" ] && [ "$PREV_REV" != "null" ]; then
|
||||
PREV_BODY=$(printf '%s' "$PREV_REV" | jq -r '.body')
|
||||
PREV_SHA=$(printf '%s' "$PREV_BODY" | grep -oP '<!-- reviewed: \K[a-f0-9]+' | head -1)
|
||||
cd "${PROJECT_REPO_ROOT}"; git fetch origin "$PR_HEAD" 2>/dev/null || true
|
||||
cd "${PROJECT_REPO_ROOT}"; git fetch "${FORGE_REMOTE}" "$PR_HEAD" 2>/dev/null || true
|
||||
INCR=$(git diff "${PREV_SHA}..${PR_SHA}" 2>/dev/null | head -c "$MAX_DIFF") || true
|
||||
if [ -n "$INCR" ]; then
|
||||
IS_RE_REVIEW=true; log "re-review: previous at ${PREV_SHA:0:7}"
|
||||
|
|
@ -162,7 +164,7 @@ DNOTE=""; [ "$FSIZE" -gt "$MAX_DIFF" ] && DNOTE=" (truncated from ${FSIZE} bytes
|
|||
# WORKTREE SETUP
|
||||
# =============================================================================
|
||||
cd "${PROJECT_REPO_ROOT}"
|
||||
git fetch origin "$PR_HEAD" 2>/dev/null || true
|
||||
git fetch "${FORGE_REMOTE}" "$PR_HEAD" 2>/dev/null || true
|
||||
|
||||
if [ -d "$WORKTREE" ]; then
|
||||
cd "$WORKTREE"; git checkout --detach "$PR_SHA" 2>/dev/null || {
|
||||
|
|
|
|||
2
state/.gitignore
vendored
2
state/.gitignore
vendored
|
|
@ -1,2 +1,4 @@
|
|||
# Active-state files are runtime state, not committed
|
||||
.*-active
|
||||
# Supervisor is always active in the edge container — committed guard file
|
||||
!.supervisor-active
|
||||
|
|
|
|||
0
state/.supervisor-active
Normal file
0
state/.supervisor-active
Normal file
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: f32707ba659de278a3af434e3549fb8a8dce9d3a -->
|
||||
<!-- last-reviewed: ac2beac361503c8712ecfc72be0401b5968cce4e -->
|
||||
# Supervisor Agent
|
||||
|
||||
**Role**: Health monitoring and auto-remediation, executed as a formula-driven
|
||||
|
|
@ -9,19 +9,17 @@ resources or human decisions, files vault items instead of escalating directly.
|
|||
|
||||
**Trigger**: `supervisor-run.sh` runs every 20 min via cron. Sources `lib/guard.sh`
|
||||
and calls `check_active supervisor` first — skips if
|
||||
`$FACTORY_ROOT/state/.supervisor-active` is absent. Then creates a tmux session
|
||||
with `claude --model sonnet`, injects `formulas/run-supervisor.toml` with
|
||||
pre-collected metrics as context, monitors the phase file, and cleans up on
|
||||
completion or timeout (20 min max session). No action issues — the supervisor
|
||||
runs directly from cron like the planner and predictor.
|
||||
`$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p`
|
||||
via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with
|
||||
pre-collected metrics as context, and cleans up on completion or timeout (20 min max session).
|
||||
No action issues — the supervisor runs directly from cron like the planner and predictor.
|
||||
|
||||
**Key files**:
|
||||
- `supervisor/supervisor-run.sh` — Cron wrapper + orchestrator: lock, memory guard,
|
||||
runs preflight.sh, sources disinto project config, creates tmux session, injects
|
||||
formula prompt with metrics, monitors phase file, handles crash recovery via
|
||||
`run_formula_and_monitor`
|
||||
runs preflight.sh, sources disinto project config, runs claude -p via agent-sdk.sh,
|
||||
injects formula prompt with metrics, handles crash recovery
|
||||
- `supervisor/preflight.sh` — Data collection: system resources (RAM, disk, swap,
|
||||
load), Docker status, active tmux sessions + phase files, lock files, agent log
|
||||
load), Docker status, active sessions + phase files, lock files, agent log
|
||||
tails, CI pipeline status, open PRs, issue counts, stale worktrees, blocked
|
||||
issues. Also performs **stale phase cleanup**: scans `/tmp/*-session-*.phase`
|
||||
files for `PHASE:escalate` entries and auto-removes any whose linked issue
|
||||
|
|
@ -31,11 +29,8 @@ runs directly from cron like the planner and predictor.
|
|||
- `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review,
|
||||
health-assessment, decide-actions, report, journal) with `needs` dependencies.
|
||||
Claude evaluates all metrics and takes actions in a single interactive session
|
||||
- `$OPS_REPO_ROOT/journal/supervisor/*.md` — Daily health logs from each supervisor run
|
||||
- `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory,
|
||||
disk, CI, git, dev-agent, review-agent, forge)
|
||||
- `supervisor/supervisor-poll.sh` — Legacy bash orchestrator (superseded by
|
||||
supervisor-run.sh + formula)
|
||||
|
||||
**Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled),
|
||||
P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
||||
|
|
@ -46,5 +41,5 @@ P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
|||
- `WOODPECKER_TOKEN`, `WOODPECKER_SERVER`, `WOODPECKER_DB_PASSWORD`, `WOODPECKER_DB_USER`, `WOODPECKER_DB_HOST`, `WOODPECKER_DB_NAME` — CI database queries
|
||||
|
||||
**Lifecycle**: supervisor-run.sh (cron */20) → lock + memory guard → run
|
||||
preflight.sh (collect metrics) → load formula + context → create tmux
|
||||
session → Claude assesses health, auto-fixes, writes journal → `PHASE:done`.
|
||||
preflight.sh (collect metrics) → load formula + context → run claude -p via agent-sdk.sh
|
||||
→ Claude assesses health, auto-fixes, writes journal → `PHASE:done`.
|
||||
|
|
|
|||
|
|
@ -1,808 +0,0 @@
|
|||
#!/usr/bin/env bash
set -euo pipefail
# supervisor-poll.sh — Supervisor agent: bash checks + claude -p for fixes
#
# Two-layer architecture:
# 1. Factory infrastructure (project-agnostic): RAM, disk, swap, docker, stale processes
# 2. Per-project checks (config-driven): CI, PRs, dev-agent, deps — iterated over projects/*.toml
#
# Runs every 10min via cron.
#
# Cron: */10 * * * * /path/to/disinto/supervisor/supervisor-poll.sh
#
# Peek: cat /tmp/supervisor-status
# Log: tail -f /path/to/disinto/supervisor/supervisor.log

# lib/env.sh presumably provides DISINTO_LOG_DIR and FACTORY_ROOT (both used
# below but never set in this file); lib/ci-helpers.sh presumably provides the
# wpdb/forge_api/woodpecker_api/ci_* helpers called later — confirm in lib/.
source "$(dirname "$0")/../lib/env.sh"
source "$(dirname "$0")/../lib/ci-helpers.sh"

LOGFILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log"      # append-only run log (written by flog)
STATUSFILE="/tmp/supervisor-status"                         # one-line "currently doing X" peek file
LOCKFILE="/tmp/supervisor-poll.lock"                        # holds the PID of the running instance
PROMPT_FILE="${FACTORY_ROOT}/formulas/run-supervisor.toml"
PROJECTS_DIR="${FACTORY_ROOT}/projects"

# JSONL metrics store; trimmed to a 30-day window by rotate_metrics each run.
METRICS_FILE="${DISINTO_LOG_DIR}/metrics/supervisor-metrics.jsonl"
||||
# Append one pre-serialized JSON record as a single line to the JSONL
# metrics file ($METRICS_FILE).
emit_metric() {
  local record="$1"
  printf '%s\n' "$record" >> "$METRICS_FILE"
}
|
||||
|
||||
# Count all matching items from a paginated forge API endpoint.
# Usage: codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues"
# Fetches pages of 50 until a short page or the 20-page cap is hit
# (max 20 pages = 1000 items) and prints the total on stdout.
# A failed forge_api/jq call counts as an empty (0-item) page.
codeberg_count_paginated() {
  local endpoint="$1"
  local total=0 batch page
  for (( page = 1; page <= 20; page++ )); do
    batch=$(forge_api GET "${endpoint}&limit=50&page=${page}" 2>/dev/null | jq 'length' 2>/dev/null || echo 0)
    total=$(( total + ${batch:-0} ))
    # A short page is the last page — stop fetching.
    if [ "${batch:-0}" -lt 50 ]; then
      break
    fi
  done
  echo "$total"
}
|
||||
|
||||
# Trim the JSONL metrics file to records from the last 30 days.
# The filtered output replaces the original only when jq itself exits 0, so:
#   - a file whose records are *all* stale is now correctly emptied (the old
#     "only replace if jq produced output" size check could never shrink the
#     file to zero and kept stale data forever);
#   - a missing jq binary or an unparsable line leaves the original untouched
#     instead of aborting the whole script under `set -e` (the previous bare
#     `jq … > tmpfile` would have done exactly that on failure).
rotate_metrics() {
  [ -f "$METRICS_FILE" ] || return 0
  local cutoff tmpfile
  # Lexicographic compare works because .ts is ISO-8601 (YYYY-MM-DDTHH:MM…).
  cutoff=$(date -u -d '30 days ago' +%Y-%m-%dT%H:%M)
  tmpfile="${METRICS_FILE}.tmp"
  if jq -c --arg cutoff "$cutoff" 'select(.ts >= $cutoff)' \
      "$METRICS_FILE" > "$tmpfile" 2>/dev/null; then
    mv "$tmpfile" "$METRICS_FILE"
  else
    # jq failed (missing binary or malformed JSONL) — keep the original file.
    rm -f "$tmpfile"
  fi
}
|
||||
|
||||
# Prevent overlapping runs: a stale lock (dead PID) is reclaimed, a live one
# wins and we exit silently.
# NOTE(review): the read-then-write is not atomic — two pollers starting at
# the same instant could both pass the check; presumably acceptable at a
# */10 cron cadence.
if [ -f "$LOCKFILE" ]; then
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null)
  if kill -0 "$LOCK_PID" 2>/dev/null; then
    exit 0
  fi
  rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
# Status file removed on exit too: its absence means "supervisor not running".
trap 'rm -f "$LOCKFILE" "$STATUSFILE"' EXIT

mkdir -p "$(dirname "$METRICS_FILE")"
rotate_metrics
|
||||
|
||||
# Append a UTC-timestamped message to the supervisor log file ($LOGFILE).
flog() {
  local stamp
  stamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  printf '[%s] %s\n' "$stamp" "$*" >> "$LOGFILE"
}
|
||||
|
||||
# Overwrite the one-line status peek file ($STATUSFILE) with a timestamped
# "supervisor: <msg>" line, and mirror the message into the log via flog.
status() {
  local stamp
  stamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  printf '[%s] supervisor: %s\n' "$stamp" "$*" > "$STATUSFILE"
  flog "$*"
}
|
||||
|
||||
# Alert accumulators, one per priority tier. Each entry is a bullet line
# terminated by a literal "\n" escape sequence (expanded later by the
# consumer, e.g. echo -e / printf).
P0_ALERTS=""
P1_ALERTS=""
P2_ALERTS=""
P3_ALERTS=""
P4_ALERTS=""

# Shared appender: _append_alert <tier> <message...>
# Adds "• [P<tier>] msg\n" to P<tier>_ALERTS (literal backslash-n, matching
# the consumer's expectations) and mirrors the message into the log.
_append_alert() {
  local tier="$1"
  shift
  local var="P${tier}_ALERTS"
  printf -v "$var" '%s• [P%s] %s\\n' "${!var}" "$tier" "$*"
  flog "P${tier}: $*"
}

p0() { _append_alert 0 "$@"; }
p1() { _append_alert 1 "$@"; }
p2() { _append_alert 2 "$@"; }
p3() { _append_alert 3 "$@"; }
p4() { _append_alert 4 "$@"; }

# Auto-fix record: same bullet format, checkmark-prefixed.
FIXES=""
fixed() {
  printf -v FIXES '%s• ✅ %s\\n' "$FIXES" "$*"
  flog "FIXED: $*"
}
|
||||
# #############################################################################
# LAYER 1: FACTORY INFRASTRUCTURE
# (project-agnostic, runs once)
# #############################################################################

# =============================================================================
# P0: MEMORY — check first, fix first
# =============================================================================
status "P0: checking memory"

# "available" column (field 7 of `free -m` Mem row) and swap in use, both MB.
AVAIL_MB=$(free -m | awk '/Mem:/{print $7}')
SWAP_USED_MB=$(free -m | awk '/Swap:/{print $3}')

# Crisis: <500MB available, or heavy swap (>3GB) combined with <2GB available.
# Defaults (9999/0) make an unparsable `free` read as "healthy".
if [ "${AVAIL_MB:-9999}" -lt 500 ] || { [ "${SWAP_USED_MB:-0}" -gt 3000 ] && [ "${AVAIL_MB:-9999}" -lt 2000 ]; }; then
  flog "MEMORY CRISIS: avail=${AVAIL_MB}MB swap_used=${SWAP_USED_MB}MB — auto-fixing"

  # Kill stale agent-spawned claude processes (>3h old) — skip interactive sessions
  # NOTE(review): `pgrep --older` needs a recent procps-ng — confirm on host.
  STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
  if [ -n "$STALE_CLAUDES" ]; then
    echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
    fixed "Killed stale claude processes: ${STALE_CLAUDES}"
  fi

  # Drop filesystem caches
  # NOTE(review): under `set -e` a failing sudo/tee aborts the whole poll
  # (an AND-list's failure is not exempt) — consider appending `|| true`.
  sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null 2>&1
  fixed "Dropped filesystem caches"

  # Re-check after fixes
  AVAIL_MB_AFTER=$(free -m | awk '/Mem:/{print $7}')
  SWAP_AFTER=$(free -m | awk '/Swap:/{print $3}')

  if [ "${AVAIL_MB_AFTER:-0}" -lt 500 ] || [ "${SWAP_AFTER:-0}" -gt 3000 ]; then
    p0 "Memory still critical after auto-fix: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
  else
    flog "Memory recovered: avail=${AVAIL_MB_AFTER}MB swap=${SWAP_AFTER}MB"
  fi
fi

# P0 alerts already logged — clear so they are not duplicated in the final consolidated log
if [ -n "$P0_ALERTS" ]; then
  P0_ALERTS=""
fi
|
||||
# =============================================================================
# P1: DISK
# =============================================================================
status "P1: checking disk"

# Root filesystem usage as an integer percentage (trailing '%' stripped).
DISK_PERCENT=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')

if [ "${DISK_PERCENT:-0}" -gt 80 ]; then
  flog "DISK PRESSURE: ${DISK_PERCENT}% — auto-cleaning"

  # Docker cleanup (safe — keeps images)
  sudo docker system prune -f >/dev/null 2>&1 && fixed "Docker prune"

  # Truncate logs >10MB
  for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do
    if [ -f "$logfile" ]; then
      SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
      if [ "${SIZE_KB:-0}" -gt 10240 ]; then
        truncate -s 0 "$logfile"
        fixed "Truncated $(basename "$logfile") (was ${SIZE_KB}KB)"
      fi
    fi
  done

  # Woodpecker log_entries cleanup
  # wpdb presumably wraps psql against the Woodpecker DB — see lib/ci-helpers.sh.
  LOG_ENTRIES_MB=$(wpdb -c "SELECT pg_size_pretty(pg_total_relation_size('log_entries'));" 2>/dev/null | xargs)
  if echo "$LOG_ENTRIES_MB" | grep -qP '\d+\s*(GB|MB)'; then
    # NOTE(review): grep -oP '\d+' emits one match per line; fine for
    # pg_size_pretty's single integer, but a fractional value like
    # "1.5 GB" would yield two lines — confirm output format.
    SIZE_NUM=$(echo "$LOG_ENTRIES_MB" | grep -oP '\d+')
    SIZE_UNIT=$(echo "$LOG_ENTRIES_MB" | grep -oP '(GB|MB)')
    # Trim when the table exceeds ~500MB (or any GB-sized value),
    # keeping only the newest 100k rows.
    if [ "$SIZE_UNIT" = "GB" ] || { [ "$SIZE_UNIT" = "MB" ] && [ "$SIZE_NUM" -gt 500 ]; }; then
      wpdb -c "DELETE FROM log_entries WHERE id < (SELECT max(id) - 100000 FROM log_entries);" 2>/dev/null
      fixed "Trimmed Woodpecker log_entries (was ${LOG_ENTRIES_MB})"
    fi
  fi

  DISK_AFTER=$(df -h / | awk 'NR==2{print $5}' | tr -d '%')
  if [ "${DISK_AFTER:-0}" -gt 80 ]; then
    p1 "Disk still ${DISK_AFTER}% after auto-clean"
  else
    flog "Disk recovered: ${DISK_AFTER}%"
  fi
fi

# P1 alerts already logged — clear so they are not duplicated in the final consolidated log
if [ -n "$P1_ALERTS" ]; then
  P1_ALERTS=""
fi
|
||||
|
||||
# Emit infra metric
# Used % = (total - available) / total, guarded against a zero/unparsed total;
# all jq failures are swallowed so metrics emission can never abort the poll.
_RAM_TOTAL_MB=$(free -m | awk '/Mem:/{print $2}')
_RAM_USED_PCT=$(( ${_RAM_TOTAL_MB:-0} > 0 ? (${_RAM_TOTAL_MB:-0} - ${AVAIL_MB:-0}) * 100 / ${_RAM_TOTAL_MB:-1} : 0 ))
emit_metric "$(jq -nc \
  --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \
  --argjson ram "${_RAM_USED_PCT:-0}" \
  --argjson disk "${DISK_PERCENT:-0}" \
  --argjson swap "${SWAP_USED_MB:-0}" \
  '{ts:$ts,type:"infra",ram_used_pct:$ram,disk_used_pct:$disk,swap_mb:$swap}' 2>/dev/null)" 2>/dev/null || true
|
||||
|
||||
# =============================================================================
# P4-INFRA: HOUSEKEEPING — stale processes, log rotation (project-agnostic)
# =============================================================================
status "P4: infra housekeeping"

# Stale agent-spawned claude processes (>3h) — skip interactive sessions
STALE_CLAUDES=$(pgrep -f "claude -p" --older 10800 2>/dev/null || true)
if [ -n "$STALE_CLAUDES" ]; then
  echo "$STALE_CLAUDES" | xargs kill 2>/dev/null || true
  fixed "Killed stale claude processes: $(echo $STALE_CLAUDES | wc -w) procs"
fi

# Rotate logs >5MB
# NOTE(review): single .old generation — a later rotation overwrites the
# previous .old; confirm that losing the older generation is acceptable.
for logfile in "${DISINTO_LOG_DIR}"/{dev,review,supervisor}/*.log; do
  if [ -f "$logfile" ]; then
    SIZE_KB=$(du -k "$logfile" 2>/dev/null | cut -f1)
    if [ "${SIZE_KB:-0}" -gt 5120 ]; then
      mv "$logfile" "${logfile}.old" 2>/dev/null
      fixed "Rotated $(basename "$logfile")"
    fi
  fi
done
|
||||
|
||||
# #############################################################################
# LAYER 2: PER-PROJECT CHECKS
# (iterated over projects/*.toml, config-driven)
# #############################################################################

# Infra retry tracking (shared across projects, created once)
# One file per "<repo_id>-<pipeline_number>" holding that pipeline's retry count.
_RETRY_DIR="/tmp/supervisor-infra-retries"
mkdir -p "$_RETRY_DIR"
|
||||
|
||||
# Function: run all per-project checks for the currently loaded project config
#
# Relies on per-project environment set by the caller (presumably loaded from
# projects/*.toml by the iteration loop below — confirm): PROJECT_NAME,
# FORGE_REPO, WOODPECKER_REPO_ID, PROJECT_REPO_ROOT, PRIMARY_BRANCH, and the
# CHECK_* feature toggles (all default "true"). Uses wpdb/forge_api/
# woodpecker_api/ci_commit_status/ci_passed/classify_pipeline_failure from
# lib/ci-helpers.sh. Reports findings via p2/p3 and auto-fixes via fixed().
check_project() {
  local proj_name="${PROJECT_NAME:-unknown}"
  flog "── checking project: ${proj_name} (${FORGE_REPO}) ──"

  # ===========================================================================
  # P2: FACTORY STOPPED — CI, dev-agent, git
  # ===========================================================================
  status "P2: ${proj_name}: checking pipeline"

  # CI stuck
  STUCK_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='running' AND EXTRACT(EPOCH FROM now() - to_timestamp(started)) > 1200;" 2>/dev/null | xargs || true)
  # 2>/dev/null on the test guards against non-numeric output from a failed query
  [ "${STUCK_CI:-0}" -gt 0 ] 2>/dev/null && p2 "${proj_name}: CI: ${STUCK_CI} pipeline(s) running >20min"

  PENDING_CI=$(wpdb -c "SELECT count(*) FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status='pending' AND EXTRACT(EPOCH FROM now() - to_timestamp(created)) > 1800;" 2>/dev/null | xargs || true)
  [ "${PENDING_CI:-0}" -gt 0 ] && p2 "${proj_name}: CI: ${PENDING_CI} pipeline(s) pending >30min"

  # Emit CI metric (last completed pipeline within 24h — skip if project has no recent CI)
  _CI_ROW=$(wpdb -A -F ',' -c "SELECT id, COALESCE(ROUND(EXTRACT(EPOCH FROM (to_timestamp(finished) - to_timestamp(started)))/60)::int, 0), status FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND status IN ('success','failure','error') AND finished > 0 AND to_timestamp(finished) > now() - interval '24 hours' ORDER BY id DESC LIMIT 1;" 2>/dev/null | grep -E '^[0-9]' | head -1 || true)
  if [ -n "$_CI_ROW" ]; then
    _CI_ID=$(echo "$_CI_ROW" | cut -d',' -f1 | tr -d ' ')
    _CI_DUR=$(echo "$_CI_ROW" | cut -d',' -f2 | tr -d ' ')
    _CI_STAT=$(echo "$_CI_ROW" | cut -d',' -f3 | tr -d ' ')
    emit_metric "$(jq -nc \
      --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \
      --arg proj "$proj_name" \
      --argjson pipeline "${_CI_ID:-0}" \
      --argjson duration "${_CI_DUR:-0}" \
      --arg status "${_CI_STAT:-unknown}" \
      '{ts:$ts,type:"ci",project:$proj,pipeline:$pipeline,duration_min:$duration,status:$status}' 2>/dev/null)" 2>/dev/null || true
  fi

  # ===========================================================================
  # P2e: INFRA FAILURES — auto-retrigger pipelines with infra failures
  # ===========================================================================
  if [ "${CHECK_INFRA_RETRY:-true}" = "true" ]; then
    status "P2e: ${proj_name}: checking infra failures"

    # Recent failed pipelines (last 6h)
    _failed_nums=$(wpdb -A -c "
      SELECT number FROM pipelines
      WHERE repo_id = ${WOODPECKER_REPO_ID}
        AND status IN ('failure', 'error')
        AND finished > 0
        AND to_timestamp(finished) > now() - interval '6 hours'
      ORDER BY number DESC LIMIT 5;" 2>/dev/null \
      | tr -d ' ' | grep -E '^[0-9]+$' || true)

    # shellcheck disable=SC2086
    for _pip_num in $_failed_nums; do
      [ -z "$_pip_num" ] && continue

      # Check retry count; alert if retries exhausted
      _retry_file="${_RETRY_DIR}/${WOODPECKER_REPO_ID}-${_pip_num}"
      _retries=0
      [ -f "$_retry_file" ] && _retries=$(cat "$_retry_file" 2>/dev/null || echo 0)
      if [ "${_retries:-0}" -ge 2 ]; then
        p2 "${proj_name}: Pipeline #${_pip_num}: infra retries exhausted (2/2), needs manual investigation"
        continue
      fi

      # Classify failure type via shared helper
      _classification=$(classify_pipeline_failure "${WOODPECKER_REPO_ID}" "$_pip_num" 2>/dev/null || echo "code")

      # Only infra-classified failures ("infra <reason>") are retriggered;
      # retry count is persisted only after a successful retrigger call.
      if [[ "$_classification" == infra* ]]; then
        _infra_reason="${_classification#infra }"
        _new_retries=$(( _retries + 1 ))
        if woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines/${_pip_num}" \
          -X POST >/dev/null 2>&1; then
          echo "$_new_retries" > "$_retry_file"
          fixed "${proj_name}: Retriggered pipeline #${_pip_num} (${_infra_reason}, retry ${_new_retries}/2)"
        else
          p2 "${proj_name}: Pipeline #${_pip_num}: infra failure (${_infra_reason}) but retrigger API call failed"
          flog "${proj_name}: Failed to retrigger pipeline #${_pip_num}: API error"
        fi
      fi
    done

    # Clean up stale retry tracking files (>24h)
    find "$_RETRY_DIR" -type f -mmin +1440 -delete 2>/dev/null || true
  fi

  # Dev-agent health (only if monitoring enabled)
  if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then
    DEV_LOCK="/tmp/dev-agent-${proj_name}.lock"
    if [ -f "$DEV_LOCK" ]; then
      DEV_PID=$(cat "$DEV_LOCK" 2>/dev/null)
      if ! kill -0 "$DEV_PID" 2>/dev/null; then
        rm -f "$DEV_LOCK"
        fixed "${proj_name}: Removed stale dev-agent lock (PID ${DEV_PID} dead)"
      else
        # Lock holder alive — alert if its status file hasn't moved in 30min.
        DEV_STATUS_AGE=$(stat -c %Y "/tmp/dev-agent-status-${proj_name}" 2>/dev/null || echo 0)
        NOW_EPOCH=$(date +%s)
        STATUS_AGE_MIN=$(( (NOW_EPOCH - DEV_STATUS_AGE) / 60 ))
        if [ "$STATUS_AGE_MIN" -gt 30 ]; then
          p2 "${proj_name}: Dev-agent: status unchanged for ${STATUS_AGE_MIN}min"
        fi
      fi
    fi
  fi

  # Git repo health
  # NOTE(review): the cd persists after this function returns — later
  # relative paths in the same run execute from the project repo.
  if [ -d "${PROJECT_REPO_ROOT}" ]; then
    cd "${PROJECT_REPO_ROOT}" 2>/dev/null || true
    GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
    GIT_REBASE=$([ -d .git/rebase-merge ] || [ -d .git/rebase-apply ] && echo "yes" || echo "no")

    if [ "$GIT_REBASE" = "yes" ]; then
      git rebase --abort 2>/dev/null && git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \
        fixed "${proj_name}: Aborted stale rebase, switched to ${PRIMARY_BRANCH}" || \
        p2 "${proj_name}: Git: stale rebase, auto-abort failed"
    fi
    if [ "$GIT_BRANCH" != "${PRIMARY_BRANCH}" ] && [ "$GIT_BRANCH" != "unknown" ]; then
      git checkout "${PRIMARY_BRANCH}" 2>/dev/null && \
        fixed "${proj_name}: Switched repo from '${GIT_BRANCH}' to ${PRIMARY_BRANCH}" || \
        p2 "${proj_name}: Git: on '${GIT_BRANCH}' instead of ${PRIMARY_BRANCH}"
    fi
  fi

  # ===========================================================================
  # P2b: FACTORY STALLED — backlog exists but no agent running
  # ===========================================================================
  if [ "${CHECK_PIPELINE_STALL:-true}" = "true" ]; then
    status "P2: ${proj_name}: checking pipeline stall"

    # limit=1 is enough: only existence (>0) matters here.
    BACKLOG_COUNT=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0")
    IN_PROGRESS=$(forge_api GET "/issues?state=open&labels=in-progress&type=issues&limit=1" 2>/dev/null | jq -r 'length' 2>/dev/null || echo "0")

    if [ "${BACKLOG_COUNT:-0}" -gt 0 ] && [ "${IN_PROGRESS:-0}" -eq 0 ]; then
      # Use the dev-agent log's mtime as "last time the agent did anything".
      DEV_LOG="${DISINTO_LOG_DIR}/dev/dev-agent.log"
      if [ -f "$DEV_LOG" ]; then
        LAST_LOG_EPOCH=$(stat -c %Y "$DEV_LOG" 2>/dev/null || echo 0)
      else
        LAST_LOG_EPOCH=0
      fi
      NOW_EPOCH=$(date +%s)
      IDLE_MIN=$(( (NOW_EPOCH - LAST_LOG_EPOCH) / 60 ))

      if [ "$IDLE_MIN" -gt 20 ]; then
        p2 "${proj_name}: Pipeline stalled: ${BACKLOG_COUNT} backlog issue(s), no agent ran for ${IDLE_MIN}min"
      fi
    fi
  fi

  # ===========================================================================
  # P2c: DEV-AGENT PRODUCTIVITY — all backlog blocked for too long
  # ===========================================================================
  if [ "${CHECK_DEV_AGENT:-true}" = "true" ]; then
    status "P2: ${proj_name}: checking dev-agent productivity"

    DEV_LOG_FILE="${DISINTO_LOG_DIR}/dev/dev-agent.log"
    if [ -f "$DEV_LOG_FILE" ]; then
      # Alert when the last 6 "poll:" lines all report "no ready issues".
      RECENT_POLLS=$(tail -100 "$DEV_LOG_FILE" | grep "poll:" | tail -6)
      TOTAL_RECENT=$(echo "$RECENT_POLLS" | grep -c "." || true)
      BLOCKED_IN_RECENT=$(echo "$RECENT_POLLS" | grep -c "no ready issues" || true)
      if [ "$TOTAL_RECENT" -ge 6 ] && [ "$BLOCKED_IN_RECENT" -eq "$TOTAL_RECENT" ]; then
        p2 "${proj_name}: Dev-agent blocked: last ${BLOCKED_IN_RECENT} polls all report 'no ready issues'"
      fi
    fi
  fi

  # ===========================================================================
  # P3: FACTORY DEGRADED — derailed PRs, unreviewed PRs
  # ===========================================================================
  if [ "${CHECK_PRS:-true}" = "true" ]; then
    status "P3: ${proj_name}: checking PRs"

    OPEN_PRS=$(forge_api GET "/pulls?state=open&limit=10" 2>/dev/null | jq -r '.[].number' 2>/dev/null || true)
    for pr in $OPEN_PRS; do
      PR_JSON=$(forge_api GET "/pulls/${pr}" 2>/dev/null || true)
      [ -z "$PR_JSON" ] && continue
      PR_SHA=$(echo "$PR_JSON" | jq -r '.head.sha // ""')
      [ -z "$PR_SHA" ] && continue

      CI_STATE=$(ci_commit_status "$PR_SHA" 2>/dev/null || true)

      # Three degraded states, checked in priority order:
      # conflict-with-green-CI, failed-and-stale, green-but-unreviewed.
      MERGEABLE=$(echo "$PR_JSON" | jq -r '.mergeable // true')
      if [ "$MERGEABLE" = "false" ] && ci_passed "$CI_STATE"; then
        p3 "${proj_name}: PR #${pr}: CI pass but merge conflict — needs rebase"
      elif [ "$CI_STATE" = "failure" ] || [ "$CI_STATE" = "error" ]; then
        UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""')
        if [ -n "$UPDATED" ]; then
          UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
          NOW_EPOCH=$(date +%s)
          AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
          [ "$AGE_MIN" -gt 30 ] && p3 "${proj_name}: PR #${pr}: CI=${CI_STATE}, stale ${AGE_MIN}min"
        fi
      elif ci_passed "$CI_STATE"; then
        # A review is recognized by a "<!-- reviewed: <sha> -->" marker comment.
        HAS_REVIEW=$(forge_api GET "/issues/${pr}/comments?limit=50" 2>/dev/null | \
          jq -r --arg sha "$PR_SHA" '[.[] | select(.body | contains("<!-- reviewed: " + $sha))] | length' 2>/dev/null || echo "0")

        if [ "${HAS_REVIEW:-0}" -eq 0 ]; then
          UPDATED=$(echo "$PR_JSON" | jq -r '.updated_at // ""')
          if [ -n "$UPDATED" ]; then
            UPDATED_EPOCH=$(date -d "$UPDATED" +%s 2>/dev/null || echo 0)
            NOW_EPOCH=$(date +%s)
            AGE_MIN=$(( (NOW_EPOCH - UPDATED_EPOCH) / 60 ))
            if [ "$AGE_MIN" -gt 60 ]; then
              p3 "${proj_name}: PR #${pr}: CI passed, no review for ${AGE_MIN}min"
              # Fire-and-forget background review; not waited on.
              bash "${FACTORY_ROOT}/review/review-pr.sh" "$pr" >> "${DISINTO_LOG_DIR}/review/review.log" 2>&1 &
              fixed "${proj_name}: Auto-triggered review for PR #${pr}"
            fi
          fi
        fi
      fi
    done
  fi

  # ===========================================================================
  # P3b: CIRCULAR DEPENDENCIES — deadlock detection
  # ===========================================================================
  status "P3: ${proj_name}: checking for circular dependencies"

  BACKLOG_FOR_DEPS=$(forge_api GET "/issues?state=open&labels=backlog&type=issues&limit=50" 2>/dev/null || true)
  if [ -n "$BACKLOG_FOR_DEPS" ] && [ "$BACKLOG_FOR_DEPS" != "null" ] && [ "$(echo "$BACKLOG_FOR_DEPS" | jq 'length' 2>/dev/null || echo 0)" -gt 0 ]; then

    PARSE_DEPS="${FACTORY_ROOT}/lib/parse-deps.sh"
    ISSUE_COUNT=$(echo "$BACKLOG_FOR_DEPS" | jq 'length')

    # Adjacency map: DEPS_OF[issue] = space-separated dependency issue numbers
    # (self-dependencies filtered out); BACKLOG_NUMS = membership set.
    declare -A DEPS_OF
    declare -A BACKLOG_NUMS
    for i in $(seq 0 $((ISSUE_COUNT - 1))); do
      NUM=$(echo "$BACKLOG_FOR_DEPS" | jq -r ".[$i].number")
      BODY=$(echo "$BACKLOG_FOR_DEPS" | jq -r ".[$i].body // \"\"")
      ISSUE_DEPS=$(echo "$BODY" | bash "$PARSE_DEPS" | grep -v "^${NUM}$" || true)
      [ -n "$ISSUE_DEPS" ] && DEPS_OF[$NUM]="$ISSUE_DEPS"
      BACKLOG_NUMS[$NUM]=1
    done

    # DFS 3-color cycle detection: 0=unvisited, 1=on current path, 2=done.
    declare -A NODE_COLOR
    for node in "${!BACKLOG_NUMS[@]}"; do NODE_COLOR[$node]=0; done

    FOUND_CYCLES=""
    declare -A SEEN_CYCLES

    dfs_detect_cycle() {
      local node="$1" path="$2"
      NODE_COLOR[$node]=1
      for dep in ${DEPS_OF[$node]:-}; do
        # Only consider edges within the backlog set.
        [ -z "${BACKLOG_NUMS[$dep]+x}" ] && continue
        if [ "${NODE_COLOR[$dep]}" = "1" ]; then
          # Back-edge to a node on the current path — a cycle.
          # Dedup cycles by their sorted node set.
          local cycle_key
          cycle_key=$(echo "$path $dep" | tr ' ' '\n' | sort -n | tr '\n' ' ')
          if [ -z "${SEEN_CYCLES[$cycle_key]+x}" ]; then
            SEEN_CYCLES[$cycle_key]=1
            # Render only the path segment from the repeated node onward.
            local in_cycle=0 cycle_str=""
            for p in $path $dep; do
              [ "$p" = "$dep" ] && in_cycle=1
              [ "$in_cycle" = "1" ] && cycle_str="${cycle_str:+$cycle_str -> }#${p}"
            done
            FOUND_CYCLES="${FOUND_CYCLES}${cycle_str}\n"
          fi
        elif [ "${NODE_COLOR[$dep]}" = "0" ]; then
          dfs_detect_cycle "$dep" "$path $dep"
        fi
      done
      NODE_COLOR[$node]=2
    }

    for node in "${!DEPS_OF[@]}"; do
      [ "${NODE_COLOR[$node]:-2}" = "0" ] && dfs_detect_cycle "$node" "$node"
    done

    if [ -n "$FOUND_CYCLES" ]; then
      # NOTE(review): p3 runs inside a pipeline subshell here, so the
      # P3_ALERTS accumulation is lost to the parent shell — only the flog
      # side effect (file append) survives. Confirm whether that is intended.
      echo -e "$FOUND_CYCLES" | while IFS= read -r cycle; do
        [ -z "$cycle" ] && continue
        p3 "${proj_name}: Circular dependency deadlock: ${cycle}"
      done
    fi

    # =========================================================================
    # P3c: STALE DEPENDENCIES — blocked by old open issues (>30 days)
    # =========================================================================
    status "P3: ${proj_name}: checking for stale dependencies"

    NOW_EPOCH=$(date +%s)
    # Memoize per-dependency forge lookups as "state|created_at|title".
    declare -A DEP_CACHE

    for issue_num in "${!DEPS_OF[@]}"; do
      for dep in ${DEPS_OF[$issue_num]}; do
        if [ -n "${DEP_CACHE[$dep]+x}" ]; then
          DEP_INFO="${DEP_CACHE[$dep]}"
        else
          DEP_JSON=$(forge_api GET "/issues/${dep}" 2>/dev/null || true)
          [ -z "$DEP_JSON" ] && continue
          DEP_STATE=$(echo "$DEP_JSON" | jq -r '.state // "unknown"')
          DEP_CREATED=$(echo "$DEP_JSON" | jq -r '.created_at // ""')
          DEP_TITLE=$(echo "$DEP_JSON" | jq -r '.title // ""' | head -c 50)
          DEP_INFO="${DEP_STATE}|${DEP_CREATED}|${DEP_TITLE}"
          DEP_CACHE[$dep]="$DEP_INFO"
        fi

        DEP_STATE="${DEP_INFO%%|*}"
        [ "$DEP_STATE" != "open" ] && continue

        DEP_REST="${DEP_INFO#*|}"
        DEP_CREATED="${DEP_REST%%|*}"
        DEP_TITLE="${DEP_REST#*|}"

        [ -z "$DEP_CREATED" ] && continue
        CREATED_EPOCH=$(date -d "$DEP_CREATED" +%s 2>/dev/null || echo 0)
        AGE_DAYS=$(( (NOW_EPOCH - CREATED_EPOCH) / 86400 ))
        if [ "$AGE_DAYS" -gt 30 ]; then
          p3 "${proj_name}: Stale dependency: #${issue_num} blocked by #${dep} \"${DEP_TITLE}\" (open ${AGE_DAYS} days)"
        fi
      done
    done

    # Associative arrays are global — reset so the next project starts clean.
    unset DEPS_OF BACKLOG_NUMS NODE_COLOR SEEN_CYCLES DEP_CACHE
  fi

  # Emit dev metric (paginated to avoid silent cap at 50)
  _BACKLOG_COUNT=$(codeberg_count_paginated "/issues?state=open&labels=backlog&type=issues")
  _BLOCKED_COUNT=$(codeberg_count_paginated "/issues?state=open&labels=blocked&type=issues")
  _PR_COUNT=$(codeberg_count_paginated "/pulls?state=open")
  emit_metric "$(jq -nc \
    --arg ts "$(date -u +%Y-%m-%dT%H:%MZ)" \
    --arg proj "$proj_name" \
    --argjson backlog "${_BACKLOG_COUNT:-0}" \
    --argjson blocked "${_BLOCKED_COUNT:-0}" \
    --argjson prs "${_PR_COUNT:-0}" \
    '{ts:$ts,type:"dev",project:$proj,issues_in_backlog:$backlog,issues_blocked:$blocked,pr_open:$prs}' 2>/dev/null)" 2>/dev/null || true

  # ===========================================================================
  # P2d: ESCALATE — inject human replies into escalated dev sessions
  # ===========================================================================
  status "P2: ${proj_name}: checking escalate sessions"

  HUMAN_REPLY_FILE="/tmp/dev-escalation-reply"

  for _nh_phase_file in /tmp/dev-session-"${proj_name}"-*.phase; do
    [ -f "$_nh_phase_file" ] || continue
    _nh_phase=$(head -1 "$_nh_phase_file" 2>/dev/null | tr -d '[:space:]' || true)
    [ "$_nh_phase" = "PHASE:escalate" ] || continue

    # Phase file name encodes the issue: dev-session-<project>-<issue>.phase
    _nh_issue=$(basename "$_nh_phase_file" .phase)
    _nh_issue="${_nh_issue#dev-session-${proj_name}-}"
    [ -z "$_nh_issue" ] && continue
    _nh_session="dev-${proj_name}-${_nh_issue}"

    # Check tmux session is alive
    if ! tmux has-session -t "$_nh_session" 2>/dev/null; then
      flog "${proj_name}: #${_nh_issue} phase=escalate but tmux session gone"
      continue
    fi

    # Inject human reply if available (atomic mv to prevent double-injection with gardener)
    _nh_claimed="/tmp/dev-escalation-reply.supervisor.$$"
    if [ -s "$HUMAN_REPLY_FILE" ] && mv "$HUMAN_REPLY_FILE" "$_nh_claimed" 2>/dev/null; then
      _nh_reply=$(cat "$_nh_claimed")
      _nh_inject_msg="Human reply received for issue #${_nh_issue}:

${_nh_reply}

Instructions:
1. Read the human's guidance carefully.
2. Continue your work based on their input.
3. When done, push your changes and write the appropriate phase."

      # Deliver via a tmux paste buffer (preserves newlines), then an empty
      # send-keys Enter to submit the pasted text.
      _nh_tmpfile=$(mktemp /tmp/human-inject-XXXXXX)
      printf '%s' "$_nh_inject_msg" > "$_nh_tmpfile"
      # All tmux calls guarded: session may die between has-session and here
      tmux load-buffer -b "human-inject-${_nh_issue}" "$_nh_tmpfile" || true
      tmux paste-buffer -t "$_nh_session" -b "human-inject-${_nh_issue}" || true
      sleep 0.5
      tmux send-keys -t "$_nh_session" "" Enter || true
      tmux delete-buffer -b "human-inject-${_nh_issue}" 2>/dev/null || true
      rm -f "$_nh_tmpfile" "$_nh_claimed"

      rm -f "/tmp/dev-renotify-${proj_name}-${_nh_issue}"
      flog "${proj_name}: #${_nh_issue} human reply injected into session ${_nh_session}"
      fixed "${proj_name}: Injected human reply into dev session #${_nh_issue}"
      break # one reply to deliver
    else
      # No reply yet — check for timeout (re-notify at 6h, alert at 24h)
      _nh_mtime=$(stat -c %Y "$_nh_phase_file" 2>/dev/null || echo 0)
      _nh_now=$(date +%s)
      _nh_age=$(( _nh_now - _nh_mtime ))

      if [ "$_nh_age" -gt 86400 ]; then
        p2 "${proj_name}: Dev session #${_nh_issue} stuck in escalate for >24h"
      elif [ "$_nh_age" -gt 21600 ]; then
        # Marker file ensures the 6h re-notify fires only once per escalation.
        _nh_renotify="/tmp/dev-renotify-${proj_name}-${_nh_issue}"
        if [ ! -f "$_nh_renotify" ]; then
          _nh_age_h=$(( _nh_age / 3600 ))
          touch "$_nh_renotify"
          flog "${proj_name}: #${_nh_issue} re-notified (escalate for ${_nh_age_h}h)"
        fi
      fi
    fi
  done

  # ===========================================================================
  # P4-PROJECT: Orphaned tmux sessions — PR/issue closed externally
  # ===========================================================================
  status "P4: ${proj_name}: sweeping orphaned dev sessions"

  while IFS= read -r _sess; do
    [ -z "$_sess" ] && continue

    # Extract issue number from dev-{project}-{issue}
    _sess_issue="${_sess#dev-"${proj_name}"-}"
    [[ "$_sess_issue" =~ ^[0-9]+$ ]] || continue

    # Check forge: is the issue still open?
    # On API failure we default to "open", i.e. err on the side of keeping
    # the session alive.
    _issue_state=$(forge_api GET "/issues/${_sess_issue}" 2>/dev/null \
      | jq -r '.state // "open"' 2>/dev/null || echo "open")

    _should_cleanup=false
    _cleanup_reason=""

    if [ "$_issue_state" = "closed" ]; then
      _should_cleanup=true
      _cleanup_reason="issue #${_sess_issue} closed externally"
    else
      # Issue still open — skip cleanup during active-wait phases (no PR yet is normal)
      _phase_file="/tmp/dev-session-${proj_name}-${_sess_issue}.phase"
      _curr_phase=$(head -1 "$_phase_file" 2>/dev/null | tr -d '[:space:]' || true)
      case "${_curr_phase:-}" in
        PHASE:escalate|PHASE:awaiting_ci|PHASE:awaiting_review)
          continue # session has legitimate pending work
          ;;
      esac

      # Check if associated PR is open (paginated)
      _pr_branch="fix/issue-${_sess_issue}"
      _has_open_pr=0
      _pr_page=1
      while true; do
        _pr_page_json=$(forge_api GET "/pulls?state=open&limit=50&page=${_pr_page}" \
          2>/dev/null || echo "[]")
        _pr_page_len=$(printf '%s' "$_pr_page_json" | jq 'length' 2>/dev/null || echo 0)
        _pr_match=$(printf '%s' "$_pr_page_json" | \
          jq --arg b "$_pr_branch" '[.[] | select(.head.ref == $b)] | length' \
          2>/dev/null || echo 0)
        _has_open_pr=$(( _has_open_pr + ${_pr_match:-0} ))
        [ "${_has_open_pr:-0}" -gt 0 ] && break
        [ "${_pr_page_len:-0}" -lt 50 ] && break
        _pr_page=$(( _pr_page + 1 ))
        [ "$_pr_page" -gt 20 ] && break
      done

      if [ "$_has_open_pr" -eq 0 ]; then
        # No open PR — check for a closed/merged PR with this branch (paginated)
        _has_closed_pr=0
        _pr_page=1
        while true; do
          _pr_page_json=$(forge_api GET "/pulls?state=closed&limit=50&page=${_pr_page}" \
            2>/dev/null || echo "[]")
          _pr_page_len=$(printf '%s' "$_pr_page_json" | jq 'length' 2>/dev/null || echo 0)
          _pr_match=$(printf '%s' "$_pr_page_json" | \
            jq --arg b "$_pr_branch" '[.[] | select(.head.ref == $b)] | length' \
            2>/dev/null || echo 0)
          _has_closed_pr=$(( _has_closed_pr + ${_pr_match:-0} ))
          [ "${_has_closed_pr:-0}" -gt 0 ] && break
          [ "${_pr_page_len:-0}" -lt 50 ] && break
          _pr_page=$(( _pr_page + 1 ))
          [ "$_pr_page" -gt 20 ] && break
        done

        if [ "$_has_closed_pr" -gt 0 ]; then
          _should_cleanup=true
          _cleanup_reason="PR for issue #${_sess_issue} is closed/merged"
        else
          # No PR at all — clean up if session idle >30min
          # On query failure, skip rather than defaulting to epoch 0
          if ! _sess_activity=$(tmux display-message -t "$_sess" \
            -p '#{session_activity}' 2>/dev/null); then
            flog "${proj_name}: Could not query activity for session ${_sess}, skipping"
            continue
          fi
          _now_ts=$(date +%s)
          _idle_min=$(( (_now_ts - _sess_activity) / 60 ))
          if [ "$_idle_min" -gt 30 ]; then
            _should_cleanup=true
            _cleanup_reason="no PR found, session idle ${_idle_min}min"
          fi
        fi
      fi
    fi

    if [ "$_should_cleanup" = true ]; then
      # Tear down session, worktree, stale lock, and phase file.
      tmux kill-session -t "$_sess" 2>/dev/null || true
      _wt="/tmp/${proj_name}-worktree-${_sess_issue}"
      if [ -d "$_wt" ]; then
        git -C "$PROJECT_REPO_ROOT" worktree remove --force "$_wt" 2>/dev/null || true
      fi
      # Remove lock only if its recorded PID is no longer alive
      _lock="/tmp/dev-agent-${proj_name}.lock"
      if [ -f "$_lock" ]; then
        _lock_pid=$(cat "$_lock" 2>/dev/null || true)
        if [ -n "${_lock_pid:-}" ] && ! kill -0 "$_lock_pid" 2>/dev/null; then
          rm -f "$_lock"
        fi
      fi
      rm -f "/tmp/dev-session-${proj_name}-${_sess_issue}.phase"
      fixed "${proj_name}: Cleaned orphaned session ${_sess} (${_cleanup_reason})"
    fi
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "^dev-${proj_name}-" || true)

  # ===========================================================================
  # P4-PROJECT: Clean stale worktrees for this project
  # ===========================================================================
  NOW_TS=$(date +%s)
  # Globs intentionally unquoted so they expand; >2h-old worktrees with no
  # process referencing their basename are removed.
  for wt in /tmp/${proj_name}-worktree-* /tmp/${proj_name}-review-* /tmp/${proj_name}-sup-retry-*; do
    [ -d "$wt" ] || continue
    WT_AGE_MIN=$(( (NOW_TS - $(stat -c %Y "$wt")) / 60 ))
    if [ "$WT_AGE_MIN" -gt 120 ]; then
      WT_BASE=$(basename "$wt")
      if ! pgrep -f "$WT_BASE" >/dev/null 2>&1; then
        git -C "$PROJECT_REPO_ROOT" worktree remove --force "$wt" 2>/dev/null && \
          fixed "${proj_name}: Removed stale worktree: $wt (${WT_AGE_MIN}min old)" || true
      fi
    fi
  done
  git -C "$PROJECT_REPO_ROOT" worktree prune 2>/dev/null || true
}
|
||||
|
||||
# =============================================================================
|
||||
# Iterate over all registered projects
|
||||
# =============================================================================
|
||||
status "checking projects"
|
||||
|
||||
# Iterate over every registered project TOML, loading its config and running
# the per-project checks. Falls back to .env defaults when no TOMLs exist.
PROJECT_COUNT=0
if [ -d "$PROJECTS_DIR" ]; then
  for project_toml in "${PROJECTS_DIR}"/*.toml; do
    # Without nullglob an unmatched glob yields the literal pattern; skip it.
    [ -f "$project_toml" ] || continue
    PROJECT_COUNT=$((PROJECT_COUNT + 1))

    # Load project config (overrides FORGE_REPO, PROJECT_REPO_ROOT, etc.)
    # Bug fix: the source was previously unchecked — if it failed, the
    # PREVIOUS project's config stayed loaded and check_project would run
    # against the wrong repo. Skip this project instead.
    if ! source "${FACTORY_ROOT}/lib/load-project.sh" "$project_toml"; then
      flog "Failed to load project config ${project_toml}, skipping"
      continue
    fi

    check_project || flog "check_project failed for ${project_toml} (per-project checks incomplete)"
  done
fi

if [ "$PROJECT_COUNT" -eq 0 ]; then
  # Fallback: no project TOML files, use .env config (backwards compatible)
  flog "No projects/*.toml found, using .env defaults"
  check_project || flog "check_project failed with .env defaults (per-project checks incomplete)"
fi
|
||||
|
||||
# #############################################################################
|
||||
# RESULT
|
||||
# #############################################################################
|
||||
|
||||
# Aggregate alerts from every priority tier (P0 highest → P4) into one
# string; non-empty means at least one check raised an alert this run.
ALL_ALERTS="${P0_ALERTS}${P1_ALERTS}${P2_ALERTS}${P3_ALERTS}${P4_ALERTS}"

if [ -n "$ALL_ALERTS" ]; then
  # Alert buckets accumulate literal "\n" escape sequences; echo -e expands
  # them into real newlines for the prompt body.
  ALERT_TEXT=$(echo -e "$ALL_ALERTS")

  flog "Invoking claude -p for alerts"

  # Build the agent prompt: base instructions from PROMPT_FILE (with an
  # inline fallback if the file is missing/unreadable), then the current
  # alerts, the auto-fixes already applied, and a system-state snapshot.
  # NOTE(review): the unindented lines below are inside one double-quoted
  # string — indenting them would change the prompt text.
  CLAUDE_PROMPT="$(cat "$PROMPT_FILE" 2>/dev/null || echo "You are a supervisor agent. Fix the issue below.")

## Current Alerts
${ALERT_TEXT}

## Auto-fixes already applied by bash
$(echo -e "${FIXES:-None}")

## System State
RAM: $(free -m | awk '/Mem:/{printf "avail=%sMB", $7}') $(free -m | awk '/Swap:/{printf "swap=%sMB", $3}')
Disk: $(df -h / | awk 'NR==2{printf "%s used of %s (%s)", $3, $2, $5}')
Docker: $(sudo docker ps --format '{{.Names}}' 2>/dev/null | wc -l) containers running
Claude procs: $(pgrep -f "claude" 2>/dev/null | wc -l)

Fix what you can. File vault items for what you can't. Read the relevant best-practices file first."

  # Hard 5-minute cap on the agent call; '|| true' keeps a timeout or
  # non-zero exit from aborting the supervisor run.
  CLAUDE_OUTPUT=$(timeout 300 claude -p --model sonnet --dangerously-skip-permissions \
    "$CLAUDE_PROMPT" 2>&1) || true
  flog "claude output: $(echo "$CLAUDE_OUTPUT" | tail -20)"
  status "claude responded"
else
  # Nothing to escalate; log any silent auto-fixes and report healthy.
  [ -n "$FIXES" ] && flog "Housekeeping: $(echo -e "$FIXES")"
  status "all clear"
fi
|
||||
|
|
@ -38,7 +38,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
|
|||
# shellcheck source=../lib/agent-sdk.sh
|
||||
source "$FACTORY_ROOT/lib/agent-sdk.sh"
|
||||
|
||||
LOG_FILE="$SCRIPT_DIR/supervisor.log"
|
||||
LOG_FILE="${DISINTO_LOG_DIR}/supervisor/supervisor.log"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
LOGFILE="$LOG_FILE"
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
|
||||
|
|
@ -46,31 +46,45 @@ SID_FILE="/tmp/supervisor-session-${PROJECT_NAME}.sid"
|
|||
SCRATCH_FILE="/tmp/supervisor-${PROJECT_NAME}-scratch.md"
|
||||
WORKTREE="/tmp/${PROJECT_NAME}-supervisor-run"
|
||||
|
||||
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
|
||||
# Override LOG_AGENT for consistent agent identification
|
||||
# shellcheck disable=SC2034 # consumed by agent-sdk.sh and env.sh log()
|
||||
LOG_AGENT="supervisor"
|
||||
|
||||
# Override log() to append to supervisor-specific log file
|
||||
# shellcheck disable=SC2034
|
||||
log() {
|
||||
local agent="${LOG_AGENT:-supervisor}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$LOG_FILE"
|
||||
}
|
||||
|
||||
# ── Guards ────────────────────────────────────────────────────────────────
|
||||
check_active supervisor
|
||||
acquire_cron_lock "/tmp/supervisor-run.lock"
|
||||
check_memory 2000
|
||||
memory_guard 2000
|
||||
|
||||
log "--- Supervisor run start ---"
|
||||
|
||||
# ── Resolve forge remote for git operations ─────────────────────────────
|
||||
resolve_forge_remote
|
||||
|
||||
# ── Housekeeping: clean up stale crashed worktrees (>24h) ────────────────
|
||||
cleanup_stale_crashed_worktrees 24
|
||||
|
||||
# ── Resolve agent identity for .profile repo ────────────────────────────
|
||||
if [ -z "${AGENT_IDENTITY:-}" ] && [ -n "${FORGE_SUPERVISOR_TOKEN:-}" ]; then
|
||||
AGENT_IDENTITY=$(curl -sf -H "Authorization: token ${FORGE_SUPERVISOR_TOKEN}" \
|
||||
"${FORGE_URL:-http://localhost:3000}/api/v1/user" 2>/dev/null | jq -r '.login // empty' 2>/dev/null || true)
|
||||
fi
|
||||
resolve_agent_identity || true
|
||||
|
||||
# ── Collect pre-flight metrics ────────────────────────────────────────────
|
||||
log "Running preflight.sh"
|
||||
PREFLIGHT_OUTPUT=""
|
||||
PREFLIGHT_RC=0
|
||||
if PREFLIGHT_OUTPUT=$(bash "$SCRIPT_DIR/preflight.sh" "$PROJECT_TOML" 2>&1); then
|
||||
log "Preflight collected ($(echo "$PREFLIGHT_OUTPUT" | wc -l) lines)"
|
||||
else
|
||||
log "WARNING: preflight.sh failed, continuing with partial data"
|
||||
PREFLIGHT_RC=$?
|
||||
log "WARNING: preflight.sh failed (exit code $PREFLIGHT_RC), continuing with partial data"
|
||||
if [ -n "$PREFLIGHT_OUTPUT" ]; then
|
||||
log "Preflight error: $(echo "$PREFLIGHT_OUTPUT" | tail -3)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── Load formula + context ───────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -1,47 +0,0 @@
|
|||
#!/usr/bin/env bash
# update-prompt.sh — Append a lesson to a best-practices file
#
# Usage:
#   ./supervisor/update-prompt.sh "best-practices/memory.md" "### Title\nBody text"
#   ./supervisor/update-prompt.sh "best-practices/memory.md" --from-file /tmp/lesson.md
#
# Called by claude -p when it learns something during a fix.
# Commits and pushes the update to the disinto repo.

source "$(dirname "$0")/../lib/env.sh"

# Keep the caller-supplied relative path: it is needed again for `git add`
# below, and after `shift` $1 no longer holds it.
REL_PATH="${1:-}"
TARGET_FILE="${FACTORY_ROOT}/supervisor/${REL_PATH}"
shift

# Resolve the lesson text: either inline as $1, or from a file via
# `--from-file <path>`. ${1:-} guards against unset params under set -u.
if [ "${1:-}" = "--from-file" ] && [ -f "${2:-}" ]; then
  LESSON=$(cat "$2")
elif [ -n "${1:-}" ]; then
  LESSON="$1"
else
  echo "Usage: update-prompt.sh <relative-path> '<lesson text>'" >&2
  echo " or: update-prompt.sh <relative-path> --from-file <path>" >&2
  exit 1
fi

if [ ! -f "$TARGET_FILE" ]; then
  echo "Target file not found: $TARGET_FILE" >&2
  exit 1
fi

# Append under "Lessons Learned" section if it exists, otherwise at end
if grep -q "## Lessons Learned" "$TARGET_FILE"; then
  echo "" >> "$TARGET_FILE"
  echo "$LESSON" >> "$TARGET_FILE"
else
  echo "" >> "$TARGET_FILE"
  echo "## Lessons Learned" >> "$TARGET_FILE"
  echo "" >> "$TARGET_FILE"
  echo "$LESSON" >> "$TARGET_FILE"
fi

cd "$FACTORY_ROOT" || exit 1
# Bug fix: this previously ran `git add "supervisor/$1"` AFTER the shift,
# so $1 was the lesson text (or --from-file), never the path — the add only
# worked via the fallback. Use the saved REL_PATH instead.
git add "supervisor/${REL_PATH}" 2>/dev/null || git add "$TARGET_FILE"
git commit -m "supervisor: learned — $(echo "$LESSON" | head -1 | sed 's/^#* *//')" --no-verify 2>/dev/null
git push origin main 2>/dev/null

log "Updated $(basename "$TARGET_FILE") with new lesson"
|
||||
28
templates/issue/bug.md
Normal file
28
templates/issue/bug.md
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
---
|
||||
name: Bug Report
|
||||
about: Report a bug or unexpected behavior
|
||||
labels: bug-report
|
||||
---
|
||||
|
||||
## What happened
|
||||
|
||||
<!-- Describe the observed behavior -->
|
||||
|
||||
## What was expected
|
||||
|
||||
<!-- Describe the expected behavior -->
|
||||
|
||||
## Steps to reproduce
|
||||
|
||||
<!-- Required: List the exact steps to reproduce the issue -->
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
## Environment
|
||||
|
||||
<!-- Browser, wallet, network, or other relevant environment details -->
|
||||
- Browser/Client:
|
||||
- Wallet (if applicable):
|
||||
- Network (if applicable):
|
||||
- Version:
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Mock Forgejo API server for CI smoke tests.
|
||||
|
||||
Implements 15 Forgejo API endpoints that disinto init calls.
|
||||
Implements 16 Forgejo API endpoints that disinto init calls.
|
||||
State stored in-memory (dicts), responds instantly.
|
||||
"""
|
||||
|
||||
|
|
@ -135,6 +135,7 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
|||
# Users patterns
|
||||
(r"^users/([^/]+)$", f"handle_{method}_users_username"),
|
||||
(r"^users/([^/]+)/tokens$", f"handle_{method}_users_username_tokens"),
|
||||
(r"^users/([^/]+)/tokens/([^/]+)$", f"handle_{method}_users_username_tokens_token_id"),
|
||||
(r"^users/([^/]+)/repos$", f"handle_{method}_users_username_repos"),
|
||||
# Repos patterns
|
||||
(r"^repos/([^/]+)/([^/]+)$", f"handle_{method}_repos_owner_repo"),
|
||||
|
|
@ -149,6 +150,7 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
|||
# Admin patterns
|
||||
(r"^admin/users$", f"handle_{method}_admin_users"),
|
||||
(r"^admin/users/([^/]+)$", f"handle_{method}_admin_users_username"),
|
||||
(r"^admin/users/([^/]+)/repos$", f"handle_{method}_admin_users_username_repos"),
|
||||
# Org patterns
|
||||
(r"^orgs$", f"handle_{method}_orgs"),
|
||||
]
|
||||
|
|
@ -294,7 +296,10 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
|||
|
||||
def handle_GET_users_username_tokens(self, query):
|
||||
"""GET /api/v1/users/{username}/tokens"""
|
||||
# Support both token auth (for listing own tokens) and basic auth (for admin listing)
|
||||
username = require_token(self)
|
||||
if not username:
|
||||
username = require_basic_auth(self)
|
||||
if not username:
|
||||
json_response(self, 401, {"message": "invalid authentication"})
|
||||
return
|
||||
|
|
@ -303,6 +308,38 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
|||
tokens = [t for t in state["tokens"].values() if t.get("username") == username]
|
||||
json_response(self, 200, tokens)
|
||||
|
||||
def handle_DELETE_users_username_tokens_token_id(self, query):
    """DELETE /api/v1/users/{username}/tokens/{id}

    Deletes one of the authenticated user's API tokens by numeric ID.
    Responds 204 on success, 404 when no matching token is owned by the
    caller, 401 on bad credentials.
    """
    # Support both token auth and basic auth
    username = require_token(self)
    if not username:
        username = require_basic_auth(self)
    if not username:
        json_response(self, 401, {"message": "invalid authentication"})
        return

    # Token id is extracted positionally from the request path.
    parts = self.path.split("/")
    if len(parts) >= 8:
        token_id_str = parts[7]
    else:
        json_response(self, 404, {"message": "token not found"})
        return

    # Bug fix: a non-numeric id previously raised ValueError inside the
    # handler (surfacing as an HTTP 500); treat it as "not found" instead.
    try:
        token_id = int(token_id_str)
    except ValueError:
        json_response(self, 404, {"message": "token not found"})
        return

    # Find and delete token by ID, scoped to the authenticated user so one
    # user cannot delete another user's token.
    deleted = False
    for tok_sha1, tok in list(state["tokens"].items()):
        if tok.get("id") == token_id and tok.get("username") == username:
            del state["tokens"][tok_sha1]
            deleted = True
            break

    if deleted:
        self.send_response(204)
        self.send_header("Content-Length", "0")
        self.end_headers()
    else:
        json_response(self, 404, {"message": "token not found"})
|
||||
|
||||
def handle_POST_users_username_tokens(self, query):
|
||||
"""POST /api/v1/users/{username}/tokens"""
|
||||
username = require_basic_auth(self)
|
||||
|
|
@ -460,6 +497,55 @@ class ForgejoHandler(BaseHTTPRequestHandler):
|
|||
state["repos"][key] = repo
|
||||
json_response(self, 201, repo)
|
||||
|
||||
def handle_POST_admin_users_username_repos(self, query):
    """POST /api/v1/admin/users/{username}/repos

    Admin API to create a repo under a specific user namespace.
    This allows creating repos in any user's namespace when authenticated
    as admin.
    """
    require_token(self)

    # Pull the target username out of the request path.
    segments = self.path.split("/")
    if len(segments) < 6:
        json_response(self, 400, {"message": "username required"})
        return
    target_user = segments[4]

    # The target namespace must belong to a known user.
    if target_user not in state["users"]:
        json_response(self, 404, {"message": "user not found"})
        return

    # Read and decode the JSON request body (empty body -> empty payload).
    length = int(self.headers.get("Content-Length", 0))
    raw_body = self.rfile.read(length).decode("utf-8")
    payload = json.loads(raw_body) if raw_body else {}

    repo_name = payload.get("name")
    if not repo_name:
        json_response(self, 400, {"message": "name is required"})
        return

    # Allocate a fresh repo id from the shared counter.
    repo_id = next_ids["repos"]
    next_ids["repos"] += 1

    # Build the repo record in the shape Forgejo's API would return.
    key = f"{target_user}/{repo_name}"
    repo = {
        "id": repo_id,
        "full_name": key,
        "name": repo_name,
        "owner": {"id": state["users"][target_user]["id"], "login": target_user},
        "empty": not payload.get("auto_init", False),
        "default_branch": payload.get("default_branch", "main"),
        "description": payload.get("description", ""),
        "private": payload.get("private", False),
        "html_url": f"https://example.com/{key}",
        "ssh_url": f"git@example.com:{key}.git",
        "clone_url": f"https://example.com/{key}.git",
        "created_at": "2026-04-01T00:00:00Z",
    }

    state["repos"][key] = repo
    json_response(self, 201, repo)
|
||||
|
||||
def handle_POST_user_repos(self, query):
|
||||
"""POST /api/v1/user/repos"""
|
||||
require_token(self)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,8 @@
|
|||
set -euo pipefail
|
||||
|
||||
FACTORY_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
FORGE_URL="${FORGE_URL:-http://localhost:3000}"
|
||||
# Always use localhost for mock Forgejo (in case FORGE_URL is set from docker-compose)
|
||||
export FORGE_URL="http://localhost:3000"
|
||||
MOCK_BIN="/tmp/smoke-mock-bin"
|
||||
TEST_SLUG="smoke-org/smoke-repo"
|
||||
FAILED=0
|
||||
|
|
@ -24,6 +25,8 @@ fail() { printf 'FAIL: %s\n' "$*" >&2; FAILED=1; }
|
|||
pass() { printf 'PASS: %s\n' "$*"; }
|
||||
|
||||
cleanup() {
|
||||
# Kill any leftover mock-forgejo.py processes by name
|
||||
pkill -f "mock-forgejo.py" 2>/dev/null || true
|
||||
rm -rf "$MOCK_BIN" /tmp/smoke-test-repo \
|
||||
"${FACTORY_ROOT}/projects/smoke-repo.toml"
|
||||
# Restore .env only if we created the backup
|
||||
|
|
@ -172,6 +175,18 @@ else
|
|||
fail "disinto init exited non-zero"
|
||||
fi
|
||||
|
||||
# ── Idempotency test: run init again ───────────────────────────────────────
|
||||
echo "=== Idempotency test: running disinto init again ==="
|
||||
if bash "${FACTORY_ROOT}/bin/disinto" init \
|
||||
"${TEST_SLUG}" \
|
||||
--bare --yes \
|
||||
--forge-url "$FORGE_URL" \
|
||||
--repo-root "/tmp/smoke-test-repo"; then
|
||||
pass "disinto init (re-run) completed successfully"
|
||||
else
|
||||
fail "disinto init (re-run) exited non-zero"
|
||||
fi
|
||||
|
||||
# ── 4. Verify Forgejo state ─────────────────────────────────────────────────
|
||||
echo "=== 4/6 Verifying Forgejo state ==="
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue