diff --git a/.woodpecker/agent-smoke.sh b/.woodpecker/agent-smoke.sh index 4d7c783..e54ee8d 100644 --- a/.woodpecker/agent-smoke.sh +++ b/.woodpecker/agent-smoke.sh @@ -102,9 +102,9 @@ echo "=== 2/2 Function resolution ===" # lib/load-project.sh — sourced by env.sh when PROJECT_TOML is set # lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue) # lib/secret-scan.sh — sourced by file-action-issue.sh (scan_for_secrets, redact_secrets) -# lib/formula-session.sh — sourced by formula-driven agents (acquire_cron_lock, check_memory, etc.) +# lib/formula-session.sh — sourced by formula-driven agents (acquire_run_lock, check_memory, etc.) # lib/mirrors.sh — sourced by merge sites (mirror_push) -# lib/guard.sh — sourced by all cron entry points (check_active) +# lib/guard.sh — sourced by all polling-loop entry points (check_active) # lib/issue-lifecycle.sh — sourced by agents for issue claim/release/block/deps # lib/worktree.sh — sourced by agents for worktree create/recover/cleanup/preserve # diff --git a/architect/architect-run.sh b/architect/architect-run.sh index 38702b2..8455b74 100755 --- a/architect/architect-run.sh +++ b/architect/architect-run.sh @@ -1,12 +1,12 @@ #!/usr/bin/env bash # ============================================================================= -# architect-run.sh — Cron wrapper: architect execution via SDK + formula +# architect-run.sh — Polling-loop wrapper: architect execution via SDK + formula # # Synchronous bash loop using claude -p (one-shot invocation). # No tmux sessions, no phase files — the bash script IS the state machine. # # Flow: -# 1. Guards: cron lock, memory check +# 1. Guards: run lock, memory check # 2. Precondition checks: skip if no work (no vision issues, no responses) # 3. Load formula (formulas/run-architect.toml) # 4. 
Context: VISION.md, AGENTS.md, ops:prerequisites.md, structural graph @@ -25,7 +25,7 @@ # Usage: # architect-run.sh [projects/disinto.toml] # project config (default: disinto) # -# Cron: 0 */6 * * * # every 6 hours +# Called by: entrypoint.sh polling loop (every 6 hours) # ============================================================================= set -euo pipefail @@ -72,7 +72,7 @@ log() { # ── Guards ──────────────────────────────────────────────────────────────── check_active architect -acquire_cron_lock "/tmp/architect-run.lock" +acquire_run_lock "/tmp/architect-run.lock" memory_guard 2000 log "--- Architect run start ---" @@ -353,7 +353,7 @@ Instructions: ## Cost — new infra to maintain - + ## Recommendation diff --git a/bin/disinto b/bin/disinto index 7a15de4..6dde10c 100755 --- a/bin/disinto +++ b/bin/disinto @@ -506,7 +506,8 @@ copy_issue_templates() { done } -# Install cron entries for project agents (implementation in lib/ci-setup.sh) +# Install scheduling entries for project agents (implementation in lib/ci-setup.sh) +# In compose mode this is a no-op (the agents container uses a polling loop). install_cron() { _load_ci_context _install_cron_impl "$@" @@ -765,7 +766,7 @@ p.write_text(text) # Copy issue templates to target project copy_issue_templates "$repo_root" - # Install cron jobs + # Install scheduling (bare-metal: cron; compose: polling loop in entrypoint.sh) install_cron "$project_name" "$toml_path" "$auto_yes" "$bare" # Set up mirror remotes if [mirrors] configured in TOML diff --git a/dev/dev-poll.sh b/dev/dev-poll.sh index ba69d5b..6a3335d 100755 --- a/dev/dev-poll.sh +++ b/dev/dev-poll.sh @@ -14,7 +14,7 @@ # 3. 
Ready "backlog" issues without "priority" (FIFO within tier) # # Usage: -# cron every 10min +# Called by: entrypoint.sh polling loop (every 10 min) # dev-poll.sh [projects/harb.toml] # optional project config set -euo pipefail diff --git a/disinto-factory/lessons-learned.md b/disinto-factory/lessons-learned.md index b1f6a3b..9f618cb 100644 --- a/disinto-factory/lessons-learned.md +++ b/disinto-factory/lessons-learned.md @@ -1,54 +1,35 @@ -# Working with the factory — lessons learned +# Lessons learned -## Writing issues for the dev agent +## Remediation & deployment -**Put everything in the issue body, not comments.** The dev agent reads the issue body when it starts work. It does not reliably read comments. If an issue fails and you need to add guidance for a retry, update the issue body. +**Escalate gradually.** Cheapest fix first, re-measure, escalate only if it persists. Single-shot fixes are either too weak or cause collateral damage. -**One approach per issue, no choices.** The dev agent cannot make design decisions. If there are multiple ways to solve a problem, decide before filing. Issues with "Option A or Option B" will confuse the agent. +**Parameterize deployment boundaries.** Entrypoint references to a specific project name are config values waiting to escape. `${VAR:-default}` preserves compat and unlocks reuse. -**Issues must fit the templates.** Every backlog issue needs: affected files (max 3), acceptance criteria (max 5 checkboxes), and a clear proposed solution. If you cannot fill these fields, the issue is too big — label it `vision` and break it down first. +**Fail loudly over silent defaults.** A fatal error with a clear message beats a wrong default that appears to work. -**Explicit dependencies prevent ordering bugs.** Add `Depends-on: #N` in the issue body. dev-poll checks these before pickup. Without explicit deps, the agent may attempt work on a stale codebase. 
+**Audit the whole file when fixing one value.** Hardcoded assumptions cluster. Fixing one while leaving siblings produces multi-commit churn. -## Debugging CI failures +## Documentation -**Check CI logs via Woodpecker SQLite when the API fails.** The Woodpecker v3 log API may return HTML instead of JSON. Reliable fallback: -```bash -sqlite3 /var/lib/docker/volumes/disinto_woodpecker-data/_data/woodpecker.sqlite \ - "SELECT le.data FROM log_entries le \ - JOIN steps s ON le.step_id = s.id \ - JOIN workflows w ON s.pipeline_id = w.id \ - JOIN pipelines p ON w.pipeline_id = p.id \ - WHERE p.number = AND s.name = '' ORDER BY le.id" -``` +**Per-context rewrites, not batch replacement.** Each doc mention sits in a different narrative. Blanket substitution produces awkward text. -**When the agent fails repeatedly on CI, diagnose externally.** The dev agent cannot see CI log output (only pass/fail status). If the same step fails 3+ times, read the logs yourself and put the exact error and fix in the issue body. +**Search for implicit references too.** After keyword matches, check for instructions that assume the old mechanism without naming it. -## Retrying failed issues +## Code review -**Clean up stale branches before retrying.** Old branches cause recovery mode which inherits stale code. Close the PR, delete the branch on Forgejo, then relabel to backlog. +**Approval means "safe to ship," not "how I'd write it."** Distinguish "wrong" from "different" — only the former blocks. -**After a dependency lands, stale branches miss the fix.** If issue B depends on A, and B's PR was created before A merged, B's branch is stale. Close the PR and delete the branch so the agent starts fresh from current main. +**Scale scrutiny to blast radius.** A targeted fix warrants less ceremony than a cross-cutting refactor. -## Environment gotchas +**Be specific; separate blockers from preferences.** Concrete observations invite fixes; vague concerns invite debate. 
-**Alpine/BusyBox differs from Debian.** CI and edge containers use Alpine: -- `grep -P` (Perl regex) does not work — use `grep -E` -- `USER` variable is unset — set it explicitly: `USER=$(whoami); export USER` -- Network calls fail during `docker build` in LXD — download binaries on the host, COPY into images +**Read diffs top-down: intent, behavior, edge cases.** Verify the change matches its stated goal before examining lines. -**The host repo drifts from Forgejo main.** If factory code is bind-mounted, the host checkout goes stale. Pull regularly or use versioned releases. +## Issue authoring & retry -## Vault operations +**Self-contained issue bodies.** The agent reads the body, not comments. On retry, update the body with exact error and fix guidance. -**The human merging a vault PR must be a Forgejo site admin.** The dispatcher verifies `is_admin` on the merger. Promote your user via the Forgejo CLI or database if needed. +**Clean stale branches before retry.** Old branches trigger recovery on stale code. Close PR, delete branch, relabel. -**Result files cache failures.** If a vault action fails, the dispatcher writes `.result.json` and skips it. To retry: delete the result file inside the edge container. - -## Breaking down large features - -**Vision issues need structured decomposition.** When a feature touches multiple subsystems or has design forks, label it `vision`. Break it down by identifying what exists, what can be reused, where the design forks are, and resolve them before filing backlog issues. - -**Prefer gluecode over greenfield.** Check if Forgejo API, Woodpecker, Docker, or existing lib/ functions can do the job before building new components. - -**Max 7 sub-issues per sprint.** If a breakdown produces more, split into two sprints. +**Diagnose CI failures externally.** The agent sees pass/fail, not logs. After repeated failures, read logs yourself and put findings in the issue. 
diff --git a/docs/EVAL-MCP-SERVER.md b/docs/EVAL-MCP-SERVER.md index a0ee37e..a4022ef 100644 --- a/docs/EVAL-MCP-SERVER.md +++ b/docs/EVAL-MCP-SERVER.md @@ -39,9 +39,11 @@ programmatically instead of parsing SKILL.md instructions. (`mcp` package). This adds a build step, runtime dependency, and language that no current contributor or agent maintains. -2. **Persistent process.** The factory is cron-driven — no long-running - daemons. An MCP server must stay up, be monitored, and be restarted on - failure. This contradicts the factory's event-driven architecture (AD-004). +2. **Persistent process.** The factory already runs a long-lived polling loop + (`docker/agents/entrypoint.sh`), so an MCP server is not architecturally + alien — the loop could keep an MCP client alive across iterations. However, + adding a second long-running process increases the monitoring surface and + restart complexity. 3. **Thin wrapper over existing APIs.** Every proposed MCP tool maps directly to a forge API call or a skill script invocation. The MCP server would be diff --git a/docs/updating-factory.md b/docs/updating-factory.md index 225e823..51b17c8 100644 --- a/docs/updating-factory.md +++ b/docs/updating-factory.md @@ -108,9 +108,9 @@ curl -sf -o /dev/null -w 'HTTP %{http_code}' http://localhost:3000/ # Claude auth works? docker exec -u agent disinto-agents bash -c 'claude -p "say ok" 2>&1' -# Crontab has entries? -docker exec -u agent disinto-agents crontab -l 2>/dev/null | grep -E 'dev-poll|review' -# If empty: the projects TOML wasn't found. Check mounts. +# Agent polling loop running? +docker exec disinto-agents pgrep -f entrypoint.sh +# If no process: check that entrypoint.sh is the container CMD and projects TOML is mounted. # Agent repo cloned? docker exec disinto-agents ls /home/agent/repos/harb/.git && echo ok @@ -168,12 +168,13 @@ Credentials are bind-mounted into containers automatically. 
Multiple containers sharing OAuth can cause frequent expiry — consider using `ANTHROPIC_API_KEY` in `.env` instead. -### Crontab empty after restart -The entrypoint reads `projects/*.toml` to generate cron entries. +### Agent loop not running after restart +The entrypoint reads `projects/*.toml` to determine which agents to run. If the TOML isn't mounted or the disinto directory is read-only, -cron entries won't be created. Check: +the polling loop won't start agents. Check: ```bash docker exec disinto-agents ls /home/agent/disinto/projects/harb.toml +docker logs disinto-agents --tail 20 # look for "Entering polling loop" ``` ### "fatal: not a git repository" diff --git a/formulas/run-architect.toml b/formulas/run-architect.toml index 26dde43..39458c6 100644 --- a/formulas/run-architect.toml +++ b/formulas/run-architect.toml @@ -1,6 +1,6 @@ # formulas/run-architect.toml — Architect formula # -# Executed by architect-run.sh via cron — strategic decomposition of vision +# Executed by architect-run.sh via polling loop — strategic decomposition of vision # issues into development sprints. # # This formula orchestrates the architect agent's workflow: @@ -141,7 +141,7 @@ For each issue in ARCHITECT_TARGET_ISSUES, bash performs: ## Cost — new infra to maintain - + ## Recommendation diff --git a/formulas/run-planner.toml b/formulas/run-planner.toml index d730b51..ec6d6c8 100644 --- a/formulas/run-planner.toml +++ b/formulas/run-planner.toml @@ -1,6 +1,6 @@ # formulas/run-planner.toml — Strategic planning formula (v4: graph-driven) # -# Executed directly by planner-run.sh via cron — no action issues. +# Executed directly by planner-run.sh via polling loop — no action issues. # planner-run.sh creates a tmux session with Claude (opus) and injects # this formula as context, plus the graph report from build-graph.py. 
# diff --git a/formulas/run-predictor.toml b/formulas/run-predictor.toml index 97d22b6..ddaa8a4 100644 --- a/formulas/run-predictor.toml +++ b/formulas/run-predictor.toml @@ -6,7 +6,7 @@ # Memory: previous predictions on the forge ARE the memory. # No separate memory file — the issue tracker is the source of truth. # -# Executed by predictor/predictor-run.sh via cron — no action issues. +# Executed by predictor/predictor-run.sh via polling loop — no action issues. # predictor-run.sh creates a tmux session with Claude (sonnet) and injects # this formula as context. Claude executes all steps autonomously. # diff --git a/formulas/run-publish-site.toml b/formulas/run-publish-site.toml index 9a7c1e7..0751763 100644 --- a/formulas/run-publish-site.toml +++ b/formulas/run-publish-site.toml @@ -216,7 +216,7 @@ Check 3 — engagement evidence has been collected at least once: jq -r '" visitors=\(.unique_visitors) pages=\(.page_views) referrals=\(.referred_visitors)"' "$LATEST" 2>/dev/null || true else echo "NOTE: No engagement reports yet — run: bash site/collect-engagement.sh" - echo "The first report will appear after the cron job runs (daily at 23:55 UTC)." + echo "The first report will appear after the scheduled collection runs (daily at 23:55 UTC)." fi Summary: diff --git a/formulas/run-supervisor.toml b/formulas/run-supervisor.toml index 7d43d36..f31e6bc 100644 --- a/formulas/run-supervisor.toml +++ b/formulas/run-supervisor.toml @@ -1,6 +1,6 @@ # formulas/run-supervisor.toml — Supervisor formula (health monitoring + remediation) # -# Executed by supervisor/supervisor-run.sh via cron (every 20 minutes). +# Executed by supervisor/supervisor-run.sh via polling loop (every 20 minutes). # supervisor-run.sh runs claude -p via agent-sdk.sh and injects # this formula with pre-collected metrics as context. 
# diff --git a/gardener/gardener-run.sh b/gardener/gardener-run.sh index 3da81a6..b1c1cf1 100755 --- a/gardener/gardener-run.sh +++ b/gardener/gardener-run.sh @@ -1,12 +1,12 @@ #!/usr/bin/env bash # ============================================================================= -# gardener-run.sh — Cron wrapper: gardener execution via SDK + formula +# gardener-run.sh — Polling-loop wrapper: gardener execution via SDK + formula # # Synchronous bash loop using claude -p (one-shot invocation). # No tmux sessions, no phase files — the bash script IS the state machine. # # Flow: -# 1. Guards: cron lock, memory check +# 1. Guards: run lock, memory check # 2. Load formula (formulas/run-gardener.toml) # 3. Build context: AGENTS.md, scratch file, prompt footer # 4. agent_run(worktree, prompt) → Claude does maintenance, pushes if needed @@ -17,7 +17,7 @@ # Usage: # gardener-run.sh [projects/disinto.toml] # project config (default: disinto) # -# Cron: 0 0,6,12,18 * * * cd /home/debian/dark-factory && bash gardener/gardener-run.sh projects/disinto.toml +# Called by: entrypoint.sh polling loop (every 6 hours) # ============================================================================= set -euo pipefail @@ -62,7 +62,7 @@ LOG_AGENT="gardener" # ── Guards ──────────────────────────────────────────────────────────────── check_active gardener -acquire_cron_lock "/tmp/gardener-run.lock" +acquire_run_lock "/tmp/gardener-run.lock" memory_guard 2000 log "--- Gardener run start ---" diff --git a/lib/AGENTS.md b/lib/AGENTS.md index a937e8b..64f4124 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -12,8 +12,8 @@ sourced as needed. | `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. ` [--step ]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). 
| ci-helpers.sh | | `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). **Container path derivation**: `PROJECT_REPO_ROOT` and `OPS_REPO_ROOT` are derived at runtime when `DISINTO_CONTAINER=1` — hardcoded to `/home/agent/repos/$PROJECT_NAME` and `/home/agent/repos/$PROJECT_NAME-ops` respectively — not read from the TOML. This ensures correct paths inside containers where host paths in the TOML would be wrong. | env.sh (when `PROJECT_TOML` is set) | | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll | -| `lib/formula-session.sh` | `acquire_cron_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven cron agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). 
`resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | -| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in cron logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | cron entry points | +| `lib/formula-session.sh` | `acquire_run_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven polling-loop agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). 
`resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh | +| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in loop logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | polling-loop entry points | | `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh | | `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. 
| review-pr.sh, predictor-run.sh | | `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh | @@ -28,7 +28,7 @@ sourced as needed. | `lib/forge-setup.sh` | `setup_forge()` — Forgejo instance provisioning: creates admin user, bot accounts, org, repos (code + ops), configures webhooks, sets repo topics. Extracted from `bin/disinto`. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`. **Password storage (#361)**: after creating each bot account, stores its password in `.env` as `FORGE__PASS` (e.g. `FORGE_PASS`, `FORGE_REVIEW_PASS`, etc.) for use by `forge-push.sh`. | bin/disinto (init) | | `lib/forge-push.sh` | `push_to_forge()` — pushes a local clone to the Forgejo remote and verifies the push. `_assert_forge_push_globals()` validates required env vars before use. Requires `FORGE_URL`, `FORGE_PASS`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. **Auth**: uses `FORGE_PASS` (bot password) for git HTTP push — Forgejo 11.x rejects API tokens for `git push` (#361). | bin/disinto (init) | | `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) | -| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for project agents. `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. 
`_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) | +| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_woodpecker_oauth_impl()` — creates OAuth2 app on Forgejo for Woodpecker. `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) | | `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (uses `codeberg.org/forgejo/forgejo:11.0` tag; adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility), `generate_caddyfile()` — Caddyfile, `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) | | `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) | | `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. 
| bin/disinto (release) | diff --git a/lib/ci-setup.sh b/lib/ci-setup.sh index 7c4c5dd..c2e3b8e 100644 --- a/lib/ci-setup.sh +++ b/lib/ci-setup.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash # ============================================================================= -# ci-setup.sh — CI setup functions for Woodpecker and cron configuration +# ci-setup.sh — CI setup functions for Woodpecker and scheduling configuration # # Internal functions (called via _load_ci_context + _*_impl): -# _install_cron_impl() - Install crontab entries for project agents +# _install_cron_impl() - Install crontab entries (bare-metal only; compose uses polling loop) # _create_woodpecker_oauth_impl() - Create OAuth2 app on Forgejo for Woodpecker # _generate_woodpecker_token_impl() - Auto-generate WOODPECKER_TOKEN via OAuth2 flow # _activate_woodpecker_repo_impl() - Activate repo in Woodpecker @@ -30,12 +30,13 @@ _load_ci_context() { fi } -# Generate and optionally install cron entries for the project agents. +# Generate and optionally install cron entries for bare-metal deployments. +# In compose mode, the agents container uses a polling loop (entrypoint.sh) instead. 
# Usage: install_cron _install_cron_impl() { local name="$1" toml="$2" auto_yes="$3" bare="${4:-false}" - # In compose mode, skip host cron — the agents container runs cron internally + # In compose mode, skip host cron — the agents container uses a polling loop if [ "$bare" = false ]; then echo "" echo "Cron: skipped (agents container handles scheduling in compose mode)" diff --git a/lib/formula-session.sh b/lib/formula-session.sh index fe256bf..a39540d 100644 --- a/lib/formula-session.sh +++ b/lib/formula-session.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash -# formula-session.sh — Shared helpers for formula-driven cron agents +# formula-session.sh — Shared helpers for formula-driven polling-loop agents # -# Provides reusable utility functions for the common cron-wrapper pattern +# Provides reusable utility functions for the common polling-loop wrapper pattern # used by planner-run.sh, predictor-run.sh, gardener-run.sh, and supervisor-run.sh. # # Functions: -# acquire_cron_lock LOCK_FILE — PID lock with stale cleanup +# acquire_run_lock LOCK_FILE — PID lock with stale cleanup # load_formula FORMULA_FILE — sets FORMULA_CONTENT # build_context_block FILE [FILE ...] — sets CONTEXT_BLOCK # build_prompt_footer [EXTRA_API_LINES] — sets PROMPT_FOOTER (API ref + env) @@ -30,24 +30,24 @@ # # Requires: lib/env.sh, lib/worktree.sh sourced first for shared helpers. -# ── Cron guards ────────────────────────────────────────────────────────── +# ── Run guards ─────────────────────────────────────────────────────────── -# acquire_cron_lock LOCK_FILE +# acquire_run_lock LOCK_FILE # Acquires a PID lock. Exits 0 if another instance is running. # Sets an EXIT trap to clean up the lock file. 
-acquire_cron_lock() { - _CRON_LOCK_FILE="$1" - if [ -f "$_CRON_LOCK_FILE" ]; then +acquire_run_lock() { + _RUN_LOCK_FILE="$1" + if [ -f "$_RUN_LOCK_FILE" ]; then local lock_pid - lock_pid=$(cat "$_CRON_LOCK_FILE" 2>/dev/null || true) + lock_pid=$(cat "$_RUN_LOCK_FILE" 2>/dev/null || true) if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then log "run: already running (PID $lock_pid)" exit 0 fi - rm -f "$_CRON_LOCK_FILE" + rm -f "$_RUN_LOCK_FILE" fi - echo $$ > "$_CRON_LOCK_FILE" - trap 'rm -f "$_CRON_LOCK_FILE"' EXIT + echo $$ > "$_RUN_LOCK_FILE" + trap 'rm -f "$_RUN_LOCK_FILE"' EXIT } # ── Agent identity resolution ──────────────────────────────────────────── diff --git a/lib/guard.sh b/lib/guard.sh index 4f906e1..75b5229 100644 --- a/lib/guard.sh +++ b/lib/guard.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# guard.sh — Active-state guard for cron entry points +# guard.sh — Active-state guard for polling-loop entry points # # Each agent checks for a state file before running. If the file # doesn't exist, the agent logs a skip and exits cleanly. diff --git a/planner/planner-run.sh b/planner/planner-run.sh index f671898..0845dc0 100755 --- a/planner/planner-run.sh +++ b/planner/planner-run.sh @@ -1,12 +1,12 @@ #!/usr/bin/env bash # ============================================================================= -# planner-run.sh — Cron wrapper: planner execution via SDK + formula +# planner-run.sh — Polling-loop wrapper: planner execution via SDK + formula # # Synchronous bash loop using claude -p (one-shot invocation). # No tmux sessions, no phase files — the bash script IS the state machine. # # Flow: -# 1. Guards: cron lock, memory check +# 1. Guards: run lock, memory check # 2. Load formula (formulas/run-planner.toml) # 3. 
Context: VISION.md, AGENTS.md, ops:RESOURCES.md, structural graph, # planner memory, journal entries @@ -56,7 +56,7 @@ log() { # ── Guards ──────────────────────────────────────────────────────────────── check_active planner -acquire_cron_lock "/tmp/planner-run.lock" +acquire_run_lock "/tmp/planner-run.lock" memory_guard 2000 log "--- Planner run start ---" diff --git a/predictor/predictor-run.sh b/predictor/predictor-run.sh index 889fe1c..0538288 100755 --- a/predictor/predictor-run.sh +++ b/predictor/predictor-run.sh @@ -1,12 +1,12 @@ #!/usr/bin/env bash # ============================================================================= -# predictor-run.sh — Cron wrapper: predictor execution via SDK + formula +# predictor-run.sh — Polling-loop wrapper: predictor execution via SDK + formula # # Synchronous bash loop using claude -p (one-shot invocation). # No tmux sessions, no phase files — the bash script IS the state machine. # # Flow: -# 1. Guards: cron lock, memory check +# 1. Guards: run lock, memory check # 2. Load formula (formulas/run-predictor.toml) # 3. Context: AGENTS.md, ops:RESOURCES.md, VISION.md, structural graph # 4. 
agent_run(worktree, prompt) → Claude analyzes, writes to ops repo @@ -14,7 +14,7 @@ # Usage: # predictor-run.sh [projects/disinto.toml] # project config (default: disinto) # -# Cron: 0 6 * * * cd /path/to/dark-factory && bash predictor/predictor-run.sh +# Called by: entrypoint.sh polling loop (daily) # ============================================================================= set -euo pipefail @@ -57,7 +57,7 @@ log() { # ── Guards ──────────────────────────────────────────────────────────────── check_active predictor -acquire_cron_lock "/tmp/predictor-run.lock" +acquire_run_lock "/tmp/predictor-run.lock" memory_guard 2000 log "--- Predictor run start ---" diff --git a/site/docs/architecture.html b/site/docs/architecture.html index 2ab1a2f..0d1dc9e 100644 --- a/site/docs/architecture.html +++ b/site/docs/architecture.html @@ -370,32 +370,32 @@
dev-agent
Picks up backlog issues, implements code in isolated git worktrees, opens PRs. Runs as a persistent tmux session.
-
Cron: every 5 min
+
Polling loop: every 5 min
review-agent
Reviews PRs against project conventions. Approves clean PRs, requests specific changes on others.
-
Cron: every 5 min
+
Polling loop: every 5 min
planner
Reads VISION.md and repo state. Creates issues for gaps between where the project is and where it should be.
-
Cron: weekly
+
Polling loop: weekly
gardener
Grooms the backlog. Closes duplicates, promotes tech-debt issues, ensures issues are well-structured.
-
Cron: every 6 hours
+
Polling loop: every 6 hours
supervisor
Monitors factory health. Kills stale sessions, manages disk/memory, escalates persistent failures.
-
Cron: every 10 min
+
Polling loop: every 20 min
predictor
Detects infrastructure patterns — recurring failures, resource trends, emerging issues. Files predictions for triage.
-
Cron: daily
+
Polling loop: daily
vault
@@ -473,7 +473,7 @@
Tech stack

Bash scripts — every agent is a shell script. No compiled binaries, no runtimes to install.

Claude CLI — AI is invoked via claude -p (one-shot) or claude (persistent tmux sessions).

-

Cron — agents are triggered by cron jobs, not a daemon. Pull-based, not push-based.

+

Polling loop — agents are triggered by a while true loop in entrypoint.sh. Pull-based, not push-based.

Forgejo + Woodpecker — git hosting and CI. All state lives in git and the issue tracker. No external databases.

Single VPS — runs on an 8 GB server. Flat cost, no scaling surprises.

@@ -485,7 +485,7 @@
AD-001
-
Nervous system runs from cron, not action issues. Planner, predictor, gardener, supervisor run directly. They create work, they don't become work.
+
Nervous system runs from a polling loop, not action issues. Planner, predictor, gardener, supervisor run directly. They create work, they don't become work.
AD-002
@@ -514,9 +514,9 @@ disinto/ ├── dev/ dev-poll.sh, dev-agent.sh, phase-handler.sh ├── review/ review-poll.sh, review-pr.sh -├── gardener/ gardener-run.sh (cron executor) -├── predictor/ predictor-run.sh (daily cron executor) -├── planner/ planner-run.sh (weekly cron executor) +├── gardener/ gardener-run.sh (polling-loop executor) +├── predictor/ predictor-run.sh (daily polling-loop executor) +├── planner/ planner-run.sh (weekly polling-loop executor) ├── supervisor/ supervisor-run.sh (health monitoring) ├── vault/ vault-env.sh (vault redesign in progress, see #73-#77) ├── lib/ env.sh, agent-session.sh, ci-helpers.sh diff --git a/site/docs/quickstart.html b/site/docs/quickstart.html index 0dd90e6..0bede87 100644 --- a/site/docs/quickstart.html +++ b/site/docs/quickstart.html @@ -353,7 +353,7 @@ cp .env.example .env 2 Initialize your project
-

disinto init starts the full stack (Forgejo + Woodpecker CI), creates your repo, clones it locally, generates the project config, adds labels, and installs cron jobs — all in one command.

+

disinto init starts the full stack (Forgejo + Woodpecker CI), creates your repo, clones it locally, generates the project config, adds labels, and configures agent scheduling — all in one command.

bin/disinto init user/your-project

Use disinto up / disinto down later to restart or stop the stack.

@@ -373,7 +373,7 @@ Creating labels on you/your-project... + vision + action Created: /home/you/your-project/VISION.md -Cron entries installed +Agent scheduling configured Done. Project your-project is ready.

Optional flags:

diff --git a/site/index.html b/site/index.html index b57d74f..4e5f1ea 100644 --- a/site/index.html +++ b/site/index.html @@ -712,7 +712,7 @@ dashboard
- Under the hood: dev, review, planner, gardener, supervisor, predictor, action, vault, exec — nine agents orchestrated by cron and bash. + Under the hood: dev, review, planner, gardener, supervisor, predictor, action, vault, exec — nine agents orchestrated by a polling loop and bash.
diff --git a/supervisor/supervisor-run.sh b/supervisor/supervisor-run.sh index 2a44b47..54a0f0f 100755 --- a/supervisor/supervisor-run.sh +++ b/supervisor/supervisor-run.sh @@ -1,12 +1,12 @@ #!/usr/bin/env bash # ============================================================================= -# supervisor-run.sh — Cron wrapper: supervisor execution via SDK + formula +# supervisor-run.sh — Polling-loop wrapper: supervisor execution via SDK + formula # # Synchronous bash loop using claude -p (one-shot invocation). # No tmux sessions, no phase files — the bash script IS the state machine. # # Flow: -# 1. Guards: cron lock, memory check +# 1. Guards: run lock, memory check # 2. Housekeeping: clean up stale crashed worktrees # 3. Collect pre-flight metrics (supervisor/preflight.sh) # 4. Load formula (formulas/run-supervisor.toml) @@ -16,7 +16,7 @@ # Usage: # supervisor-run.sh [projects/disinto.toml] # project config (default: disinto) # -# Cron: */20 * * * * cd /path/to/dark-factory && bash supervisor/supervisor-run.sh +# Called by: entrypoint.sh polling loop (every 20 minutes) # ============================================================================= set -euo pipefail @@ -79,7 +79,7 @@ log() { # ── Guards ──────────────────────────────────────────────────────────────── check_active supervisor -acquire_cron_lock "/tmp/supervisor-run.lock" +acquire_run_lock "/tmp/supervisor-run.lock" memory_guard 2000 log "--- Supervisor run start ---" diff --git a/tests/smoke-init.sh b/tests/smoke-init.sh index a8371bd..1dc343e 100644 --- a/tests/smoke-init.sh +++ b/tests/smoke-init.sh @@ -7,7 +7,7 @@ # 3. Run disinto init # 4. Verify Forgejo state (users, repo) # 5. Verify local state (TOML, .env, repo clone) -# 6. Verify cron setup +# 6. Verify scheduling setup # # Required env: FORGE_URL (default: http://localhost:3000) # Required tools: bash, curl, jq, python3, git @@ -264,27 +264,24 @@ else fail "Repo not cloned to /tmp/smoke-test-repo" fi -# ── 6. 
Verify cron setup ────────────────────────────────────
-echo "=== 6/6 Verifying cron setup ==="
-cron_output=$(crontab -l 2>/dev/null) || cron_output=""
-if [ -n "$cron_output" ]; then
-  if printf '%s' "$cron_output" | grep -q 'dev-poll.sh'; then
-    pass "Cron includes dev-poll entry"
-  else
-    fail "Cron missing dev-poll entry"
-  fi
-  if printf '%s' "$cron_output" | grep -q 'review-poll.sh'; then
-    pass "Cron includes review-poll entry"
-  else
-    fail "Cron missing review-poll entry"
-  fi
-  if printf '%s' "$cron_output" | grep -q 'gardener-run.sh'; then
-    pass "Cron includes gardener entry"
-  else
-    fail "Cron missing gardener entry"
-  fi
+# ── 6. Verify scheduling setup ──────────────────────────────────────────────
+echo "=== 6/6 Verifying scheduling setup ==="
+# In compose mode, scheduling is handled by the entrypoint.sh polling loop.
+# In bare-metal mode (--bare), crontab entries are installed.
+# The smoke test runs without --bare, so cron install is skipped.
+if [ -n "${FACTORY_ROOT:-}" ] && [ -f "${FACTORY_ROOT}/docker-compose.yml" ]; then
+  pass "Compose mode: scheduling handled by entrypoint.sh polling loop"
 else
-  fail "No cron entries found (crontab -l returned empty)"
+  cron_output=$(crontab -l 2>/dev/null) || cron_output=""
+  if [ -n "$cron_output" ]; then
+    if printf '%s' "$cron_output" | grep -q 'dev-poll.sh'; then
+      pass "Bare-metal: crontab includes dev-poll entry"
+    else
+      fail "Bare-metal: crontab missing dev-poll entry"
+    fi
+  else
+    pass "No crontab entries (expected in non-bare mode)"
+  fi
 fi

# ── Summary ──────────────────────────────────────────────────────────────────