From ee6285ead9dd016fe203c93f9aa39d8a7f536ba4 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 2 Apr 2026 20:36:56 +0000 Subject: [PATCH] fix: docs: add factory interaction lessons to SKILL.md (#156) --- disinto-factory/SKILL.md | 266 ++--------------------------- disinto-factory/lessons-learned.md | 54 ++++++ disinto-factory/operations.md | 54 ++++++ disinto-factory/setup.md | 191 +++++++++++++++++++++ 4 files changed, 312 insertions(+), 253 deletions(-) create mode 100644 disinto-factory/lessons-learned.md create mode 100644 disinto-factory/operations.md create mode 100644 disinto-factory/setup.md diff --git a/disinto-factory/SKILL.md b/disinto-factory/SKILL.md index 8c6a672..6a74a18 100644 --- a/disinto-factory/SKILL.md +++ b/disinto-factory/SKILL.md @@ -1,268 +1,28 @@ --- name: disinto-factory -description: Set up and operate a disinto autonomous code factory. Use when bootstrapping a new factory instance, checking on agents and CI, managing the backlog, or troubleshooting the stack. +description: Set up and operate a disinto autonomous code factory. --- # Disinto Factory -You are helping the user set up and operate a **disinto autonomous code factory** — a system -of bash scripts and Claude CLI that automates the full development lifecycle: picking up -issues, implementing via Claude, creating PRs, running CI, reviewing, merging, and mirroring. +You are helping the user set up and operate a **disinto autonomous code factory**. -This guide shows how to set up the factory to develop an **external project** (e.g., `johba/harb`). +## Guides -## First-time setup - -Walk the user through these steps interactively. Ask questions where marked with [ASK]. - -### 1. Environment - -[ASK] Where will the factory run? 
Options: -- **LXD container** (recommended for isolation) — need Debian 12, Docker, nesting enabled -- **Bare VM or server** — need Debian/Ubuntu with Docker -- **Existing container** — check prerequisites - -Verify prerequisites: -```bash -docker --version && git --version && jq --version && curl --version && tmux -V && python3 --version && claude --version -``` - -Any missing tool — help the user install it before continuing. - -### 2. Clone disinto and choose a target project - -Clone the disinto factory itself: -```bash -git clone https://codeberg.org/johba/disinto.git && cd disinto -``` - -[ASK] What repository should the factory develop? Provide the **remote repository URL** in one of these formats: -- Full URL: `https://github.com/johba/harb.git` or `https://codeberg.org/johba/harb.git` -- Short slug: `johba/harb` (uses local Forgejo as the primary remote) - -The factory will clone from the remote URL (if provided) or from your local Forgejo, then mirror to the remote. - -Then initialize the factory for that project: -```bash -bin/disinto init johba/harb --yes -# or with full URL: -bin/disinto init https://github.com/johba/harb.git --yes -``` - -The `init` command will: -- Create all bot users (dev-bot, review-bot, etc.) on the local Forgejo -- Generate and save `WOODPECKER_TOKEN` -- Start the stack containers -- Clone the target repo into the agent workspace - -> **Note:** The `--repo-root` flag is optional and only needed if you want to customize -> where the cloned repo lives. By default, it goes under `/home/agent/repos/`. - -### 3. Post-init verification - -Run this checklist — fix any failures before proceeding: - -```bash -# Stack healthy? -docker ps --format "table {{.Names}}\t{{.Status}}" -# Expected: forgejo, woodpecker (healthy), woodpecker-agent (healthy), agents, edge, staging - -# Token generated? -grep WOODPECKER_TOKEN .env | grep -v "^$" && echo "OK" || echo "MISSING — see references/troubleshooting.md" - -# Agent cron active? 
-docker exec -u agent disinto-agents-1 crontab -l -u agent - -# Agent can reach Forgejo? -docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version" - -# Agent repo cloned? -docker exec -u agent disinto-agents-1 ls /home/agent/repos/ -``` - -If the agent repo is missing, clone it: -```bash -docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos -docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000//.git /home/agent/repos/" -``` - -### 4. Create the project configuration file - -The factory uses a TOML file to configure how it manages your project. Create -`projects/.toml` based on the template format: - -```toml -# projects/harb.toml - -name = "harb" -repo = "johba/harb" -forge_url = "http://localhost:3000" -repo_root = "/home/agent/repos/harb" -primary_branch = "master" - -[ci] -woodpecker_repo_id = 0 -stale_minutes = 60 - -[services] -containers = ["ponder"] - -[monitoring] -check_prs = true -check_dev_agent = true -check_pipeline_stall = true - -# [mirrors] -# github = "git@github.com:johba/harb.git" -# codeberg = "git@codeberg.org:johba/harb.git" -``` - -**Key fields:** -- `name`: Project identifier (used for file names, logs, etc.) -- `repo`: The source repo in `owner/name` format -- `forge_url`: URL of your local Forgejo instance -- `repo_root`: Where the agent clones the repo -- `primary_branch`: Default branch name (e.g., `main` or `master`) -- `woodpecker_repo_id`: Set to `0` initially; auto-populated on first CI run -- `containers`: List of Docker containers the factory should manage -- `mirrors`: Optional external forge URLs for backup/sync - -### 5. Mirrors (optional) - -[ASK] Should the factory mirror to external forges? If yes, which? 
-- GitHub: need repo URL and SSH key added to GitHub account -- Codeberg: need repo URL and SSH key added to Codeberg account - -Show the user their public key: -```bash -cat ~/.ssh/id_ed25519.pub -``` - -Test SSH access: -```bash -ssh -T git@github.com 2>&1; ssh -T git@codeberg.org 2>&1 -``` - -If SSH host keys are missing: `ssh-keyscan github.com codeberg.org >> ~/.ssh/known_hosts 2>/dev/null` - -Edit `projects/.toml` to uncomment and configure mirrors: -```toml -[mirrors] -github = "git@github.com:Org/repo.git" -codeberg = "git@codeberg.org:user/repo.git" -``` - -Test with a manual push: -```bash -source .env && source lib/env.sh && export PROJECT_TOML=projects/.toml && source lib/load-project.sh && source lib/mirrors.sh && mirror_push -``` - -### 6. Seed the backlog - -[ASK] What should the factory work on first? Brainstorm with the user. - -Help them create issues on the local Forgejo. Each issue needs: -- A clear title prefixed with `fix:`, `feat:`, or `chore:` -- A body describing what to change, which files, and any constraints -- The `backlog` label (so the dev-agent picks it up) - -```bash -source .env -BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos///labels" \ - -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id') - -curl -sf -X POST "http://localhost:3000/api/v1/repos///issues" \ - -H "Authorization: token $FORGE_TOKEN" \ - -H "Content-Type: application/json" \ - -d "{\"title\": \"\", \"body\": \"<body>\", \"labels\": [$BACKLOG_ID]}" -``` - -For issues with dependencies, add `Depends-on: #N` in the body — the dev-agent checks -these before starting. - -Use labels: -- `backlog` — ready for the dev-agent -- `blocked` — parked, not for the factory -- No label — tracked but not for autonomous work - -### 7. Watch it work - -The dev-agent polls every 5 minutes. 
Trigger manually to see it immediately: -```bash -source .env -export PROJECT_TOML=projects/<name>.toml -docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/disinto && bash dev/dev-poll.sh projects/<name>.toml" -``` - -Then monitor: -```bash -# Watch the agent work -docker exec disinto-agents-1 tail -f /home/agent/data/logs/dev/dev-agent.log - -# Check for Claude running -docker exec disinto-agents-1 bash -c "for f in /proc/[0-9]*/cmdline; do cmd=\$(tr '\0' ' ' < \$f 2>/dev/null); echo \$cmd | grep -q 'claude.*-p' && echo 'Claude is running'; done" -``` - -## Ongoing operations - -### Check factory status - -```bash -source .env - -# Issues -curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/issues?state=open" \ - -H "Authorization: token $FORGE_TOKEN" \ - | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' - -# PRs -curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/pulls?state=open" \ - -H "Authorization: token $FORGE_TOKEN" \ - | jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"' - -# Agent logs -docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent.log -``` - -### Check CI - -```bash -source .env -WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \ - | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') -curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \ - "http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \ - | jq '.[] | {number, status, event}' -``` - -### Unstick a blocked issue - -When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled `blocked`: - -1. Close stale PR and delete the branch -2. `docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock` -3. Relabel the issue to `backlog` -4. 
Update agent repo: `docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/repos/<name> && git fetch origin && git reset --hard origin/main"` - -### Access Forgejo UI - -If running in an LXD container with reverse tunnel: -```bash -# From your machine: -ssh -L 3000:localhost:13000 user@jump-host -# Open http://localhost:3000 -``` - -Reset admin password if needed: -```bash -docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password <new-pw> --must-change-password=false" git -``` +- **[Setup guide](setup.md)** — First-time factory setup: environment, init, verification, backlog seeding +- **[Operations guide](operations.md)** — Day-to-day: status checks, CI debugging, unsticking issues, Forgejo access +- **[Lessons learned](lessons-learned.md)** — Patterns for writing issues, debugging CI, retrying failures, vault operations, breaking down features ## Important context - Read `AGENTS.md` for per-agent architecture and file-level docs - Read `VISION.md` for project philosophy - The factory uses a single internal Forgejo as its forge, regardless of where mirrors go -- Dev-agent uses `claude -p --resume` for session continuity across CI/review cycles -- Mirror pushes happen automatically after every merge (fire-and-forget) +- Dev-agent uses `claude -p` for one-shot implementation sessions +- Mirror pushes happen automatically after every merge - Cron schedule: dev-poll every 5min, review-poll every 5min, gardener 4x/day + +## References + +- [Troubleshooting](references/troubleshooting.md) +- [Factory status script](scripts/factory-status.sh) diff --git a/disinto-factory/lessons-learned.md b/disinto-factory/lessons-learned.md new file mode 100644 index 0000000..b1f6a3b --- /dev/null +++ b/disinto-factory/lessons-learned.md @@ -0,0 +1,54 @@ +# Working with the factory — lessons learned + +## Writing issues for the dev agent + +**Put everything in the issue body, not comments.** The dev agent reads the issue body 
when it starts work. It does not reliably read comments. If an issue fails and you need to add guidance for a retry, update the issue body. + +**One approach per issue, no choices.** The dev agent cannot make design decisions. If there are multiple ways to solve a problem, decide before filing. Issues with "Option A or Option B" will confuse the agent. + +**Issues must fit the templates.** Every backlog issue needs: affected files (max 3), acceptance criteria (max 5 checkboxes), and a clear proposed solution. If you cannot fill these fields, the issue is too big — label it `vision` and break it down first. + +**Explicit dependencies prevent ordering bugs.** Add `Depends-on: #N` in the issue body. `dev-poll` checks these before pickup. Without explicit deps, the agent may attempt work on a stale codebase. + +## Debugging CI failures + +**Check CI logs via Woodpecker SQLite when the API fails.** The Woodpecker v3 log API may return HTML instead of JSON. Reliable fallback: +```bash +sqlite3 /var/lib/docker/volumes/disinto_woodpecker-data/_data/woodpecker.sqlite \ + "SELECT le.data FROM log_entries le \ + JOIN steps s ON le.step_id = s.id \ + JOIN workflows w ON s.pipeline_id = w.id \ + JOIN pipelines p ON w.pipeline_id = p.id \ + WHERE p.number = <N> AND s.name = '<step>' ORDER BY le.id" +``` + +**When the agent fails repeatedly on CI, diagnose externally.** The dev agent cannot see CI log output (only pass/fail status). If the same step fails 3+ times, read the logs yourself and put the exact error and fix in the issue body. + +## Retrying failed issues + +**Clean up stale branches before retrying.** Old branches trigger recovery mode, which inherits stale code. Close the PR, delete the branch on Forgejo, then relabel the issue to `backlog`. + +**After a dependency lands, stale branches miss the fix.** If issue B depends on A, and B's PR was created before A merged, B's branch is stale. Close the PR and delete the branch so the agent starts fresh from current main. 
+ +## Environment gotchas + +**Alpine/BusyBox differs from Debian.** CI and edge containers use Alpine: +- `grep -P` (Perl regex) does not work — use `grep -E` +- `USER` variable is unset — set it explicitly: `USER=$(whoami); export USER` +- Network calls fail during `docker build` in LXD — download binaries on the host, then `COPY` them into images + +**The host repo drifts from Forgejo main.** If factory code is bind-mounted, the host checkout goes stale. Pull regularly or use versioned releases. + +## Vault operations + +**The human merging a vault PR must be a Forgejo site admin.** The dispatcher verifies `is_admin` on the merger. Promote your user via the Forgejo CLI or database if needed. + +**Result files cache failures.** If a vault action fails, the dispatcher writes `.result.json` and skips that action on subsequent runs. To retry: delete the result file inside the edge container. + +## Breaking down large features + +**Vision issues need structured decomposition.** When a feature touches multiple subsystems or has design forks, label it `vision`. Break it down by identifying what exists, what can be reused, and where the design forks are; resolve those forks before filing backlog issues. + +**Prefer glue code over greenfield.** Check whether the Forgejo API, Woodpecker, Docker, or existing lib/ functions can do the job before building new components. + +**Max 7 sub-issues per sprint.** If a breakdown produces more, split into two sprints. 
diff --git a/disinto-factory/operations.md b/disinto-factory/operations.md new file mode 100644 index 0000000..9639093 --- /dev/null +++ b/disinto-factory/operations.md @@ -0,0 +1,54 @@ +# Ongoing operations + +### Check factory status + +```bash +source .env + +# Issues +curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/issues?state=open" \ + -H "Authorization: token $FORGE_TOKEN" \ + | jq -r '.[] | "#\(.number) [\(.labels | map(.name) | join(","))] \(.title)"' + +# PRs +curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/pulls?state=open" \ + -H "Authorization: token $FORGE_TOKEN" \ + | jq -r '.[] | "PR #\(.number) [\(.head.ref)] \(.title)"' + +# Agent logs +docker exec disinto-agents-1 tail -20 /home/agent/data/logs/dev/dev-agent.log +``` + +### Check CI + +```bash +source .env +WP_CSRF=$(curl -sf -b "user_sess=$WOODPECKER_TOKEN" http://localhost:8000/web-config.js \ + | sed -n 's/.*WOODPECKER_CSRF = "\([^"]*\)".*/\1/p') +curl -sf -b "user_sess=$WOODPECKER_TOKEN" -H "X-CSRF-Token: $WP_CSRF" \ + "http://localhost:8000/api/repos/1/pipelines?page=1&per_page=5" \ + | jq '.[] | {number, status, event}' +``` + +### Unstick a blocked issue + +When a dev-agent run fails (CI timeout, implementation error), the issue gets labeled `blocked`: + +1. Close stale PR and delete the branch +2. `docker exec disinto-agents-1 rm -f /tmp/dev-agent-*.json /tmp/dev-agent-*.lock` +3. Relabel the issue to `backlog` +4. 
Update agent repo: `docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/repos/<name> && git fetch origin && git reset --hard origin/main"` + +### Access Forgejo UI + +If running in an LXD container with reverse tunnel: +```bash +# From your machine: +ssh -L 3000:localhost:13000 user@jump-host +# Open http://localhost:3000 +``` + +Reset admin password if needed: +```bash +docker exec disinto-forgejo-1 su -c "forgejo admin user change-password --username disinto-admin --password <new-pw> --must-change-password=false" git +``` diff --git a/disinto-factory/setup.md b/disinto-factory/setup.md new file mode 100644 index 0000000..dc8ad02 --- /dev/null +++ b/disinto-factory/setup.md @@ -0,0 +1,191 @@ +# First-time setup + +Walk the user through these steps interactively. Ask questions where marked with [ASK]. + +### 1. Environment + +[ASK] Where will the factory run? Options: +- **LXD container** (recommended for isolation) — need Debian 12, Docker, nesting enabled +- **Bare VM or server** — need Debian/Ubuntu with Docker +- **Existing container** — check prerequisites + +Verify prerequisites: +```bash +docker --version && git --version && jq --version && curl --version && tmux -V && python3 --version && claude --version +``` + +Any missing tool — help the user install it before continuing. + +### 2. Clone disinto and choose a target project + +Clone the disinto factory itself: +```bash +git clone https://codeberg.org/johba/disinto.git && cd disinto +``` + +[ASK] What repository should the factory develop? Provide the **remote repository URL** in one of these formats: +- Full URL: `https://github.com/johba/harb.git` or `https://codeberg.org/johba/harb.git` +- Short slug: `johba/harb` (uses local Forgejo as the primary remote) + +The factory will clone from the remote URL (if provided) or from your local Forgejo, then mirror to the remote. 
+ +Then initialize the factory for that project: +```bash +bin/disinto init johba/harb --yes +# or with full URL: +bin/disinto init https://github.com/johba/harb.git --yes +``` + +The `init` command will: +- Create all bot users (dev-bot, review-bot, etc.) on the local Forgejo +- Generate and save `WOODPECKER_TOKEN` +- Start the stack containers +- Clone the target repo into the agent workspace + +> **Note:** The `--repo-root` flag is optional and only needed if you want to customize +> where the cloned repo lives. By default, it goes under `/home/agent/repos/<name>`. + +### 3. Post-init verification + +Run this checklist — fix any failures before proceeding: + +```bash +# Stack healthy? +docker ps --format "table {{.Names}}\t{{.Status}}" +# Expected: forgejo, woodpecker (healthy), woodpecker-agent (healthy), agents, edge, staging + +# Token generated? +grep WOODPECKER_TOKEN .env | grep -v "^$" && echo "OK" || echo "MISSING — see references/troubleshooting.md" + +# Agent cron active? +docker exec -u agent disinto-agents-1 crontab -l -u agent + +# Agent can reach Forgejo? +docker exec disinto-agents-1 bash -c "source /home/agent/disinto/.env && curl -sf http://forgejo:3000/api/v1/version | jq .version" + +# Agent repo cloned? +docker exec -u agent disinto-agents-1 ls /home/agent/repos/ +``` + +If the agent repo is missing, clone it: +```bash +docker exec disinto-agents-1 chown -R agent:agent /home/agent/repos +docker exec -u agent disinto-agents-1 bash -c "source /home/agent/disinto/.env && git clone http://dev-bot:\${FORGE_TOKEN}@forgejo:3000/<org>/<repo>.git /home/agent/repos/<name>" +``` + +### 4. Create the project configuration file + +The factory uses a TOML file to configure how it manages your project. 
Create +`projects/<name>.toml` based on the template format: + +```toml +# projects/harb.toml + +name = "harb" +repo = "johba/harb" +forge_url = "http://localhost:3000" +repo_root = "/home/agent/repos/harb" +primary_branch = "master" + +[ci] +woodpecker_repo_id = 0 +stale_minutes = 60 + +[services] +containers = ["ponder"] + +[monitoring] +check_prs = true +check_dev_agent = true +check_pipeline_stall = true + +# [mirrors] +# github = "git@github.com:johba/harb.git" +# codeberg = "git@codeberg.org:johba/harb.git" +``` + +**Key fields:** +- `name`: Project identifier (used for file names, logs, etc.) +- `repo`: The source repo in `owner/name` format +- `forge_url`: URL of your local Forgejo instance +- `repo_root`: Where the agent clones the repo +- `primary_branch`: Default branch name (e.g., `main` or `master`) +- `woodpecker_repo_id`: Set to `0` initially; auto-populated on first CI run +- `containers`: List of Docker containers the factory should manage +- `mirrors`: Optional external forge URLs for backup/sync + +### 5. Mirrors (optional) + +[ASK] Should the factory mirror to external forges? If yes, which? +- GitHub: need repo URL and SSH key added to GitHub account +- Codeberg: need repo URL and SSH key added to Codeberg account + +Show the user their public key: +```bash +cat ~/.ssh/id_ed25519.pub +``` + +Test SSH access: +```bash +ssh -T git@github.com 2>&1; ssh -T git@codeberg.org 2>&1 +``` + +If SSH host keys are missing: `ssh-keyscan github.com codeberg.org >> ~/.ssh/known_hosts 2>/dev/null` + +Edit `projects/<name>.toml` to uncomment and configure mirrors: +```toml +[mirrors] +github = "git@github.com:Org/repo.git" +codeberg = "git@codeberg.org:user/repo.git" +``` + +Test with a manual push: +```bash +source .env && source lib/env.sh && export PROJECT_TOML=projects/<name>.toml && source lib/load-project.sh && source lib/mirrors.sh && mirror_push +``` + +### 6. Seed the backlog + +[ASK] What should the factory work on first? Brainstorm with the user. 
+ +Help them create issues on the local Forgejo. Each issue needs: +- A clear title prefixed with `fix:`, `feat:`, or `chore:` +- A body describing what to change, which files, and any constraints +- The `backlog` label (so the dev-agent picks it up) + +```bash +source .env +BACKLOG_ID=$(curl -sf "http://localhost:3000/api/v1/repos/<org>/<repo>/labels" \ + -H "Authorization: token $FORGE_TOKEN" | jq -r '.[] | select(.name=="backlog") | .id') + +curl -sf -X POST "http://localhost:3000/api/v1/repos/<org>/<repo>/issues" \ + -H "Authorization: token $FORGE_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"title\": \"<title>\", \"body\": \"<body>\", \"labels\": [$BACKLOG_ID]}" +``` + +For issues with dependencies, add `Depends-on: #N` in the body — the dev-agent checks +these before starting. + +Use labels: +- `backlog` — ready for the dev-agent +- `blocked` — parked, not for the factory +- No label — tracked but not for autonomous work + +### 7. Watch it work + +The dev-agent polls every 5 minutes. Trigger manually to see it immediately: +```bash +source .env +export PROJECT_TOML=projects/<name>.toml +docker exec -u agent disinto-agents-1 bash -c "cd /home/agent/disinto && bash dev/dev-poll.sh projects/<name>.toml" +``` + +Then monitor: +```bash +# Watch the agent work +docker exec disinto-agents-1 tail -f /home/agent/data/logs/dev/dev-agent.log + +# Check for Claude running +docker exec disinto-agents-1 bash -c "for f in /proc/[0-9]*/cmdline; do cmd=\$(tr '\0' ' ' < \$f 2>/dev/null); echo \$cmd | grep -q 'claude.*-p' && echo 'Claude is running'; done" +```