From c604efd3681b934c36273e55bee92f3bbca85dc0 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 17 Apr 2026 07:38:11 +0000
Subject: [PATCH 1/2] chore: gardener housekeeping 2026-04-17

---
 AGENTS.md                     |  6 +++---
 architect/AGENTS.md           |  2 +-
 dev/AGENTS.md                 |  2 +-
 gardener/AGENTS.md            |  2 +-
 gardener/pending-actions.json | 38 +----------------------------------
 lib/AGENTS.md                 |  6 +++---
 nomad/AGENTS.md               | 12 ++++++-----
 planner/AGENTS.md             |  2 +-
 predictor/AGENTS.md           |  2 +-
 review/AGENTS.md              |  2 +-
 supervisor/AGENTS.md          | 16 ++++++++++-----
 vault/policies/AGENTS.md      |  2 +-
 12 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index fced0c6..28c37b2 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Disinto — Agent Instructions
 
 ## What this repo is
@@ -37,9 +37,9 @@ disinto/                 (code repo)
 │                  examples/ — example vault action TOMLs (promote, publish, release, webhook-call)
 ├── lib/           env.sh, agent-sdk.sh, ci-helpers.sh, ci-debug.sh, load-project.sh, parse-deps.sh, guard.sh, mirrors.sh, pr-lifecycle.sh, issue-lifecycle.sh, worktree.sh, formula-session.sh, stack-lock.sh, forge-setup.sh, forge-push.sh, ops-setup.sh, ci-setup.sh, generators.sh, hire-agent.sh, release.sh, build-graph.py, branch-protection.sh, secret-scan.sh, tea-helpers.sh, action-vault.sh, ci-log-reader.py, git-creds.sh, sprint-filer.sh, hvault.sh
 │                  hooks/ — Claude Code session hooks (on-compact-reinject, on-idle-stop, on-phase-change, on-pretooluse-guard, on-session-end, on-stop-failure)
-│                  init/nomad/ — cluster-up.sh, install.sh, vault-init.sh, lib-systemd.sh (Nomad+Vault Step 0 installers, #821-#825)
+│                  init/nomad/ — cluster-up.sh, install.sh, vault-init.sh, lib-systemd.sh (Nomad+Vault Step 0 installers, #821-#825); wp-oauth-register.sh (Forgejo OAuth2 app + Vault KV seeder for Woodpecker, S3.3)
 ├── nomad/         server.hcl, client.hcl, vault.hcl — HCL configs deployed to /etc/nomad.d/ and /etc/vault.d/ by lib/init/nomad/cluster-up.sh
-│                  jobs/ — Nomad jobspecs (forgejo.hcl reads Vault secrets via template stanza, S2.4)
+│                  jobs/ — Nomad jobspecs: forgejo.hcl (Vault secrets via template, S2.4); woodpecker-server.hcl + woodpecker-agent.hcl (host-net, docker.sock, Vault KV, S3.1-S3.2)
 ├── projects/      *.toml.example — templates; *.toml — local per-box config (gitignored)
 ├── formulas/      Issue templates (TOML specs for multi-step agent tasks)
 ├── docker/        Dockerfiles and entrypoints: reproduce, triage, edge dispatcher, chat (server.py, entrypoint-chat.sh, Dockerfile, ui/)
diff --git a/architect/AGENTS.md b/architect/AGENTS.md
index 51b24b1..1b2f9e8 100644
--- a/architect/AGENTS.md
+++ b/architect/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Architect — Agent Instructions
 
 ## What this agent is
diff --git a/dev/AGENTS.md b/dev/AGENTS.md
index 02fd612..0d565c3 100644
--- a/dev/AGENTS.md
+++ b/dev/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Dev Agent
 
 **Role**: Implement issues autonomously — write code, push branches, address
diff --git a/gardener/AGENTS.md b/gardener/AGENTS.md
index e9ad846..fc54a03 100644
--- a/gardener/AGENTS.md
+++ b/gardener/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Gardener Agent
 
 **Role**: Backlog grooming — detect duplicate issues, missing acceptance
diff --git a/gardener/pending-actions.json b/gardener/pending-actions.json
index 1c89c7d..fe51488 100644
--- a/gardener/pending-actions.json
+++ b/gardener/pending-actions.json
@@ -1,37 +1 @@
-[
-  {
-    "action": "edit_body",
-    "issue": 910,
-    "body": "Flagged by AI reviewer in PR #909.\n\n## Problem\n\n`tools/vault-import.sh` still uses hardcoded `secret/data/${path}` for its curl-based KV write (lines 149, 151, 162, 166, 170). The rest of the codebase was migrated to the configurable `VAULT_KV_MOUNT` variable (defaulting to `kv`) via PR #909. Any deployment with `kv/` as its KV mount will see 403/404 failures when `vault-import.sh` runs.\n\n## Fix\n\nEither:\n1. Refactor the write in `vault-import.sh` to call `hvault_kv_put` (which now respects `VAULT_KV_MOUNT`), or\n2. Replace the hardcoded `secret/data` reference with `${VAULT_KV_MOUNT:-kv}/data` matching the convention in `lib/hvault.sh`.\n\n---\n*Auto-created from AI review*\n\n## Affected files\n\n- `tools/vault-import.sh` (lines 149, 151, 162, 166, 170 — hardcoded `secret/data` references)\n- `lib/hvault.sh` (reference implementation using `VAULT_KV_MOUNT`)\n\n## Acceptance criteria\n\n- [ ] `tools/vault-import.sh` uses `${VAULT_KV_MOUNT:-kv}/data` (or calls `hvault_kv_put`) instead of hardcoded `secret/data`\n- [ ] No hardcoded `secret/data` path references remain in `tools/vault-import.sh`\n- [ ] Vault KV writes succeed when `VAULT_KV_MOUNT=kv` is set (matching the standard deployment config)\n- [ ] `shellcheck` clean\n"
-  },
-  {
-    "action": "add_label",
-    "issue": 910,
-    "label": "backlog"
-  },
-  {
-    "action": "edit_body",
-    "issue": 914,
-    "body": "Flagged by AI reviewer in PR #911.\n\n## Problem\n\n`lib/generators.sh` fixes the `agents` service missing `pull_policy: build` in `--build` mode (PR #893), but the `edge` service has the same root cause: the sed replacement at line 664 produces `build: ./docker/edge` with no `pull_policy: build`. Without it, `docker compose up -d --force-recreate` reuses the cached edge image and silently keeps running stale code even after source changes.\n\n## Fix\n\nAdd `\\n    pull_policy: build` to the edge sed replacement, matching the pattern applied to agents in PR #893.\n\n---\n*Auto-created from AI review*\n\n## Affected files\n\n- `lib/generators.sh` (line 664 — edge service sed replacement missing `pull_policy: build`)\n\n## Acceptance criteria\n\n- [ ] `lib/generators.sh` edge service block emits `pull_policy: build` when `--build` mode is active (matching the pattern from PR #893 for the agents service)\n- [ ] `docker compose up -d --force-recreate` after source changes rebuilds the edge image rather than using the cached layer\n- [ ] Generated `docker-compose.yml` edge service stanza contains `pull_policy: build`\n- [ ] `shellcheck` clean\n"
-  },
-  {
-    "action": "add_label",
-    "issue": 914,
-    "label": "backlog"
-  },
-  {
-    "action": "edit_body",
-    "issue": 867,
-    "body": "## Incident\n\n**2026-04-16 ~10:55–11:52 UTC.** Woodpecker CI agent (`disinto-woodpecker-agent`) entered a repeated gRPC-error crashloop (Codeberg #813 class — gRPC-in-nested-docker). Every workflow it accepted exited 1 within seconds, never actually running pipeline steps.\n\n**Blast radius:** dev-qwen took issue #842 at 10:55, opened PR #859, and burned its full 3-attempt `pr-lifecycle` CI-fix budget between 10:55 and 11:08 reacting to these infra-flake \"CI failures.\" Each failure arrived in ~30–60 seconds, too fast to be a real test run. After exhausting the budget, dev-qwen marked #842 as `blocked: ci_exhausted` and moved on. No real bug was being detected; the real failure surfaced later only after an operator restarted the WP agent and manually retriggered pipeline #966 — which then returned a legitimate `bats-init-nomad` failure in test #6 (different issue).\n\n**Root cause of the infra-flake:** gRPC-in-nested-docker bug, Woodpecker server ↔ agent comms inside nested containers. Known-flaky; restart of `disinto-woodpecker-agent` clears it.\n\n**Recovery:** operator `docker restart disinto-woodpecker-agent` + retrigger pipelines via WP API POST `/api/repos/2/pipelines/<N>`. Fresh run reached real stage signal.\n\n## Why this burned dev-qwen's budget\n\n`pr-lifecycle`'s CI-fix budget treats every failed commit-status as a signal to invoke the agent. It has no notion of \"infra flake\" vs. \"real test failure\" and no heuristic to distinguish them. Four infra-flake failures in 13 minutes looked identical to four real code-bug failures.\n\n## Suggestions — what supervisor can check every 20min\n\nSupervisor runs every `1200s` already. Add these probes:\n\n**1. 
WP agent container health.**\n```\ndocker inspect disinto-woodpecker-agent --format '{{.State.Health.Status}}'\n```\nIf `unhealthy` for the second consecutive supervisor tick → **restart it automatically + post a comment on any currently-running dev-bot/dev-qwen issues warning \"CI agent was restarted; subsequent failures before this marker may be infra-flake.\"**\n\n**2. Fast-failure heuristic on WP pipelines.**\nQuery WP API `GET /api/repos/2/pipelines?page=1`. For each pipeline in state `failure`, compute `finished - started`. If duration < 60s, flag as probable infra-flake. Three flagged flakes within a 15-min window → trigger agent restart as in (1) and a bulk-retrigger via POST `/api/repos/2/pipelines/<N>` for each.\n\n**3. grpc error pattern in agent log.**\n`docker logs --since 20m disinto-woodpecker-agent 2>&1 | grep -c 'grpc error'` — if ≥3 matches, agent is probably wedged. Trigger restart as in (1).\n\n**4. Issue-level guard.**\nWhen supervisor detects an agent restart, scan for issues updated in the preceding 30min with label `blocked: ci_exhausted` and for each one:\n- unassign + remove `blocked` label (return to pool)\n- comment on the issue: *\"CI agent was unhealthy between HH:MM and HH:MM — prior 3/3 retry budget may have been spent on infra flake, not real failures. Re-queueing for a fresh attempt.\"*\n- retrigger the PR's latest WP pipeline\n\nThis last step is the key correction: **`ci_exhausted` preceded by WP-agent-unhealth = false positive; return to pool with context.**\n\n## Why this matters for the migration\n\nBetween now and cutover every WP CI flake that silently exhausts an agent's budget steals hours of clock time. 
Without an automatic recovery path, the pace of the step-N backlogs falls off a cliff the moment the agent next goes unhealthy — and it *will* go unhealthy again (Codeberg #813 is not fixed upstream yet).\n\n## Fix for this specific incident (already applied manually)\n\n- Restarted `disinto-woodpecker-agent`.\n- Closed PR #859 (kept branch `fix/issue-842` at `64080232`).\n- Unassigned dev-qwen from #842, removed `blocked` label, appended prior-art section + pipeline #966 test-#6 failure details to issue body so the next claimant starts with full context.\n\n## Non-goals\n\n- Not trying to fix Codeberg #813 itself (upstream gRPC-in-nested-docker issue).\n- Not trying to fix `pr-lifecycle`'s budget logic — the supervisor-side detection is cheaper and more robust than per-issue budget changes.\n\n## Labels / meta\n\n- `bug-report` + supervisor-focused. Classify severity as blocker for the migration cadence (not for factory day-to-day — it only bites when an unfixable-by-dev issue hits the budget).\n\n## Affected files\n\n- `supervisor/supervisor-run.sh` — add WP agent health probes and flake-detection logic\n- `supervisor/preflight.sh` — may need additional data collection for WP agent health status\n\n## Acceptance criteria\n\n- [ ] Supervisor detects an unhealthy `disinto-woodpecker-agent` container (via `docker inspect` health status or gRPC error log count ≥ 3) and automatically restarts it\n- [ ] After an auto-restart, supervisor scans for issues updated in the prior 30 min labeled `blocked: ci_exhausted` and returns them to the pool (unassign, remove `blocked`, add comment noting infra-flake window)\n- [ ] Fast-failure heuristic: pipelines completing in <60s are flagged as probable infra-flake; 3+ in a 15-min window triggers the restart+retrigger flow\n- [ ] Already-swept PRs/issues are not processed twice (idempotency guard via `<!-- supervisor-swept -->` comment)\n- [ ] CI green\n"
-  },
-  {
-    "action": "add_label",
-    "issue": 867,
-    "label": "backlog"
-  },
-  {
-    "action": "add_label",
-    "issue": 820,
-    "label": "backlog"
-  }
-]
+[]
diff --git a/lib/AGENTS.md b/lib/AGENTS.md
index 97e6f5e..1762a2c 100644
--- a/lib/AGENTS.md
+++ b/lib/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Shared Helpers (`lib/`)
 
 All agents source `lib/env.sh` as their first action. Additional helpers are
@@ -34,5 +34,5 @@ sourced as needed.
 | `lib/sprint-filer.sh` | Post-merge sub-issue filer for sprint PRs. Invoked by the `.woodpecker/ops-filer.yml` pipeline after a sprint PR merges to ops repo `main`. Parses `<!-- filer:begin --> ... <!-- filer:end -->` blocks from sprint PR bodies to extract sub-issue definitions, creates them on the project repo using `FORGE_FILER_TOKEN` (narrow-scope `filer-bot` identity with `issues:write` only), adds `in-progress` label to the parent vision issue, and handles vision lifecycle closure when all sub-issues are closed. Uses `filer_api_all()` for paginated fetches. Idempotent: uses `<!-- decomposed-from: #<vision>, sprint: <slug>, id: <id> -->` markers to skip already-filed issues. Requires `FORGE_FILER_TOKEN`, `FORGE_API`, `FORGE_API_BASE`, `FORGE_OPS_REPO`. | `.woodpecker/ops-filer.yml` (CI pipeline on ops repo) |
 | `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
 | `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |
-| `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_get_or_empty(PATH)` — GET /v1/PATH; 200→raw body, 404→empty, else structured error + return 1 (used by sync scripts to distinguish "absent, create" from hard failure without tripping errexit, #881). `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | `tools/vault-apply-policies.sh`, `tools/vault-apply-roles.sh`, `lib/init/nomad/vault-nomad-auth.sh` |
-| `lib/init/nomad/` | Nomad+Vault installer scripts. `cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling. `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. | `bin/disinto init --backend=nomad` |
+| `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_get_or_empty(PATH)` — GET /v1/PATH; 200→raw body, 404→empty, else structured error + return 1 (used by sync scripts to distinguish "absent, create" from hard failure without tripping errexit, #881). `hvault_ensure_kv_v2(MOUNT, [LOG_PREFIX])` — idempotent KV v2 mount assertion: enables mount if absent, fails loudly if present as wrong type/version. Extracted from all `vault-seed-*.sh` scripts to eliminate dup-detector violations. Respects `DRY_RUN=1`. `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | `tools/vault-apply-policies.sh`, `tools/vault-apply-roles.sh`, `lib/init/nomad/vault-nomad-auth.sh`, `tools/vault-seed-*.sh` |
+| `lib/init/nomad/` | Nomad+Vault installer scripts. `cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling. `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. | `bin/disinto init --backend=nomad` |
diff --git a/nomad/AGENTS.md b/nomad/AGENTS.md
index f57c30a..bfb0ef0 100644
--- a/nomad/AGENTS.md
+++ b/nomad/AGENTS.md
@@ -1,12 +1,12 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # nomad/ — Agent Instructions
 
 Nomad + Vault HCL for the factory's single-node cluster. These files are
 the source of truth that `lib/init/nomad/cluster-up.sh` copies onto a
 factory box under `/etc/nomad.d/` and `/etc/vault.d/` at init time.
 
-This directory covers the **Nomad+Vault migration (Steps 0–2)** —
-see issues #821–#884 for the step breakdown.
+This directory covers the **Nomad+Vault migration (Steps 0–3)** —
+see issues #821–#937 for the step breakdown.
 
 ## What lives here
 
@@ -16,6 +16,8 @@ see issues #821–#884 for the step breakdown.
 | `client.hcl` | `/etc/nomad.d/client.hcl` | Docker driver cfg + `host_volume` declarations (S0.2) |
 | `vault.hcl`  | `/etc/vault.d/vault.hcl`  | Vault storage, listener, UI, `disable_mlock` (S0.3) |
 | `jobs/forgejo.hcl` | submitted via `lib/init/nomad/deploy.sh` | Forgejo job; reads creds from Vault via consul-template stanza (S2.4) |
+| `jobs/woodpecker-server.hcl` | submitted via Nomad API | Woodpecker CI server; host networking, Vault KV for `WOODPECKER_AGENT_SECRET` + Forgejo OAuth creds (S3.1) |
+| `jobs/woodpecker-agent.hcl` | submitted via Nomad API | Woodpecker CI agent; host networking, `docker.sock` mount, Vault KV for `WOODPECKER_AGENT_SECRET` (S3.2) |
 
 Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the
 split between `server.hcl` and `client.hcl` is for readability, not
@@ -30,8 +32,8 @@ convention, KV path summary, and JWT-auth role bindings (S2.1/S2.3).
 
 ## Not yet implemented
 
-- **Additional jobspecs** (woodpecker, agents, caddy) — Step 1 brought up
-  Forgejo; remaining services land in later steps.
+- **Additional jobspecs** (agents, caddy) — Woodpecker is now deployed (S3.1-S3.2);
+  agents and caddy land in later steps.
 - **TLS, ACLs, gossip encryption** — deliberately absent for now; land
   alongside multi-node support.
 
diff --git a/planner/AGENTS.md b/planner/AGENTS.md
index 7034b60..3c54bf8 100644
--- a/planner/AGENTS.md
+++ b/planner/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Planner Agent
 
 **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md
index cec03a1..ead73cc 100644
--- a/predictor/AGENTS.md
+++ b/predictor/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Predictor Agent
 
 **Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
diff --git a/review/AGENTS.md b/review/AGENTS.md
index 4c06b34..e45a442 100644
--- a/review/AGENTS.md
+++ b/review/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Review Agent
 
 **Role**: AI-powered PR review — post structured findings and formal
diff --git a/supervisor/AGENTS.md b/supervisor/AGENTS.md
index 77f7b64..93150b1 100644
--- a/supervisor/AGENTS.md
+++ b/supervisor/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # Supervisor Agent
 
 **Role**: Health monitoring and auto-remediation, executed as a formula-driven
@@ -24,12 +24,18 @@ Both invoke the same `supervisor-run.sh`. Sources `lib/guard.sh` and calls `chec
   files for `PHASE:escalate` entries and auto-removes any whose linked issue
   is confirmed closed (24h grace period after closure to avoid races). Reports
   **stale crashed worktrees** (worktrees preserved after crash) — supervisor
-  housekeeping removes them after 24h. Also collects **Woodpecker agent health**:
-  container status, gRPC error count (last 20m), fast-failure pipelines (<60s,
-  last 15m), and overall health determination.
+  housekeeping removes them after 24h. Collects **Woodpecker agent health**
+  (added in #933): container `disinto-woodpecker-agent` health/running status,
+  gRPC error count in last 20 min, fast-failure pipeline count (<60s, last 15 min),
+  and overall health verdict (healthy/unhealthy). Unhealthy verdict triggers
+  automatic container restart + `blocked:ci_exhausted` issue recovery in
+  `supervisor-run.sh` before the Claude session starts.
 - `formulas/run-supervisor.toml` — Execution spec: five steps (preflight review,
   health-assessment, decide-actions, report, journal) with `needs` dependencies.
-  Claude evaluates all metrics and takes actions in a single interactive session
+  Claude evaluates all metrics and takes actions in a single interactive session.
+  Health-assessment now includes P2 **Woodpecker agent unhealthy** classification
+  (container not running, ≥3 gRPC errors/20m, or ≥3 fast-failure pipelines/15m);
+  decide-actions documents the pre-session auto-recovery path.
 - `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory,
   disk, CI, git, dev-agent, review-agent, forge)
 
diff --git a/vault/policies/AGENTS.md b/vault/policies/AGENTS.md
index 692c885..26ec0d9 100644
--- a/vault/policies/AGENTS.md
+++ b/vault/policies/AGENTS.md
@@ -1,4 +1,4 @@
-<!-- last-reviewed: 8ad5aca6bbee77634b3c63523042b1d39cefa96a -->
+<!-- last-reviewed: a7a046b81a7f454ebec43bab643067bd952d50b0 -->
 # vault/policies/ — Agent Instructions
 
 HashiCorp Vault ACL policies for the disinto factory. One `.hcl` file per

From 7fd9a457c3262f95fbf9de14cea31ac10eb3549f Mon Sep 17 00:00:00 2001
From: dev-qwen2 <dev-qwen2@disinto.local>
Date: Fri, 17 Apr 2026 07:36:12 +0000
Subject: [PATCH 2/2] =?UTF-8?q?fix:=20[nomad-step-3]=20S3-fix=20=E2=80=94?=
 =?UTF-8?q?=20deploy.sh=20crashes=20on=20hyphenated=20job=20name=20+=20wp-?=
 =?UTF-8?q?oauth=20double=20lib/=20path=20(#944)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/init/nomad/deploy.sh            | 3 ++-
 lib/init/nomad/wp-oauth-register.sh | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/init/nomad/deploy.sh b/lib/init/nomad/deploy.sh
index a1724c5..7cf9278 100755
--- a/lib/init/nomad/deploy.sh
+++ b/lib/init/nomad/deploy.sh
@@ -177,7 +177,8 @@ for job_name in "${JOBS[@]}"; do
   fi
 
   # Per-job timeout override: JOB_READY_TIMEOUT_<UPPERCASE_JOBNAME>
-  job_upper=$(printf '%s' "$job_name" | tr '[:lower:]' '[:upper:]')
+  # Hyphens and spaces are not valid in bash variable names, so map both to underscores
+  job_upper=$(printf '%s' "$job_name" | tr '[:lower:]-' '[:upper:]_' | tr ' ' '_')
   timeout_var="JOB_READY_TIMEOUT_${job_upper}"
   job_timeout="${!timeout_var:-$JOB_READY_TIMEOUT_SECS}"
 
diff --git a/lib/init/nomad/wp-oauth-register.sh b/lib/init/nomad/wp-oauth-register.sh
index 9b7f12a..6d2a4cd 100755
--- a/lib/init/nomad/wp-oauth-register.sh
+++ b/lib/init/nomad/wp-oauth-register.sh
@@ -44,7 +44,8 @@ set -euo pipefail
 # Source the hvault module for Vault helpers
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
-# shellcheck source=../../lib/hvault.sh
+# This script lives in lib/init/nomad/, so the repo root is three levels up.
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
+# shellcheck source=../../../lib/hvault.sh
 source "${REPO_ROOT}/lib/hvault.sh"
 
 # Configuration