diff --git a/.woodpecker/detect-duplicates.py b/.woodpecker/detect-duplicates.py index 9b108bf..f3bf5b1 100644 --- a/.woodpecker/detect-duplicates.py +++ b/.woodpecker/detect-duplicates.py @@ -308,6 +308,21 @@ def main() -> int: "63bfa88d71764c95c65a9a248f3e40ab": "Vault-seed preconditions: binary check end + VAULT_ADDR die", "34873ad3570b211ce1d90468ab6ac94c": "Vault-seed preconditions: VAULT_ADDR die + hvault_token_lookup", "71a52270f249e843cda48ad896d9f781": "Vault-seed preconditions: VAULT_ADDR + hvault_token_lookup + die", + # Common vault-seed script flag parsing patterns + # Shared across tools/vault-seed-{forgejo,ops-repo}.sh + "6906b7787796c2ccb8dd622e2ad4e7bf": "vault-seed DRY_RUN init + case pattern (forgejo + ops-repo)", + "a0df5283b616b964f8bc32fd99ec1b5a": "vault-seed case pattern start (forgejo + ops-repo)", + "e15e3272fdd9f0f46ce9e726aea9f853": "vault-seed case pattern dry-run handler (forgejo + ops-repo)", + "c9f22385cc49a3dac1d336bc14c6315b": "vault-seed DRY_RUN assignment (forgejo + ops-repo)", + "106f4071e88f841b3208b01144cd1c39": "vault-seed case pattern dry-run end (forgejo + ops-repo)", + "c15506dcb6bb340b25d1c39d442dd2e6": "vault-seed help text + invalid arg handler (forgejo + ops-repo)", + "1feecd3b3caf00045fae938ddf2811de": "vault-seed invalid arg handler (forgejo + ops-repo)", + "919780d5e7182715344f5aa02b191294": "vault-seed invalid arg + esac pattern (forgejo + ops-repo)", + "8dce1d292bce8e60ef4c0665b62945b0": "vault-seed esac + binary check loop (forgejo + ops-repo)", + "ca043687143a5b47bd54e65a99ce8ee8": "vault-seed binary check loop start (forgejo + ops-repo)", + "aefd9f655411a955395e6e5995ddbe6f": "vault-seed binary check pattern (forgejo + ops-repo)", + "60f0c46deb5491599457efb4048918e5": "vault-seed VAULT_ADDR + hvault_token_lookup check (forgejo + ops-repo)", + "f6838f581ef6b4d82b55268389032769": "vault-seed VAULT_ADDR + hvault_token_lookup die (forgejo + ops-repo)", } if not sh_files: diff --git a/.woodpecker/edge-subpath.yml 
b/.woodpecker/edge-subpath.yml deleted file mode 100644 index 6e0a17e..0000000 --- a/.woodpecker/edge-subpath.yml +++ /dev/null @@ -1,55 +0,0 @@ -# .woodpecker/edge-subpath.yml — Edge subpath routing smoke test -# -# Runs end-to-end smoke tests for Forgejo, Woodpecker, and chat subpath routing: -# - Forgejo at /forge/ -# - Woodpecker at /ci/ -# - Chat at /chat/ -# - Staging at /staging/ -# -# Tests: -# 1. Root / redirects to /forge/ -# 2. Forgejo login at /forge/ completes without redirect loops -# 3. Forgejo OAuth callback for Woodpecker succeeds under subpath -# 4. Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS) -# 5. Chat OAuth login flow works at /chat/login -# 6. Forward_auth on /chat/* rejects unauthenticated requests with 401 -# 7. Staging content loads at /staging/ -# -# Triggers: -# - Pull requests that modify edge-related files -# - Manual trigger for on-demand testing -# -# Environment variables (set in CI or via pipeline): -# EDGE_BASE_URL — Edge proxy URL (default: http://localhost) -# EDGE_TIMEOUT — Request timeout in seconds (default: 30) -# EDGE_MAX_RETRIES — Max retries per request (default: 3) -# -# When to run: -# - Any change to edge.hcl, docker/edge/, tools/edge-control/ -# - Any change to this pipeline file -# - Manual trigger for testing edge deployments - -when: - event: [pull_request, manual] - path: - - "nomad/jobs/edge.hcl" - - "docker/edge/**" - - "tools/edge-control/**" - - ".woodpecker/edge-subpath.yml" - - "tests/smoke-edge-subpath.sh" - -clone: - git: - image: alpine/git - commands: - - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|") - - git clone --depth 1 "$AUTH_URL" . 
- - git fetch --depth 1 origin "$CI_COMMIT_REF" - - git checkout FETCH_HEAD - -steps: - - name: edge-subpath-smoke-test - image: alpine:3.19 - commands: - - apk add --no-cache bash curl jq - - bash tests/smoke-edge-subpath.sh diff --git a/AGENTS.md b/AGENTS.md index 9c42667..97634a4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,4 +1,4 @@ - + # Disinto — Agent Instructions ## What this repo is diff --git a/architect/AGENTS.md b/architect/AGENTS.md index 7286ee3..61987ae 100644 --- a/architect/AGENTS.md +++ b/architect/AGENTS.md @@ -1,4 +1,4 @@ - + # Architect — Agent Instructions ## What this agent is diff --git a/bin/disinto b/bin/disinto index c18ef0c..7f6379d 100755 --- a/bin/disinto +++ b/bin/disinto @@ -802,6 +802,7 @@ _disinto_init_nomad() { woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;; agents) seed_name="agents" ;; chat) seed_name="chat" ;; + edge) seed_name="ops-repo" ;; esac local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh" if [ -x "$seed_script" ]; then @@ -983,6 +984,7 @@ _disinto_init_nomad() { woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;; agents) seed_name="agents" ;; chat) seed_name="chat" ;; + edge) seed_name="ops-repo" ;; esac local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh" if [ -x "$seed_script" ]; then diff --git a/dev/AGENTS.md b/dev/AGENTS.md index c64551f..5e6f085 100644 --- a/dev/AGENTS.md +++ b/dev/AGENTS.md @@ -1,4 +1,4 @@ - + # Dev Agent **Role**: Implement issues autonomously — write code, push branches, address diff --git a/docker/edge/entrypoint-edge.sh b/docker/edge/entrypoint-edge.sh index 6db96b7..83131fb 100755 --- a/docker/edge/entrypoint-edge.sh +++ b/docker/edge/entrypoint-edge.sh @@ -173,11 +173,15 @@ PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}" sleep 1200 # 20 minutes done) & -# ── Load required secrets from secrets/*.enc (#777) ──────────────────── -# Edge container declares its required secrets; missing ones cause a hard fail. 
+# ── Load optional secrets from secrets/*.enc (#777) ──────────────────── +# Engagement collection (collect-engagement.sh) requires CADDY_ secrets to +# SCP access logs from a remote edge host. When age key or secrets dir is +# missing, or any secret fails to decrypt, log a warning and skip the cron. +# Caddy itself does not depend on these secrets. _AGE_KEY_FILE="${HOME}/.config/sops/age/keys.txt" _SECRETS_DIR="/opt/disinto/secrets" EDGE_REQUIRED_SECRETS="CADDY_SSH_KEY CADDY_SSH_HOST CADDY_SSH_USER CADDY_ACCESS_LOG" +EDGE_ENGAGEMENT_READY=0 # Assume not ready until proven otherwise _edge_decrypt_secret() { local enc_path="${_SECRETS_DIR}/${1}.enc" @@ -192,47 +196,53 @@ if [ -f "$_AGE_KEY_FILE" ] && [ -d "$_SECRETS_DIR" ]; then export "$_secret_name=$_val" done if [ -n "$_missing" ]; then - echo "FATAL: required secrets missing from secrets/*.enc:${_missing}" >&2 - echo " Run 'disinto secrets add ' for each missing secret." >&2 - echo " If migrating from .env.vault.enc, run 'disinto secrets migrate-from-vault' first." >&2 - exit 1 + echo "WARN: required engagement secrets missing from secrets/*.enc:${_missing}" >&2 + echo " collect-engagement cron will be skipped. Run 'disinto secrets add ' to enable." >&2 + EDGE_ENGAGEMENT_READY=0 + else + echo "edge: loaded required engagement secrets: ${EDGE_REQUIRED_SECRETS}" >&2 + EDGE_ENGAGEMENT_READY=1 fi - echo "edge: loaded required secrets: ${EDGE_REQUIRED_SECRETS}" >&2 else - echo "FATAL: age key (${_AGE_KEY_FILE}) or secrets dir (${_SECRETS_DIR}) not found — cannot load required secrets" >&2 - echo " Ensure age is installed and secrets/*.enc files are present." >&2 - exit 1 + echo "WARN: age key (${_AGE_KEY_FILE}) or secrets dir (${_SECRETS_DIR}) not found — engagement secrets unavailable" >&2 + echo " collect-engagement cron will be skipped. Run 'disinto secrets add ' to enable." 
>&2 + EDGE_ENGAGEMENT_READY=0 fi # Start daily engagement collection cron loop in background (#745) # Runs collect-engagement.sh daily at ~23:50 UTC via a sleep loop that # calculates seconds until the next 23:50 window. SSH key from secrets/*.enc (#777). -(while true; do - # Calculate seconds until next 23:50 UTC - _now=$(date -u +%s) - _target=$(date -u -d "today 23:50" +%s 2>/dev/null || date -u -d "23:50" +%s 2>/dev/null || echo 0) - if [ "$_target" -le "$_now" ]; then - _target=$(( _target + 86400 )) - fi - _sleep_secs=$(( _target - _now )) - echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2 - sleep "$_sleep_secs" - _fetch_log="/tmp/caddy-access-log-fetch.log" - _ssh_key_file=$(mktemp) - printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file" - chmod 0600 "$_ssh_key_file" - scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \ - "${CADDY_SSH_USER}@${CADDY_SSH_HOST}:${CADDY_ACCESS_LOG}" \ - "$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true - rm -f "$_ssh_key_file" - if [ -s "$_fetch_log" ]; then - CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \ - | tee -a /opt/disinto-logs/collect-engagement.log || true - else - echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2 - fi - rm -f "$_fetch_log" -done) & +# Guarded: only start if EDGE_ENGAGEMENT_READY=1. 
+if [ "$EDGE_ENGAGEMENT_READY" -eq 1 ]; then + (while true; do + # Calculate seconds until next 23:50 UTC + _now=$(date -u +%s) + _target=$(date -u -d "today 23:50" +%s 2>/dev/null || date -u -d "23:50" +%s 2>/dev/null || echo 0) + if [ "$_target" -le "$_now" ]; then + _target=$(( _target + 86400 )) + fi + _sleep_secs=$(( _target - _now )) + echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2 + sleep "$_sleep_secs" + _fetch_log="/tmp/caddy-access-log-fetch.log" + _ssh_key_file=$(mktemp) + printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file" + chmod 0600 "$_ssh_key_file" + scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \ + "${CADDY_SSH_USER}@${CADDY_SSH_HOST}:${CADDY_ACCESS_LOG}" \ + "$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true + rm -f "$_ssh_key_file" + if [ -s "$_fetch_log" ]; then + CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \ + | tee -a /opt/disinto-logs/collect-engagement.log || true + else + echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2 + fi + rm -f "$_fetch_log" + done) & +else + echo "edge: collect-engagement cron skipped (EDGE_ENGAGEMENT_READY=0)" >&2 +fi # Nomad template renders Caddyfile to /local/Caddyfile via service discovery; # copy it into the expected location if present (compose uses the mounted path). 
diff --git a/gardener/AGENTS.md b/gardener/AGENTS.md index 5dcd12f..63544c5 100644 --- a/gardener/AGENTS.md +++ b/gardener/AGENTS.md @@ -1,4 +1,4 @@ - + # Gardener Agent **Role**: Backlog grooming — detect duplicate issues, missing acceptance diff --git a/gardener/dust.jsonl b/gardener/dust.jsonl index 09af349..e69de29 100644 --- a/gardener/dust.jsonl +++ b/gardener/dust.jsonl @@ -1 +0,0 @@ -{"issue":850,"group":"lib/generators.sh","title":"compose dup-detection smoke CI failures","reason":"4+ consecutive ci_exhausted failures across PRs #872 #908 #971; planner flagged for human re-scope","ts":"2026-04-19T00:00:00Z"} diff --git a/gardener/pending-actions.json b/gardener/pending-actions.json index 9827786..5e481fa 100644 --- a/gardener/pending-actions.json +++ b/gardener/pending-actions.json @@ -1,8 +1,23 @@ [ { - "action": "edit_body", + "action": "add_label", + "issue": 1047, + "label": "backlog" + }, + { + "action": "add_label", + "issue": 1047, + "label": "priority" + }, + { + "action": "add_label", + "issue": 1044, + "label": "backlog" + }, + { + "action": "remove_label", "issue": 1025, - "body": "## Goal\nVerify that Forgejo, Woodpecker, and chat all function correctly when served\nunder /forge/, /ci/, and /chat/ subpaths on a single domain. 
Catch redirect\nloops, OAuth callback failures, and asset 404s before they hit production.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] Forgejo login at /forge/ completes without redirect loops\n- [ ] Forgejo OAuth callback for Woodpecker succeeds under subpath\n- [ ] Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS)\n- [ ] Chat OAuth login flow works at /chat/login\n- [ ] Forward_auth on /chat/* rejects unauthenticated requests with 401\n- [ ] Staging content loads at /staging/\n- [ ] Root / redirects to /forge/\n- [ ] CI pipeline added to .woodpecker/ to run this test on edge-related changes\n\n## Affected files\n- `nomad/jobs/edge.hcl` — edge Caddy routing config under test\n- `docker/edge/` — edge container and Caddyfile template\n- `tools/edge-control/register.sh` — route registration\n- `.woodpecker/` — CI pipeline for edge smoke test\n\n## Dependencies\nNone — first issue in sprint.\n" + "label": "blocked" }, { "action": "add_label", @@ -10,33 +25,23 @@ "label": "backlog" }, { - "action": "edit_body", - "issue": 1026, - "body": "## Goal\nReplace the blocking one-shot claude --print invocation in the chat backend with\na WebSocket connection that streams tokens to the UI as they arrive.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] /chat/ws endpoint accepts WebSocket upgrade with valid session cookie\n- [ ] /chat/ws rejects upgrade if session cookie is missing or expired\n- [ ] Chat backend streams claude output over WebSocket as text frames\n- [ ] UI renders tokens incrementally as they arrive\n- [ ] Rate limiting still enforced on WebSocket messages\n- [ ] Caddy proxies WebSocket upgrade correctly through /chat/ws with forward_auth\n\n## Affected files\n- `docker/chat/server.py` — chat backend WebSocket endpoint\n- 
`docker/chat/ui/` — frontend WebSocket client rendering\n- `nomad/jobs/edge.hcl` — Caddy WebSocket proxy config\n- `nomad/jobs/chat.hcl` — chat Nomad job\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n" + "action": "comment", + "issue": 1025, + "body": "Gardener: removing `blocked` — fix path is well-defined (Option 1: static-checks-only pipeline). Promoting to backlog for next dev pick-up. Dev must follow the acceptance criteria literally — no live service curls, static checks only." + }, + { + "action": "remove_label", + "issue": 850, + "label": "blocked" }, { "action": "add_label", - "issue": 1026, + "issue": 850, "label": "backlog" }, { - "action": "edit_body", - "issue": 1027, - "body": "## Goal\nGive the chat container Claude session read-write access to the project working\ntree so the operator can inspect, explain, or modify code — scoped to that tree\nonly, with no access to factory internals, secrets, or Docker socket.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] Chat container bind-mounts the project working tree as a named volume\n- [ ] Claude invocation in server.py sets cwd to the workspace directory\n- [ ] Claude permission mode is acceptEdits (not bypassPermissions)\n- [ ] verify-chat-sandbox.sh updated to assert workspace mount exists\n- [ ] Compose generator adds the workspace volume conditionally\n\n## Affected files\n- `docker/chat/server.py` — Claude invocation and cwd setup\n- `tools/edge-control/verify-chat-sandbox.sh` — sandbox verification\n- `lib/generators.sh` — Compose generator workspace volume\n- `nomad/jobs/chat.hcl` — chat container bind-mount config\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n" - }, - { - "action": "add_label", - "issue": 1027, - "label": "backlog" - }, - { - "action": "edit_body", - "issue": 1028, - "body": "## Goal\nIf the smoke test reveals unfixable 
subpath issues, automate the pivot to\nper-service subdomains so the switch is a single config change.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] generators.sh _generate_caddyfile_impl accepts EDGE_ROUTING_MODE env var\n- [ ] In subdomain mode, Caddyfile emits four host blocks per edge-routing-fallback.md\n- [ ] register.sh registers additional subdomain routes when EDGE_ROUTING_MODE=subdomain\n- [ ] OAuth redirect URIs in ci-setup.sh respect routing mode\n- [ ] .env template documents EDGE_ROUTING_MODE with a comment referencing the fallback doc\n\n## Affected files\n- `lib/generators.sh` — _generate_caddyfile_impl routing mode switch\n- `tools/edge-control/register.sh` — subdomain route registration\n- `lib/ci-setup.sh` — OAuth redirect URI handling\n- `projects/*.toml.example` — .env template documentation\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n" - }, - { - "action": "add_label", - "issue": 1028, - "label": "backlog" + "action": "comment", + "issue": 850, + "body": "Gardener: removing `blocked` — 5th attempt recipe is at the top of this issue. Dev must follow the recipe exactly (call `_generate_compose_impl` directly in isolated FACTORY_ROOT, do NOT use `bin/disinto init`). Do not copy patterns from prior PRs." } ] diff --git a/lib/AGENTS.md b/lib/AGENTS.md index 09f18b1..feaee18 100644 --- a/lib/AGENTS.md +++ b/lib/AGENTS.md @@ -1,4 +1,4 @@ - + # Shared Helpers (`lib/`) All agents source `lib/env.sh` as their first action. Additional helpers are @@ -35,4 +35,4 @@ sourced as needed. | `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. 
| bin/disinto (hire) | | `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) | | `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_get_or_empty(PATH)` — GET /v1/PATH; 200→raw body, 404→empty, else structured error + return 1 (used by sync scripts to distinguish "absent, create" from hard failure without tripping errexit, #881). `hvault_ensure_kv_v2(MOUNT, [LOG_PREFIX])` — idempotent KV v2 mount assertion: enables mount if absent, fails loudly if present as wrong type/version. Extracted from all `vault-seed-*.sh` scripts to eliminate dup-detector violations. Respects `DRY_RUN=1`. `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. `_hvault_seed_key(PATH, KEY, [GENERATOR])` — seed one KV key if absent; reads existing data and merges to preserve sibling keys (KV v2 replaces atomically); returns 0=created, 1=unchanged, 2=API error (#992). All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | `tools/vault-apply-policies.sh`, `tools/vault-apply-roles.sh`, `lib/init/nomad/vault-nomad-auth.sh`, `tools/vault-seed-*.sh` | -| `lib/init/nomad/` | Nomad+Vault installer scripts. 
`cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling; `HOST_VOLUME_DIRS` array now includes `/srv/disinto/docker` (for staging file-server, S5.2, #989, #992). `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. `deploy.sh` — S4 dependency-ordered Nomad job deploy + health-wait; takes a list of jobspec basenames, submits each to Nomad and polls until healthy before proceeding to the next; supports `--dry-run` and per-job timeout overrides via `JOB_READY_TIMEOUT_`; invoked by `bin/disinto --with ` and `cluster-up.sh`; deploy order now covers staging, chat, edge (S5.5, #992). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. | `bin/disinto init --backend=nomad` | +| `lib/init/nomad/` | Nomad+Vault installer scripts. 
`cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling; `HOST_VOLUME_DIRS` array now includes `/srv/disinto/docker` (for staging file-server, S5.2, #989, #992). `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. `deploy.sh` — S4 dependency-ordered Nomad job deploy + health-wait; takes a list of jobspec basenames, submits each to Nomad and polls until healthy before proceeding to the next; supports `--dry-run` and per-job timeout overrides via `JOB_READY_TIMEOUT_`; global default timeout `JOB_READY_TIMEOUT_SECS` is 360s (raised from 240s for chat cold-start, #1036); invoked by `bin/disinto --with ` and `cluster-up.sh`; deploy order now covers staging, chat, edge (S5.5, #992). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. 
| `bin/disinto init --backend=nomad` | diff --git a/lib/init/nomad/deploy.sh b/lib/init/nomad/deploy.sh index 7cf9278..f9a3805 100755 --- a/lib/init/nomad/deploy.sh +++ b/lib/init/nomad/deploy.sh @@ -16,7 +16,7 @@ # Environment: # REPO_ROOT — absolute path to repo root (defaults to parent of # this script's parent directory) -# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 240) +# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 360) # JOB_READY_TIMEOUT_ — per-job timeout override (e.g., # JOB_READY_TIMEOUT_FORGEJO=300) # @@ -33,7 +33,7 @@ set -euo pipefail # ── Configuration ──────────────────────────────────────────────────────────── SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}" -JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-240}" +JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-360}" DRY_RUN=0 diff --git a/lib/issue-lifecycle.sh b/lib/issue-lifecycle.sh index 1ad3239..25f2c6b 100644 --- a/lib/issue-lifecycle.sh +++ b/lib/issue-lifecycle.sh @@ -157,9 +157,10 @@ issue_claim() { return 1 fi - local ip_id bl_id + local ip_id bl_id bk_id ip_id=$(_ilc_in_progress_id) bl_id=$(_ilc_backlog_id) + bk_id=$(_ilc_blocked_id) if [ -n "$ip_id" ]; then curl -sf -X POST \ -H "Authorization: token ${FORGE_TOKEN}" \ @@ -172,6 +173,12 @@ issue_claim() { -H "Authorization: token ${FORGE_TOKEN}" \ "${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true fi + # Clear blocked label on re-claim — starting work is implicit resolution of prior block + if [ -n "$bk_id" ]; then + curl -sf -X DELETE \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_API}/issues/${issue}/labels/${bk_id}" >/dev/null 2>&1 || true + fi _ilc_log "claimed issue #${issue}" return 0 } diff --git a/nomad/AGENTS.md b/nomad/AGENTS.md index 57667bc..729214e 100644 --- a/nomad/AGENTS.md +++ b/nomad/AGENTS.md @@ -1,4 +1,4 @@ - + # nomad/ — Agent Instructions Nomad + Vault HCL for the factory's 
single-node cluster. These files are @@ -21,7 +21,7 @@ see issues #821–#992 for the step breakdown. | `jobs/agents.hcl` | submitted via `lib/init/nomad/deploy.sh` | All 7 agent roles (dev, review, gardener, planner, predictor, supervisor, architect) + llama variant; Vault-templated bot tokens via `service-agents` policy; `force_pull = false` — image is built locally by `bin/disinto --with agents`, no registry (S4.1, S4-fix-2, S4-fix-5, #955, #972, #978) | | `jobs/staging.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy file-server mounting `docker/` as `/srv/site:ro`; no Vault integration; **dynamic host port** (no static 80 — edge owns 80/443, collision fixed in S5-fix-7 #1018); edge discovers via Nomad service registration (S5.2, #989) | | `jobs/chat.hcl` | submitted via `lib/init/nomad/deploy.sh` | Claude chat UI; custom `disinto/chat:local` image; sandbox hardening (cap_drop ALL, **tmpfs via mount block** not `tmpfs=` arg — S5-fix-5 #1012, pids_limit 128); Vault-templated OAuth secrets via `service-chat` policy (S5.2, #989) | -| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988) | +| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; **both Caddy and dispatcher tasks use `network_mode = "host"`** — upstreams are `127.0.0.1:` (forgejo :3000, woodpecker :8000, chat :8080), not Docker hostnames (#1031, #1034); `FORGE_URL` rendered via Nomad service discovery template (not static env) to handle bridge vs. 
host network differences (#1034); dispatcher Vault secret path changed to `kv/data/disinto/shared/ops-repo` (#1041); Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988) | Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the split between `server.hcl` and `client.hcl` is for readability, not diff --git a/nomad/jobs/edge.hcl b/nomad/jobs/edge.hcl index 4a495d9..bf82b3d 100644 --- a/nomad/jobs/edge.hcl +++ b/nomad/jobs/edge.hcl @@ -123,6 +123,19 @@ job "edge" { # ── Caddyfile via Nomad service discovery (S5-fix-7, issue #1018) ──── # Renders staging upstream from Nomad service registration instead of # hardcoded staging:80. Caddy picks up /local/Caddyfile via entrypoint. + # Forge URL via Nomad service discovery (issue #1034) — resolves forgejo + # service address/port dynamically for bridge network compatibility. + template { + destination = "local/forge.env" + env = true + change_mode = "restart" + data = < + # Planner Agent **Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints), diff --git a/predictor/AGENTS.md b/predictor/AGENTS.md index a263066..f67d9d0 100644 --- a/predictor/AGENTS.md +++ b/predictor/AGENTS.md @@ -1,4 +1,4 @@ - + # Predictor Agent **Role**: Abstract adversary (the "goblin"). 
Runs a 2-step formula diff --git a/review/AGENTS.md b/review/AGENTS.md index 24606d1..8709cfb 100644 --- a/review/AGENTS.md +++ b/review/AGENTS.md @@ -1,4 +1,4 @@ - + # Review Agent **Role**: AI-powered PR review — post structured findings and formal diff --git a/supervisor/AGENTS.md b/supervisor/AGENTS.md index 23a3832..004c81f 100644 --- a/supervisor/AGENTS.md +++ b/supervisor/AGENTS.md @@ -1,4 +1,4 @@ - + # Supervisor Agent **Role**: Health monitoring and auto-remediation, executed as a formula-driven diff --git a/tests/disinto-init-nomad.bats b/tests/disinto-init-nomad.bats index 8c8b9a4..54c3655 100644 --- a/tests/disinto-init-nomad.bats +++ b/tests/disinto-init-nomad.bats @@ -426,3 +426,19 @@ setup_file() { [[ "$output" == *"services to deploy: forgejo,woodpecker-server,woodpecker-agent,agents"* ]] [[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents"* ]] } + +# S5.1 / #1035 — edge service seeds ops-repo (dispatcher FORGE_TOKEN) +@test "disinto init --backend=nomad --with edge deploys edge" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with edge --dry-run + [ "$status" -eq 0 ] + # edge depends on all backend services, so all are included + [[ "$output" == *"services to deploy: edge,forgejo"* ]] + [[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents staging chat edge"* ]] + [[ "$output" == *"[deploy] [dry-run] nomad job validate"*"edge.hcl"* ]] +} + +@test "disinto init --backend=nomad --with edge seeds ops-repo" { + run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with edge --dry-run + [ "$status" -eq 0 ] + [[ "$output" == *"tools/vault-seed-ops-repo.sh --dry-run"* ]] +} diff --git a/tests/smoke-edge-subpath.sh b/tests/smoke-edge-subpath.sh deleted file mode 100755 index d23d06b..0000000 --- a/tests/smoke-edge-subpath.sh +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================= -# 
smoke-edge-subpath.sh — End-to-end subpath routing smoke test
-#
-# Verifies Forgejo, Woodpecker, and chat function correctly under subpaths:
-#   - Forgejo at /forge/
-#   - Woodpecker at /ci/
-#   - Chat at /chat/
-#   - Staging at /staging/
-#
-# Acceptance criteria:
-#   1. Forgejo login at /forge/ completes without redirect loops
-#   2. Forgejo OAuth callback for Woodpecker succeeds under subpath
-#   3. Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS)
-#   4. Chat OAuth login flow works at /chat/login
-#   5. Forward_auth on /chat/* rejects unauthenticated requests with 401
-#   6. Staging content loads at /staging/
-#   7. Root / redirects to /forge/
-#
-# Usage:
-#   smoke-edge-subpath.sh [--base-url BASE_URL]
-#
-# Environment variables:
-#   BASE_URL — Edge proxy URL (default: http://localhost)
-#   EDGE_TIMEOUT — Request timeout in seconds (default: 30)
-#   EDGE_MAX_RETRIES — Max retries per request (default: 3)
-#
-# Exit codes:
-#   0 — All checks passed
-#   1 — One or more checks failed
-# =============================================================================
-set -euo pipefail
-
-# Script directory for relative paths
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Source common helpers (optional: a missing lib/env.sh is tolerated)
-source "${SCRIPT_DIR}/../lib/env.sh" 2>/dev/null || true
-
-# ─────────────────────────────────────────────────────────────────────────────
-# Configuration
-# ─────────────────────────────────────────────────────────────────────────────
-
-BASE_URL="${BASE_URL:-http://localhost}"
-EDGE_TIMEOUT="${EDGE_TIMEOUT:-30}"
-EDGE_MAX_RETRIES="${EDGE_MAX_RETRIES:-3}"
-
-# Subpaths to test
-FORGE_PATH="/forge/"
-CI_PATH="/ci/"
-CHAT_PATH="/chat/"
-STAGING_PATH="/staging/"
-
-# Track overall test status
-FAILED=0
-PASSED=0
-SKIPPED=0
-
-# ─────────────────────────────────────────────────────────────────────────────
-# Logging helpers
-# ─────────────────────────────────────────────────────────────────────────────
-
-# Print an informational line.
-log_info() {
-  echo "[INFO] $*"
-}
-
-# Print a [PASS] line and bump the PASSED counter.
-# "(( x++ ))" returns non-zero when the old value is 0, hence "|| true"
-# so that "set -e" does not abort on the first increment.
-log_pass() {
-  echo "[PASS] $*"
-  ((PASSED++)) || true
-}
-
-# Print a [FAIL] line and bump the FAILED counter.
-log_fail() {
-  echo "[FAIL] $*"
-  ((FAILED++)) || true
-}
-
-# Print a [SKIP] line and bump the SKIPPED counter.
-log_skip() {
-  echo "[SKIP] $*"
-  ((SKIPPED++)) || true
-}
-
-# Print a section banner surrounded by blank lines.
-log_section() {
-  echo ""
-  echo "=== $* ==="
-  echo ""
-}
-
-# ─────────────────────────────────────────────────────────────────────────────
-# HTTP helpers
-# ─────────────────────────────────────────────────────────────────────────────
-
-# Make an HTTP request with retry logic.
-# Usage: http_request METHOD URL [curl_options...]
-# Output: only the HTTP status code on stdout, because callers capture it with
-#         $(...). Retry and failure diagnostics go to stderr so they cannot
-#         end up inside the captured status. The response body is discarded.
-# Returns: 0 as soon as curl completes a request, 1 once EDGE_MAX_RETRIES
-#          attempts have failed.
-http_request() {
-  local method="$1"
-  local url="$2"
-  shift 2
-
-  # "-X HEAD" only renames the verb; curl would still wait for a body that a
-  # HEAD response never carries. --head is the supported way to do it.
-  local -a method_args=(-X "$method")
-  if [ "$method" = "HEAD" ]; then
-    method_args=(--head)
-  fi
-
-  local retries=0
-  local status
-
-  while [ "$retries" -lt "$EDGE_MAX_RETRIES" ]; do
-    # -o /dev/null: nothing reads the body, so no temp file is left behind.
-    # curl's own error text (-S) stays on stderr and is not captured.
-    if status=$(curl -sS -o /dev/null -w '%{http_code}' \
-      "${method_args[@]}" \
-      --max-time "$EDGE_TIMEOUT" \
-      "$@" "$url"); then
-      echo "$status"
-      return 0
-    fi
-
-    retries=$((retries + 1))
-    log_info "Retry $retries/$EDGE_MAX_RETRIES for $url" >&2
-    sleep 1
-  done
-
-  # When called inside $(...), the FAILED increment done by log_fail is lost
-  # with the subshell; the caller is expected to record the failure itself.
-  log_fail "Max retries exceeded for $url" >&2
-  return 1
-}
-
-# Make a GET request and return status code
-# Usage: http_get URL [curl_options...]
-# Returns: HTTP status code
-http_get() {
-  local url="$1"
-  shift
-
-  http_request "GET" "$url" "$@"
-}
-
-# Make a HEAD request (no body)
-# Usage: http_head [curl_options...]
-# Returns: HTTP status code
-http_head() {
-  local url="$1"
-  shift
-
-  # Pass --head explicitly: "-X HEAD" alone leaves curl waiting for a body.
-  http_request "HEAD" "$url" --head "$@"
-}
-
-# ─────────────────────────────────────────────────────────────────────────────
-# Test checkers
-# ─────────────────────────────────────────────────────────────────────────────
-
-# Check that a URL's status code matches an extended regex.
-# Usage: check_http_status URL EXPECTED_PATTERN DESCRIPTION
-# Returns: 0 and logs a pass on match, 1 and logs a failure otherwise.
-check_http_status() {
-  local url="$1"
-  local expected_pattern="$2"
-  local description="$3"
-
-  local status
-  # A failed request yields an empty status, which simply fails the match.
-  status=$(http_get "$url") || status=""
-
-  if echo "$status" | grep -qE "$expected_pattern"; then
-    log_pass "$description: $url → $status"
-    return 0
-  else
-    log_fail "$description: $url → $status (expected: $expected_pattern)"
-    return 1
-  fi
-}
-
-# Check that a URL does NOT redirect in a loop.
-# Usage: check_no_redirect_loop URL MAX_REDIRECTS DESCRIPTION
-# Follows redirects up to MAX_REDIRECTS (default 10) and requires the final
-# response to be 2xx or 3xx.
-check_no_redirect_loop() {
-  local url="$1"
-  local max_redirects="${2:-10}"
-  local description="$3"
-
-  local status rc=0
-
-  # -L is what makes curl follow redirects; without it --max-redirs is inert.
-  # curl exits with code 47 when the redirect limit is exceeded.
-  status=$(curl -sS -L -o /dev/null -w '%{http_code}' \
-    --max-time "$EDGE_TIMEOUT" \
-    --max-redirs "$max_redirects" \
-    "$url") || rc=$?
-
-  if [ "$rc" -eq 47 ]; then
-    log_fail "$description: possible redirect loop detected (more than $max_redirects redirects)"
-    return 1
-  fi
-
-  if [ "$rc" -ne 0 ]; then
-    log_fail "$description: curl failed ($rc)"
-    return 1
-  fi
-
-  # Check final status is in valid range
-  if echo "$status" | grep -qE '^(2|3)[0-9][0-9]$'; then
-    log_pass "$description: no redirect loop ($status)"
-    return 0
-  else
-    log_fail "$description: unexpected status $status"
-    return 1
-  fi
-}
-
-# Check that assets referenced by a page load without 404.
-# Usage: check_assets_no_404 BASE_URL PATTERN DESCRIPTION
-# Only absolute http(s) asset URLs found in the page are probed; a page that
-# cannot be fetched or lists no such assets is logged as skipped.
-check_assets_no_404() {
-  local base_url="$1"
-  # PATTERN is accepted for interface compatibility but not used: the
-  # extension list in the grep below is fixed.
-  local _pattern="$2"
-  local description="$3"
-
-  local assets_found=0
-  local assets_404=0
-
-  # Fetch the main page and extract asset URLs
-  local main_page
-  main_page=$(curl -sS --max-time "$EDGE_TIMEOUT" "$base_url" 2>/dev/null) || {
-    log_skip "$description: could not fetch main page"
-    return 0
-  }
-
-  # Extract absolute URLs ending in a known static-asset extension
-  local assets
-  assets=$(echo "$main_page" | grep -oE 'https?://[^"'"'"']+\.(js|css|woff|woff2|ttf|eot|svg|png|jpg|jpeg|gif|ico)' | sort -u || true)
-
-  if [ -z "$assets" ]; then
-    log_skip "$description: no assets found to check"
-    return 0
-  fi
-
-  assets_found=$(echo "$assets" | wc -l)
-
-  # Check each asset; only an explicit 404 counts against the page
-  local asset status
-  while IFS= read -r asset; do
-    status=$(http_head "$asset") || status=""
-
-    if [ "$status" = "404" ]; then
-      log_fail "$description: asset 404: $asset"
-      assets_404=$((assets_404 + 1))
-    fi
-  done <<< "$assets"
-
-  if [ "$assets_404" -eq 0 ]; then
-    log_pass "$description: all $assets_found assets loaded (0 404s)"
-    return 0
-  else
-    log_fail "$description: $assets_404/$assets_found assets returned 404"
-    return 1
-  fi
-}
-
-# Check that a path returns 401 (unauthorized)
-# Usage: check_returns_401 URL DESCRIPTION
-check_returns_401() {
-  local url="$1"
-  local description="$2"
-
-  local status
-  status=$(http_get "$url") || status=""
-
-  if [ "$status" = "401" ]; then
-    log_pass "$description: $url → 401 (as expected)"
-    return 0
-  else
-    log_fail "$description: $url → $status (expected 401)"
-    return 1
-  fi
-}
-
-# Check that a path returns a 302 redirect to the expected location.
-# Usage: check_redirects_to URL EXPECTED_TARGET DESCRIPTION
-# EXPECTED_TARGET is matched as a fixed substring of the redirect URL.
-check_redirects_to() {
-  local url="$1"
-  local expected_target="$2"
-  local description="$3"
-
-  local response status location
-
-  # No -L here: the first response is what is under test. %{redirect_url} is
-  # the URL curl would have followed; it is empty when there is no redirect.
-  # Status and URL are emitted on one line and split on the first space.
-  response=$(curl -sS -o /dev/null -w '%{http_code} %{redirect_url}' \
-    --max-time "$EDGE_TIMEOUT" \
-    "$url") || {
-    log_fail "$description: curl failed"
-    return 1
-  }
-
-  status="${response%% *}"
-  location="${response#* }"
-
-  if [ "$status" = "302" ] && echo "$location" | grep -qF "$expected_target"; then
-    log_pass "$description: redirects to $location"
-    return 0
-  else
-    log_fail "$description: status=$status, location=$location (expected 302 → $expected_target)"
-    return 1
-  fi
-}
-
-# ─────────────────────────────────────────────────────────────────────────────
-# Main test suite
-# ─────────────────────────────────────────────────────────────────────────────
-
-# Run every check, print a summary and exit 1 if any check failed.
-# PASSED/FAILED/SKIPPED are maintained solely by the log_* helpers that each
-# checker calls, so the "|| true" below only stops "set -e" from aborting the
-# run at the first failing check; it must not touch the counters.
-main() {
-  # Honour the documented --base-url flag; BASE_URL from the environment
-  # remains the default.
-  while [ "$#" -gt 0 ]; do
-    case "$1" in
-      --base-url)
-        if [ "$#" -lt 2 ]; then
-          echo "[FAIL] --base-url requires a value" >&2
-          exit 1
-        fi
-        BASE_URL="$2"
-        shift 2
-        ;;
-      --base-url=*)
-        BASE_URL="${1#--base-url=}"
-        shift
-        ;;
-      *)
-        # Arguments used to be ignored silently; keep running, but say so.
-        log_info "Ignoring unknown argument: $1" >&2
-        shift
-        ;;
-    esac
-  done
-
-  # Every subpath starts with "/", so drop a trailing slash to avoid "//".
-  BASE_URL="${BASE_URL%/}"
-
-  log_section "Edge Subpath Routing Smoke Test"
-  log_info "Base URL: $BASE_URL"
-  log_info "Timeout: ${EDGE_TIMEOUT}s, Max retries: $EDGE_MAX_RETRIES"
-
-  # ─── Test 1: Root redirects to /forge/ ──────────────────────────────────
-  log_section "Test 1: Root redirects to /forge/"
-
-  check_redirects_to "$BASE_URL/" "$FORGE_PATH" "Root redirect" || true
-
-  # ─── Test 2: Forgejo login at /forge/ without redirect loops ────────────
-  log_section "Test 2: Forgejo login at /forge/"
-
-  check_no_redirect_loop "$BASE_URL$FORGE_PATH" 10 "Forgejo root" || true
-  check_http_status "$BASE_URL$FORGE_PATH" "^(2|3)[0-9][0-9]$" "Forgejo root status" || true
-
-  # ─── Test 3: Forgejo OAuth callback at /forge/_oauth/callback ───────────
-  log_section "Test 3: Forgejo OAuth callback at /forge/_oauth/callback"
-
-  # Any HTTP status is accepted here: this only proves the route answers.
-  check_http_status "$BASE_URL/forge/_oauth/callback" "^(2|3|4|5)[0-9][0-9]$" "Forgejo OAuth callback" || true
-
-  # ─── Test 4: Woodpecker dashboard at /ci/ ───────────────────────────────
-  log_section "Test 4: Woodpecker dashboard at /ci/"
-
-  check_no_redirect_loop "$BASE_URL$CI_PATH" 10 "Woodpecker root" || true
-  check_http_status "$BASE_URL$CI_PATH" "^(2|3)[0-9][0-9]$" "Woodpecker root status" || true
-  check_assets_no_404 "$BASE_URL$CI_PATH" "\.(js|css)" "Woodpecker assets" || true
-
-  # ─── Test 5: Chat OAuth login at /chat/login ────────────────────────────
-  log_section "Test 5: Chat OAuth login at /chat/login"
-
-  # CHAT_PATH already ends in "/", so append the leaf without another slash.
-  check_http_status "${BASE_URL}${CHAT_PATH}login" "^(2|3)[0-9][0-9]$" "Chat login page" || true
-
-  # ─── Test 6: Chat OAuth callback at /chat/oauth/callback ────────────────
-  log_section "Test 6: Chat OAuth callback at /chat/oauth/callback"
-
-  check_http_status "$BASE_URL/chat/oauth/callback" "^(2|3)[0-9][0-9]$" "Chat OAuth callback" || true
-
-  # ─── Test 7: Forward_auth on /chat/* returns 401 for unauthenticated ────
-  log_section "Test 7: Forward_auth on /chat/* returns 401"
-
-  # Test protected chat endpoints (chat root and dashboard)
-  check_returns_401 "${BASE_URL}${CHAT_PATH}" "Chat root (unauthenticated)" || true
-  check_returns_401 "${BASE_URL}${CHAT_PATH}dashboard" "Chat dashboard (unauthenticated)" || true
-
-  # ─── Test 8: Staging at /staging/ ───────────────────────────────────────
-  log_section "Test 8: Staging at /staging/"
-
-  check_http_status "$BASE_URL$STAGING_PATH" "^(2|3)[0-9][0-9]$" "Staging root" || true
-
-  # ─── Test 9: Caddy admin API health ─────────────────────────────────────
-  log_section "Test 9: Caddy admin API health"
-
-  # Caddy admin API is typically on port 2019 locally
-  if curl -sS --max-time 5 "http://127.0.0.1:2019/" >/dev/null 2>&1; then
-    log_pass "Caddy admin API reachable"
-  else
-    log_skip "Caddy admin API not reachable (expected if edge is remote)"
-  fi
-
-  # ─── Summary ────────────────────────────────────────────────────────────
-  log_section "Test Summary"
-  log_info "Passed: $PASSED"
-  log_info "Failed: $FAILED"
-  log_info "Skipped: $SKIPPED"
-
-  if [ "$FAILED" -gt 0 ]; then
-    log_section "TEST FAILED"
-    exit 1
-  fi
-
-  log_section "TEST PASSED"
-  exit 0
-}
-
-# Run main
-main "$@"
diff --git a/tools/vault-seed-ops-repo.sh b/tools/vault-seed-ops-repo.sh
new file mode 100755
index 0000000..09a2fba
--- /dev/null
+++ b/tools/vault-seed-ops-repo.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+# =============================================================================
+# tools/vault-seed-ops-repo.sh — Idempotent seed for kv/disinto/shared/ops-repo
+#
+# Part of the Nomad+Vault migration (S5.1, issue #1035). Populates the KV v2
+# path that nomad/jobs/edge.hcl dispatcher task reads from, so the edge
+# proxy has FORGE_TOKEN for ops repo access.
+#
+# Seeds from kv/disinto/bots/vault (the vault bot credentials) — copies the
+# token field to kv/disinto/shared/ops-repo. This is the "service" path that
+# dispatcher uses, distinct from the "agent" path (bots/vault) used by
+# agent tasks under the service-agents policy.
+#
+# Idempotency contract:
+#   - Key present with non-empty value → leave untouched, log "token unchanged".
+#   - Key missing or empty → copy from bots/vault, log "token copied".
+#   - If bots/vault is also empty → generate a random value, log "token generated".
+#
+# Preconditions:
+#   - Vault reachable + unsealed at $VAULT_ADDR.
+#   - VAULT_TOKEN set (env) or /etc/vault.d/root.token readable.
+#   - The `kv/` mount is enabled as KV v2.
+#
+# Requires:
+#   - VAULT_ADDR (e.g. http://127.0.0.1:8200)
+#   - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh)
+#   - curl, jq, openssl
+#
+# Usage:
+#   tools/vault-seed-ops-repo.sh
+#   tools/vault-seed-ops-repo.sh --dry-run
+#
+# Exit codes:
+#   0 success (seed applied, or already applied)
+#   1 precondition / API / mount-mismatch failure
+# =============================================================================
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.."
&& pwd)"
+
+# shellcheck source=../lib/hvault.sh
+source "${REPO_ROOT}/lib/hvault.sh"
+
+# KV v2 mount + logical paths
+KV_MOUNT="kv"
+OPS_REPO_PATH="disinto/shared/ops-repo"
+VAULT_BOT_PATH="disinto/bots/vault"
+
+OPS_REPO_API="${KV_MOUNT}/data/${OPS_REPO_PATH}"
+VAULT_BOT_API="${KV_MOUNT}/data/${VAULT_BOT_PATH}"
+
+log() { printf '[vault-seed-ops-repo] %s\n' "$*"; }
+die() { printf '[vault-seed-ops-repo] ERROR: %s\n' "$*" >&2; exit 1; }
+
+# ── Flag parsing ─────────────────────────────────────────────────────────────
+DRY_RUN=0
+case "$#:${1-}" in
+  0:)
+    ;;
+  1:--dry-run)
+    DRY_RUN=1
+    ;;
+  1:-h|1:--help)
+    printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")"
+    printf 'Seed kv/disinto/shared/ops-repo with FORGE_TOKEN.\n\n'
+    printf 'Copies token from kv/disinto/bots/vault if present;\n'
+    printf 'otherwise generates a random value. Idempotent:\n'
+    printf 'existing non-empty values are left untouched.\n\n'
+    printf '  --dry-run  Print planned actions without writing.\n'
+    exit 0
+    ;;
+  *)
+    die "invalid arguments: $* (try --help)"
+    ;;
+esac
+
+# ── Preconditions ────────────────────────────────────────────────────────────
+for bin in curl jq openssl; do
+  command -v "$bin" >/dev/null 2>&1 \
+    || die "required binary not found: ${bin}"
+done
+
+[ -n "${VAULT_ADDR:-}" ] \
+  || die "VAULT_ADDR unset — e.g. export VAULT_ADDR=http://127.0.0.1:8200"
+hvault_token_lookup >/dev/null \
+  || die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN"
+
+# ── Step 1/2: ensure kv/ mount exists and is KV v2 ───────────────────────────
+log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──"
+export DRY_RUN
+hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-ops-repo]" \
+  || die "KV mount check failed"
+
+# ── Step 2/2: seed ops-repo from vault bot ───────────────────────────────────
+log "── Step 2/2: seed ${OPS_REPO_API} ──"
+
+# Read existing ops-repo value
+existing_raw="$(hvault_get_or_empty "${OPS_REPO_API}")" \
+  || die "failed to read ${OPS_REPO_API}"
+
+existing_token=""
+if [ -n "$existing_raw" ]; then
+  existing_token="$(printf '%s' "$existing_raw" | jq -r '.data.data.token // ""')"
+fi
+
+desired_token="$existing_token"
+# action: past-tense label for the final log line ("copied" / "generated").
+# plan:   matching verb for the dry-run message ("copy" / "generate").
+# Both stay empty when the existing token is kept.
+action=""
+plan=""
+
+if [ -z "$existing_token" ]; then
+  # Token missing — try to copy from vault bot. A failed read is not fatal
+  # (the script falls back to generating a token), but it is reported rather
+  # than swallowed so an API problem is not mistaken for an empty secret.
+  bot_raw=""
+  if ! bot_raw="$(hvault_get_or_empty "${VAULT_BOT_API}")"; then
+    log "WARNING: failed to read ${VAULT_BOT_API} — will not copy from it" >&2
+    bot_raw=""
+  fi
+
+  if [ -n "$bot_raw" ]; then
+    bot_token="$(printf '%s' "$bot_raw" | jq -r '.data.data.token // ""')"
+    if [ -n "$bot_token" ]; then
+      desired_token="$bot_token"
+      action="copied"
+      plan="copy"
+    fi
+  fi
+
+  # If still no token, generate one (dry-run only records the intent and
+  # never creates a value)
+  if [ -z "$desired_token" ]; then
+    action="generated"
+    plan="generate"
+    if [ "$DRY_RUN" -ne 1 ]; then
+      desired_token="$(openssl rand -hex 32)"
+    fi
+  fi
+fi
+
+if [ -z "$action" ]; then
+  log "all keys present at ${OPS_REPO_API} — no-op"
+  log "token unchanged"
+  exit 0
+fi
+
+if [ "$DRY_RUN" -eq 1 ]; then
+  log "[dry-run] ${OPS_REPO_PATH}: would ${plan} token"
+  exit 0
+fi
+
+# Write the token
+# NOTE(review): the payload carries only "token"; if other keys can exist at
+# this path, confirm that writing this version does not drop them.
+payload="$(jq -n --arg t "$desired_token" '{data: {token: $t}}')"
+_hvault_request POST "${OPS_REPO_API}" "$payload" >/dev/null \
+  || die "failed to write ${OPS_REPO_API}"
+
+log "${OPS_REPO_PATH}: ${action} token"
+log "done — ${OPS_REPO_API} seeded"
diff --git a/vault/policies/AGENTS.md b/vault/policies/AGENTS.md
index 9a4b588..47af340 100644
--- a/vault/policies/AGENTS.md
+++ b/vault/policies/AGENTS.md
@@ -1,4 +1,4 @@
-
+
 # vault/policies/ — Agent Instructions
 
 HashiCorp Vault ACL policies for the disinto factory. One `.hcl` file per