Compare commits
11 commits
fix/issue-
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| eb19aa6c84 | |||
|
|
86793c4c00 | ||
| 0bb04545d4 | |||
| 1de3b0d560 | |||
|
|
d1e535696a | ||
|
|
ada27759de | ||
|
|
2648c401f4 | ||
| b09463b162 | |||
|
|
72f981528d | ||
|
|
cd778c4775 | ||
|
|
bf3d16e8b3 |
10 changed files with 243 additions and 26 deletions
|
|
@ -308,6 +308,21 @@ def main() -> int:
|
|||
"63bfa88d71764c95c65a9a248f3e40ab": "Vault-seed preconditions: binary check end + VAULT_ADDR die",
|
||||
"34873ad3570b211ce1d90468ab6ac94c": "Vault-seed preconditions: VAULT_ADDR die + hvault_token_lookup",
|
||||
"71a52270f249e843cda48ad896d9f781": "Vault-seed preconditions: VAULT_ADDR + hvault_token_lookup + die",
|
||||
# Common vault-seed script flag parsing patterns
|
||||
# Shared across tools/vault-seed-{forgejo,ops-repo}.sh
|
||||
"6906b7787796c2ccb8dd622e2ad4e7bf": "vault-seed DRY_RUN init + case pattern (forgejo + ops-repo)",
|
||||
"a0df5283b616b964f8bc32fd99ec1b5a": "vault-seed case pattern start (forgejo + ops-repo)",
|
||||
"e15e3272fdd9f0f46ce9e726aea9f853": "vault-seed case pattern dry-run handler (forgejo + ops-repo)",
|
||||
"c9f22385cc49a3dac1d336bc14c6315b": "vault-seed DRY_RUN assignment (forgejo + ops-repo)",
|
||||
"106f4071e88f841b3208b01144cd1c39": "vault-seed case pattern dry-run end (forgejo + ops-repo)",
|
||||
"c15506dcb6bb340b25d1c39d442dd2e6": "vault-seed help text + invalid arg handler (forgejo + ops-repo)",
|
||||
"1feecd3b3caf00045fae938ddf2811de": "vault-seed invalid arg handler (forgejo + ops-repo)",
|
||||
"919780d5e7182715344f5aa02b191294": "vault-seed invalid arg + esac pattern (forgejo + ops-repo)",
|
||||
"8dce1d292bce8e60ef4c0665b62945b0": "vault-seed esac + binary check loop (forgejo + ops-repo)",
|
||||
"ca043687143a5b47bd54e65a99ce8ee8": "vault-seed binary check loop start (forgejo + ops-repo)",
|
||||
"aefd9f655411a955395e6e5995ddbe6f": "vault-seed binary check pattern (forgejo + ops-repo)",
|
||||
"60f0c46deb5491599457efb4048918e5": "vault-seed VAULT_ADDR + hvault_token_lookup check (forgejo + ops-repo)",
|
||||
"f6838f581ef6b4d82b55268389032769": "vault-seed VAULT_ADDR + hvault_token_lookup die (forgejo + ops-repo)",
|
||||
}
|
||||
|
||||
if not sh_files:
|
||||
|
|
|
|||
|
|
@ -802,6 +802,7 @@ _disinto_init_nomad() {
|
|||
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
|
||||
agents) seed_name="agents" ;;
|
||||
chat) seed_name="chat" ;;
|
||||
edge) seed_name="ops-repo" ;;
|
||||
esac
|
||||
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
|
||||
if [ -x "$seed_script" ]; then
|
||||
|
|
@ -983,6 +984,7 @@ _disinto_init_nomad() {
|
|||
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
|
||||
agents) seed_name="agents" ;;
|
||||
chat) seed_name="chat" ;;
|
||||
edge) seed_name="ops-repo" ;;
|
||||
esac
|
||||
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
|
||||
if [ -x "$seed_script" ]; then
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
{"issue":850,"group":"lib/generators.sh","title":"compose dup-detection smoke CI failures","reason":"4+ consecutive ci_exhausted failures across PRs #872 #908 #971; planner flagged for human re-scope","ts":"2026-04-19T00:00:00Z"}
|
||||
|
|
@ -2,7 +2,12 @@
|
|||
{
|
||||
"action": "edit_body",
|
||||
"issue": 1025,
|
||||
"body": "## Goal\nVerify that Forgejo, Woodpecker, and chat all function correctly when served\nunder /forge/, /ci/, and /chat/ subpaths on a single domain. Catch redirect\nloops, OAuth callback failures, and asset 404s before they hit production.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] Forgejo login at /forge/ completes without redirect loops\n- [ ] Forgejo OAuth callback for Woodpecker succeeds under subpath\n- [ ] Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS)\n- [ ] Chat OAuth login flow works at /chat/login\n- [ ] Forward_auth on /chat/* rejects unauthenticated requests with 401\n- [ ] Staging content loads at /staging/\n- [ ] Root / redirects to /forge/\n- [ ] CI pipeline added to .woodpecker/ to run this test on edge-related changes\n\n## Affected files\n- `nomad/jobs/edge.hcl` — edge Caddy routing config under test\n- `docker/edge/` — edge container and Caddyfile template\n- `tools/edge-control/register.sh` — route registration\n- `.woodpecker/` — CI pipeline for edge smoke test\n\n## Dependencies\nNone — first issue in sprint.\n"
|
||||
"body": "## Prior art: PR #1033 (open, branch `fix/issue-1025` retained)\n\nFirst attempt by dev-qwen2 (head `f692dd2`). Test script (`tests/smoke-edge-subpath.sh`, 13.8 KB) and pipeline (`.woodpecker/edge-subpath.yml`) both landed and look reasonable, but the **CI harness design is wrong**: the pipeline boots a bare `alpine:3.19` container and runs the smoke script directly against `BASE_URL=http://localhost`, with no stack to test against.\n\n**This is a harness design gap, not a script bug.** The smoke script itself is a reasonable post-deploy tool — the mistake was trying to exercise it as a hermetic CI step.\n\n**Approach (Option 1 — split the work):**\n\nKeep `tests/smoke-edge-subpath.sh` as an out-of-CI post-deploy tool (accepts `BASE_URL` env var). Replace the CI pipeline step that tries to curl a live stack with static checks only: `shellcheck`, `caddy validate` on the generated Caddyfile, and a template-substitution unit test that verifies routing block shape.\n\nBranch `fix/issue-1025` is preserved at `f692dd2` — the smoke script body is reusable; only the pipeline harness needs a rethink.\n\n**Timeline:**\n- 2026-04-19 09:14 — dev-qwen2 last pushed `f692dd2`\n- 3 pipelines (#1378/#1380/#1382) all fail: no service to curl (connection refused)\n\n## Acceptance criteria\n- [ ] `.woodpecker/edge-subpath.yml` pipeline runs `shellcheck` on `tests/smoke-edge-subpath.sh` with no live service curl\n- [ ] `caddy validate` runs on the generated Caddyfile in CI (template-substitution unit test)\n- [ ] A template-substitution test verifies the Caddyfile routing block shape (forge/ci/staging/chat paths)\n- [ ] `tests/smoke-edge-subpath.sh` accepts `BASE_URL` env var for post-deploy staging runs\n- [ ] CI green (no connection-refused failures on Woodpecker)\n\n## Affected files\n- `.woodpecker/edge-subpath.yml` — pipeline config (static checks only, no service curl)\n- `tests/smoke-edge-subpath.sh` — out-of-CI smoke script (reusable from PR #1033)\n\n## Dependencies\n- #1038 should land first to unblock local edge staging runs (optional — CI fix is independent)"
|
||||
},
|
||||
{
|
||||
"action": "remove_label",
|
||||
"issue": 1025,
|
||||
"label": "blocked"
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
|
|
@ -11,32 +16,37 @@
|
|||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 1026,
|
||||
"body": "## Goal\nReplace the blocking one-shot claude --print invocation in the chat backend with\na WebSocket connection that streams tokens to the UI as they arrive.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] /chat/ws endpoint accepts WebSocket upgrade with valid session cookie\n- [ ] /chat/ws rejects upgrade if session cookie is missing or expired\n- [ ] Chat backend streams claude output over WebSocket as text frames\n- [ ] UI renders tokens incrementally as they arrive\n- [ ] Rate limiting still enforced on WebSocket messages\n- [ ] Caddy proxies WebSocket upgrade correctly through /chat/ws with forward_auth\n\n## Affected files\n- `docker/chat/server.py` — chat backend WebSocket endpoint\n- `docker/chat/ui/` — frontend WebSocket client rendering\n- `nomad/jobs/edge.hcl` — Caddy WebSocket proxy config\n- `nomad/jobs/chat.hcl` — chat Nomad job\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n"
|
||||
"issue": 1038,
|
||||
"body": "## Problem\n\n`disinto-edge` crashloops on any deployment that has not opted into the age-encrypted secret store (#777), because the edge entrypoint treats four secrets as unconditionally required:\n\n```\nFATAL: age key (/home/agent/.config/sops/age/keys.txt) or secrets dir (/opt/disinto/secrets) not found — cannot load required secrets\n```\n\nObserved on `disinto-dev-box` (container `disinto-edge`, restarting every ~30s), which blocks PR #1033 (edge-subpath smoke test) and any other work that depends on a running edge.\n\n## Root cause\n\n`docker/edge/entrypoint-edge.sh:176-205` requires:\n\n- `~/.config/sops/age/keys.txt`\n- `/opt/disinto/secrets/` with `.enc` files for `CADDY_SSH_KEY`, `CADDY_SSH_HOST`, `CADDY_SSH_USER`, `CADDY_ACCESS_LOG`.\n\nThese four secrets feed exactly one feature: the daily 23:50 UTC `collect-engagement.sh` cron (#745), which SCPs Caddy access logs from a **remote production edge host** for engagement parsing. On a local factory box or any deployment that has not set up a remote edge, this code path has no target — yet its absence kills the whole edge container.\n\n## Fix\n\nMake the secrets block **optional**. When age key or secrets dir is missing, or any of the four CADDY_ secrets fail to decrypt, log a warning and skip the `collect-engagement` cron loop. Caddy itself does not depend on these secrets and should start normally.\n\nThe concrete edit is around lines 176-205 of `docker/edge/entrypoint-edge.sh` — guard the secret-loading block with a check for the age key and secrets dir, set `EDGE_ENGAGEMENT_READY=0` on failure, and skip cron registration when `EDGE_ENGAGEMENT_READY != 1`.\n\n## Acceptance criteria\n- [ ] `docker/edge/entrypoint-edge.sh` loads CADDY_ secrets optionally — missing age key or secrets dir logs a warning and continues, does not FATAL\n- [ ] Caddy starts normally when CADDY_ secrets are absent\n- [ ] `collect-engagement` cron is skipped (not registered) when engagement secrets are unavailable\n- [ ] On deployments WITH secrets configured, behavior is unchanged (collect-engagement cron still fires at 23:50 UTC)\n- [ ] CI green\n\n## Affected files\n- `docker/edge/entrypoint-edge.sh` — lines 176-205, secrets loading block made optional"
|
||||
},
|
||||
{
|
||||
"action": "remove_label",
|
||||
"issue": 1038,
|
||||
"label": "blocked"
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
"issue": 1026,
|
||||
"issue": 1038,
|
||||
"label": "backlog"
|
||||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 1027,
|
||||
"body": "## Goal\nGive the chat container Claude session read-write access to the project working\ntree so the operator can inspect, explain, or modify code — scoped to that tree\nonly, with no access to factory internals, secrets, or Docker socket.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] Chat container bind-mounts the project working tree as a named volume\n- [ ] Claude invocation in server.py sets cwd to the workspace directory\n- [ ] Claude permission mode is acceptEdits (not bypassPermissions)\n- [ ] verify-chat-sandbox.sh updated to assert workspace mount exists\n- [ ] Compose generator adds the workspace volume conditionally\n\n## Affected files\n- `docker/chat/server.py` — Claude invocation and cwd setup\n- `tools/edge-control/verify-chat-sandbox.sh` — sandbox verification\n- `lib/generators.sh` — Compose generator workspace volume\n- `nomad/jobs/chat.hcl` — chat container bind-mount config\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n"
|
||||
"issue": 850,
|
||||
"body": "## Problem\n\nWhen the compose generator emits the same service name twice — e.g. both the legacy `ENABLE_LLAMA_AGENT=1` branch and a matching `[agents.llama]` TOML block produce an `agents-llama:` key — the failure is deferred all the way to `docker compose` YAML parsing:\n\n```\nfailed to parse /home/johba/disinto/docker-compose.yml: yaml: construct errors:\n line 4: line 431: mapping key \"agents-llama\" already defined at line 155\n```\n\nBy then, the user has already paid the cost of: pre-build binary downloads, generator run, Caddyfile regeneration. The only hint about what went wrong is a line number in a generated file. Root cause (dual activation) is not surfaced.\n\n## Fix\n\nAdd a generate-time guard to `lib/generators.sh`:\n\n- After collecting all service blocks to emit, compare the set of service names against duplicates.\n- If a duplicate is detected, abort with a clear message naming both sources of truth (e.g. `\"agents-llama\" emitted twice — from ENABLE_LLAMA_AGENT=1 and from [agents.llama] in projects/disinto.toml; remove one`).\n\n## Prior art: PR #872 (closed, branch `fix/issue-850` retained)\n\ndev-qwen's first attempt (`db009e3`) landed the dup-detection logic in `lib/generators.sh` correctly (unit test `tests/test-duplicate-service-detection.sh` passes all 3 cases), but the smoke test fails on CI.\n\n**Why the smoke test fails:** sections 1-7 of `smoke-init.sh` already run `bin/disinto init`, materializing `docker-compose.yml`. Section 8 re-invokes `bin/disinto init` to verify the dup guard fires — but `_generate_compose_impl` early-returns with `\"Compose: already exists, skipping\"` before reaching the dup-check.\n\n**Suggested fix:** in `tests/smoke-init.sh` section 8 (around line 452, before the second `bin/disinto init` invocation), add:\n\n```bash\nrm -f \"${FACTORY_ROOT}/docker-compose.yml\"\n```\n\nso the generator actually runs and the dup-detection path is exercised. Do **not** hoist the dup-check above the early-return.\n\nThe branch `fix/issue-850` is preserved as a starting point — pick up from `db009e3` and patch the smoke-test cleanup.\n\nRelated: #846.\n\n## Acceptance criteria\n- [ ] `bin/disinto init` with a config that would produce duplicate service names aborts with a clear error message naming both sources (e.g. `ENABLE_LLAMA_AGENT=1` and `[agents.llama]` TOML block)\n- [ ] `tests/smoke-init.sh` section 8 removes `docker-compose.yml` before re-invoking `disinto init` so the dup guard is exercised\n- [ ] Unit test `tests/test-duplicate-service-detection.sh` passes all 3 cases\n- [ ] CI green (smoke-init.sh section 8 no longer skips dup detection)\n\n## Affected files\n- `lib/generators.sh` — duplicate service name check after collecting all service blocks\n- `tests/smoke-init.sh` — section 8: add `rm -f \\${FACTORY_ROOT}/docker-compose.yml` before second `disinto init`"
|
||||
},
|
||||
{
|
||||
"action": "remove_label",
|
||||
"issue": 850,
|
||||
"label": "blocked"
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
"issue": 1027,
|
||||
"issue": 850,
|
||||
"label": "backlog"
|
||||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 1028,
|
||||
"body": "## Goal\nIf the smoke test reveals unfixable subpath issues, automate the pivot to\nper-service subdomains so the switch is a single config change.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] generators.sh _generate_caddyfile_impl accepts EDGE_ROUTING_MODE env var\n- [ ] In subdomain mode, Caddyfile emits four host blocks per edge-routing-fallback.md\n- [ ] register.sh registers additional subdomain routes when EDGE_ROUTING_MODE=subdomain\n- [ ] OAuth redirect URIs in ci-setup.sh respect routing mode\n- [ ] .env template documents EDGE_ROUTING_MODE with a comment referencing the fallback doc\n\n## Affected files\n- `lib/generators.sh` — _generate_caddyfile_impl routing mode switch\n- `tools/edge-control/register.sh` — subdomain route registration\n- `lib/ci-setup.sh` — OAuth redirect URI handling\n- `projects/*.toml.example` — .env template documentation\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n"
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
"issue": 1028,
|
||||
"label": "backlog"
|
||||
"action": "comment",
|
||||
"issue": 758,
|
||||
"body": "This issue is the critical path blocker for #820 (ops repo re-seed) and #982 (collect-engagement commit fix). Both are in the backlog and ready to merge, but cannot run until ops repo branch protection is resolved. Needs admin/human action to change Forgejo branch protection settings on disinto-ops — no code change can unblock this."
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: a467d613a44b9b475a60c14c4162621e846969ea -->
|
||||
<!-- last-reviewed: 0bb04545d47fb43b2cab0a1f4406c2a2b57f4eba -->
|
||||
# Shared Helpers (`lib/`)
|
||||
|
||||
All agents source `lib/env.sh` as their first action. Additional helpers are
|
||||
|
|
@ -35,4 +35,4 @@ sourced as needed.
|
|||
| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
|
||||
| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |
|
||||
| `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_get_or_empty(PATH)` — GET /v1/PATH; 200→raw body, 404→empty, else structured error + return 1 (used by sync scripts to distinguish "absent, create" from hard failure without tripping errexit, #881). `hvault_ensure_kv_v2(MOUNT, [LOG_PREFIX])` — idempotent KV v2 mount assertion: enables mount if absent, fails loudly if present as wrong type/version. Extracted from all `vault-seed-*.sh` scripts to eliminate dup-detector violations. Respects `DRY_RUN=1`. `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. `_hvault_seed_key(PATH, KEY, [GENERATOR])` — seed one KV key if absent; reads existing data and merges to preserve sibling keys (KV v2 replaces atomically); returns 0=created, 1=unchanged, 2=API error (#992). All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | `tools/vault-apply-policies.sh`, `tools/vault-apply-roles.sh`, `lib/init/nomad/vault-nomad-auth.sh`, `tools/vault-seed-*.sh` |
|
||||
| `lib/init/nomad/` | Nomad+Vault installer scripts. `cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling; `HOST_VOLUME_DIRS` array now includes `/srv/disinto/docker` (for staging file-server, S5.2, #989, #992). `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. `deploy.sh` — S4 dependency-ordered Nomad job deploy + health-wait; takes a list of jobspec basenames, submits each to Nomad and polls until healthy before proceeding to the next; supports `--dry-run` and per-job timeout overrides via `JOB_READY_TIMEOUT_<JOBNAME>`; invoked by `bin/disinto --with <svc>` and `cluster-up.sh`; deploy order now covers staging, chat, edge (S5.5, #992). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. | `bin/disinto init --backend=nomad` |
|
||||
| `lib/init/nomad/` | Nomad+Vault installer scripts. `cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling; `HOST_VOLUME_DIRS` array now includes `/srv/disinto/docker` (for staging file-server, S5.2, #989, #992). `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. `deploy.sh` — S4 dependency-ordered Nomad job deploy + health-wait; takes a list of jobspec basenames, submits each to Nomad and polls until healthy before proceeding to the next; supports `--dry-run` and per-job timeout overrides via `JOB_READY_TIMEOUT_<JOBNAME>`; global default timeout `JOB_READY_TIMEOUT_SECS` is 360s (raised from 240s for chat cold-start, #1036); invoked by `bin/disinto --with <svc>` and `cluster-up.sh`; deploy order now covers staging, chat, edge (S5.5, #992). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. | `bin/disinto init --backend=nomad` |
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
# Environment:
|
||||
# REPO_ROOT — absolute path to repo root (defaults to parent of
|
||||
# this script's parent directory)
|
||||
# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 240)
|
||||
# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 360)
|
||||
# JOB_READY_TIMEOUT_<JOBNAME> — per-job timeout override (e.g.,
|
||||
# JOB_READY_TIMEOUT_FORGEJO=300)
|
||||
#
|
||||
|
|
@ -33,7 +33,7 @@ set -euo pipefail
|
|||
# ── Configuration ────────────────────────────────────────────────────────────
|
||||
SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}"
|
||||
JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-240}"
|
||||
JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-360}"
|
||||
|
||||
DRY_RUN=0
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: a467d613a44b9b475a60c14c4162621e846969ea -->
|
||||
<!-- last-reviewed: 0bb04545d47fb43b2cab0a1f4406c2a2b57f4eba -->
|
||||
# nomad/ — Agent Instructions
|
||||
|
||||
Nomad + Vault HCL for the factory's single-node cluster. These files are
|
||||
|
|
@ -21,7 +21,7 @@ see issues #821–#992 for the step breakdown.
|
|||
| `jobs/agents.hcl` | submitted via `lib/init/nomad/deploy.sh` | All 7 agent roles (dev, review, gardener, planner, predictor, supervisor, architect) + llama variant; Vault-templated bot tokens via `service-agents` policy; `force_pull = false` — image is built locally by `bin/disinto --with agents`, no registry (S4.1, S4-fix-2, S4-fix-5, #955, #972, #978) |
|
||||
| `jobs/staging.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy file-server mounting `docker/` as `/srv/site:ro`; no Vault integration; **dynamic host port** (no static 80 — edge owns 80/443, collision fixed in S5-fix-7 #1018); edge discovers via Nomad service registration (S5.2, #989) |
|
||||
| `jobs/chat.hcl` | submitted via `lib/init/nomad/deploy.sh` | Claude chat UI; custom `disinto/chat:local` image; sandbox hardening (cap_drop ALL, **tmpfs via mount block** not `tmpfs=` arg — S5-fix-5 #1012, pids_limit 128); Vault-templated OAuth secrets via `service-chat` policy (S5.2, #989) |
|
||||
| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988) |
|
||||
| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; **both Caddy and dispatcher tasks use `network_mode = "host"`** — upstreams are `127.0.0.1:<port>` (forgejo :3000, woodpecker :8000, chat :8080), not Docker hostnames (#1031, #1034); `FORGE_URL` rendered via Nomad service discovery template (not static env) to handle bridge vs. host network differences (#1034); dispatcher Vault secret path changed to `kv/data/disinto/shared/ops-repo` (#1041); Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988) |
|
||||
|
||||
Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the
|
||||
split between `server.hcl` and `client.hcl` is for readability, not
|
||||
|
|
|
|||
|
|
@ -123,6 +123,19 @@ job "edge" {
|
|||
# ── Caddyfile via Nomad service discovery (S5-fix-7, issue #1018) ────
|
||||
# Renders staging upstream from Nomad service registration instead of
|
||||
# hardcoded staging:80. Caddy picks up /local/Caddyfile via entrypoint.
|
||||
# Forge URL via Nomad service discovery (issue #1034) — resolves forgejo
|
||||
# service address/port dynamically for bridge network compatibility.
|
||||
template {
|
||||
destination = "local/forge.env"
|
||||
env = true
|
||||
change_mode = "restart"
|
||||
data = <<EOT
|
||||
{{ range service "forgejo" -}}
|
||||
FORGE_URL=http://{{ .Address }}:{{ .Port }}
|
||||
{{- end }}
|
||||
EOT
|
||||
}
|
||||
|
||||
template {
|
||||
destination = "local/Caddyfile"
|
||||
change_mode = "restart"
|
||||
|
|
@ -174,7 +187,6 @@ EOT
|
|||
|
||||
# ── Non-secret env ───────────────────────────────────────────────────
|
||||
env {
|
||||
FORGE_URL = "http://127.0.0.1:3000"
|
||||
FORGE_REPO = "disinto-admin/disinto"
|
||||
DISINTO_CONTAINER = "1"
|
||||
PROJECT_NAME = "disinto"
|
||||
|
|
@ -213,6 +225,21 @@ EOT
|
|||
read_only = false
|
||||
}
|
||||
|
||||
# ── Forge URL via Nomad service discovery (issue #1034) ──────────
|
||||
# Resolves forgejo service address/port dynamically for bridge network
|
||||
# compatibility. Template-scoped to dispatcher task (Nomad doesn't
|
||||
# propagate templates across tasks).
|
||||
template {
|
||||
destination = "local/forge.env"
|
||||
env = true
|
||||
change_mode = "restart"
|
||||
data = <<EOT
|
||||
{{ range service "forgejo" -}}
|
||||
FORGE_URL=http://{{ .Address }}:{{ .Port }}
|
||||
{{- end }}
|
||||
EOT
|
||||
}
|
||||
|
||||
# ── Vault-templated secrets (S5.1, issue #988) ──────────────────────
|
||||
# Renders FORGE_TOKEN from Vault KV v2 for ops repo access.
|
||||
template {
|
||||
|
|
@ -221,10 +248,10 @@ EOT
|
|||
change_mode = "restart"
|
||||
error_on_missing_key = false
|
||||
data = <<EOT
|
||||
{{- with secret "kv/data/disinto/bots/vault" -}}
|
||||
{{- with secret "kv/data/disinto/shared/ops-repo" -}}
|
||||
FORGE_TOKEN={{ .Data.data.token }}
|
||||
{{- else -}}
|
||||
# WARNING: kv/disinto/bots/vault is empty — run tools/vault-seed-agents.sh
|
||||
# WARNING: kv/disinto/shared/ops-repo is empty — run tools/vault-seed-ops-repo.sh
|
||||
FORGE_TOKEN=seed-me
|
||||
{{- end }}
|
||||
EOT
|
||||
|
|
@ -233,7 +260,6 @@ EOT
|
|||
# ── Non-secret env ───────────────────────────────────────────────────
|
||||
env {
|
||||
DISPATCHER_BACKEND = "nomad"
|
||||
FORGE_URL = "http://127.0.0.1:3000"
|
||||
FORGE_REPO = "disinto-admin/disinto"
|
||||
FORGE_OPS_REPO = "disinto-admin/disinto-ops"
|
||||
PRIMARY_BRANCH = "main"
|
||||
|
|
|
|||
|
|
@ -426,3 +426,19 @@ setup_file() {
|
|||
[[ "$output" == *"services to deploy: forgejo,woodpecker-server,woodpecker-agent,agents"* ]]
|
||||
[[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents"* ]]
|
||||
}
|
||||
|
||||
# S5.1 / #1035 — edge service seeds ops-repo (dispatcher FORGE_TOKEN)
|
||||
@test "disinto init --backend=nomad --with edge deploys edge" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with edge --dry-run
|
||||
[ "$status" -eq 0 ]
|
||||
# edge depends on all backend services, so all are included
|
||||
[[ "$output" == *"services to deploy: edge,forgejo"* ]]
|
||||
[[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents staging chat edge"* ]]
|
||||
[[ "$output" == *"[deploy] [dry-run] nomad job validate"*"edge.hcl"* ]]
|
||||
}
|
||||
|
||||
@test "disinto init --backend=nomad --with edge seeds ops-repo" {
|
||||
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with edge --dry-run
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"tools/vault-seed-ops-repo.sh --dry-run"* ]]
|
||||
}
|
||||
|
|
|
|||
149
tools/vault-seed-ops-repo.sh
Executable file
149
tools/vault-seed-ops-repo.sh
Executable file
|
|
@ -0,0 +1,149 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# tools/vault-seed-ops-repo.sh — Idempotent seed for kv/disinto/shared/ops-repo
|
||||
#
|
||||
# Part of the Nomad+Vault migration (S5.1, issue #1035). Populates the KV v2
|
||||
# path that nomad/jobs/edge.hcl dispatcher task reads from, so the edge
|
||||
# proxy has FORGE_TOKEN for ops repo access.
|
||||
#
|
||||
# Seeds from kv/disinto/bots/vault (the vault bot credentials) — copies the
|
||||
# token field to kv/disinto/shared/ops-repo. This is the "service" path that
|
||||
# dispatcher uses, distinct from the "agent" path (bots/vault) used by
|
||||
# agent tasks under the service-agents policy.
|
||||
#
|
||||
# Idempotency contract:
|
||||
# - Key present with non-empty value → leave untouched, log "token unchanged".
|
||||
# - Key missing or empty → copy from bots/vault, log "token copied".
|
||||
# - If bots/vault is also empty → generate a random value, log "token generated".
|
||||
#
|
||||
# Preconditions:
|
||||
# - Vault reachable + unsealed at $VAULT_ADDR.
|
||||
# - VAULT_TOKEN set (env) or /etc/vault.d/root.token readable.
|
||||
# - The `kv/` mount is enabled as KV v2.
|
||||
#
|
||||
# Requires:
|
||||
# - VAULT_ADDR (e.g. http://127.0.0.1:8200)
|
||||
# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh)
|
||||
# - curl, jq, openssl
|
||||
#
|
||||
# Usage:
|
||||
# tools/vault-seed-ops-repo.sh
|
||||
# tools/vault-seed-ops-repo.sh --dry-run
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success (seed applied, or already applied)
|
||||
# 1 precondition / API / mount-mismatch failure
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
|
||||
# shellcheck source=../lib/hvault.sh
|
||||
source "${REPO_ROOT}/lib/hvault.sh"
|
||||
|
||||
# KV v2 mount + logical paths
|
||||
KV_MOUNT="kv"
|
||||
OPS_REPO_PATH="disinto/shared/ops-repo"
|
||||
VAULT_BOT_PATH="disinto/bots/vault"
|
||||
|
||||
OPS_REPO_API="${KV_MOUNT}/data/${OPS_REPO_PATH}"
|
||||
VAULT_BOT_API="${KV_MOUNT}/data/${VAULT_BOT_PATH}"
|
||||
|
||||
log() { printf '[vault-seed-ops-repo] %s\n' "$*"; }
|
||||
die() { printf '[vault-seed-ops-repo] ERROR: %s\n' "$*" >&2; exit 1; }
|
||||
|
||||
# ── Flag parsing ─────────────────────────────────────────────────────────────
|
||||
DRY_RUN=0
|
||||
case "$#:${1-}" in
|
||||
0:)
|
||||
;;
|
||||
1:--dry-run)
|
||||
DRY_RUN=1
|
||||
;;
|
||||
1:-h|1:--help)
|
||||
printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")"
|
||||
printf 'Seed kv/disinto/shared/ops-repo with FORGE_TOKEN.\n\n'
|
||||
printf 'Copies token from kv/disinto/bots/vault if present;\n'
|
||||
printf 'otherwise generates a random value. Idempotent:\n'
|
||||
printf 'existing non-empty values are left untouched.\n\n'
|
||||
printf ' --dry-run Print planned actions without writing.\n'
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
die "invalid arguments: $* (try --help)"
|
||||
;;
|
||||
esac
|
||||
|
||||
# ── Preconditions ────────────────────────────────────────────────────────────
|
||||
for bin in curl jq openssl; do
|
||||
command -v "$bin" >/dev/null 2>&1 \
|
||||
|| die "required binary not found: ${bin}"
|
||||
done
|
||||
|
||||
[ -n "${VAULT_ADDR:-}" ] \
|
||||
|| die "VAULT_ADDR unset — e.g. export VAULT_ADDR=http://127.0.0.1:8200"
|
||||
hvault_token_lookup >/dev/null \
|
||||
|| die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN"
|
||||
|
||||
# ── Step 1/2: ensure kv/ mount exists and is KV v2 ───────────────────────────
|
||||
log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──"
|
||||
export DRY_RUN
|
||||
hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-ops-repo]" \
|
||||
|| die "KV mount check failed"
|
||||
|
||||
# ── Step 2/2: seed ops-repo from vault bot ───────────────────────────────────
|
||||
log "── Step 2/2: seed ${OPS_REPO_API} ──"
|
||||
|
||||
# Read existing ops-repo value
|
||||
existing_raw="$(hvault_get_or_empty "${OPS_REPO_API}")" \
|
||||
|| die "failed to read ${OPS_REPO_API}"
|
||||
|
||||
existing_token=""
|
||||
if [ -n "$existing_raw" ]; then
|
||||
existing_token="$(printf '%s' "$existing_raw" | jq -r '.data.data.token // ""')"
|
||||
fi
|
||||
|
||||
desired_token="$existing_token"
|
||||
action=""
|
||||
|
||||
if [ -z "$existing_token" ]; then
|
||||
# Token missing — try to copy from vault bot
|
||||
bot_raw="$(hvault_get_or_empty "${VAULT_BOT_API}")" || true
|
||||
if [ -n "$bot_raw" ]; then
|
||||
bot_token="$(printf '%s' "$bot_raw" | jq -r '.data.data.token // ""')"
|
||||
if [ -n "$bot_token" ]; then
|
||||
desired_token="$bot_token"
|
||||
action="copied"
|
||||
fi
|
||||
fi
|
||||
|
||||
# If still no token, generate one
|
||||
if [ -z "$desired_token" ]; then
|
||||
if [ "$DRY_RUN" -eq 1 ]; then
|
||||
action="generated (dry-run)"
|
||||
else
|
||||
desired_token="$(openssl rand -hex 32)"
|
||||
action="generated"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$action" ]; then
|
||||
log "all keys present at ${OPS_REPO_API} — no-op"
|
||||
log "token unchanged"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$DRY_RUN" -eq 1 ]; then
|
||||
log "[dry-run] ${OPS_REPO_PATH}: would ${action} token"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Write the token
|
||||
payload="$(jq -n --arg t "$desired_token" '{data: {token: $t}}')"
|
||||
_hvault_request POST "${OPS_REPO_API}" "$payload" >/dev/null \
|
||||
|| die "failed to write ${OPS_REPO_API}"
|
||||
|
||||
log "${OPS_REPO_PATH}: ${action} token"
|
||||
log "done — ${OPS_REPO_API} seeded"
|
||||
Loading…
Add table
Add a link
Reference in a new issue