Compare commits

..

1 commit

Author SHA1 Message Date
dev-qwen2
f692dd23e4 fix: vision(#623): end-to-end subpath routing smoke test for Forgejo + Woodpecker + chat (#1025)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/edge-subpath Pipeline failed
ci/woodpecker/pr/smoke-init Pipeline was successful
2026-04-19 05:03:26 +00:00
14 changed files with 508 additions and 297 deletions

View file

@ -308,21 +308,6 @@ def main() -> int:
"63bfa88d71764c95c65a9a248f3e40ab": "Vault-seed preconditions: binary check end + VAULT_ADDR die", "63bfa88d71764c95c65a9a248f3e40ab": "Vault-seed preconditions: binary check end + VAULT_ADDR die",
"34873ad3570b211ce1d90468ab6ac94c": "Vault-seed preconditions: VAULT_ADDR die + hvault_token_lookup", "34873ad3570b211ce1d90468ab6ac94c": "Vault-seed preconditions: VAULT_ADDR die + hvault_token_lookup",
"71a52270f249e843cda48ad896d9f781": "Vault-seed preconditions: VAULT_ADDR + hvault_token_lookup + die", "71a52270f249e843cda48ad896d9f781": "Vault-seed preconditions: VAULT_ADDR + hvault_token_lookup + die",
# Common vault-seed script flag parsing patterns
# Shared across tools/vault-seed-{forgejo,ops-repo}.sh
"6906b7787796c2ccb8dd622e2ad4e7bf": "vault-seed DRY_RUN init + case pattern (forgejo + ops-repo)",
"a0df5283b616b964f8bc32fd99ec1b5a": "vault-seed case pattern start (forgejo + ops-repo)",
"e15e3272fdd9f0f46ce9e726aea9f853": "vault-seed case pattern dry-run handler (forgejo + ops-repo)",
"c9f22385cc49a3dac1d336bc14c6315b": "vault-seed DRY_RUN assignment (forgejo + ops-repo)",
"106f4071e88f841b3208b01144cd1c39": "vault-seed case pattern dry-run end (forgejo + ops-repo)",
"c15506dcb6bb340b25d1c39d442dd2e6": "vault-seed help text + invalid arg handler (forgejo + ops-repo)",
"1feecd3b3caf00045fae938ddf2811de": "vault-seed invalid arg handler (forgejo + ops-repo)",
"919780d5e7182715344f5aa02b191294": "vault-seed invalid arg + esac pattern (forgejo + ops-repo)",
"8dce1d292bce8e60ef4c0665b62945b0": "vault-seed esac + binary check loop (forgejo + ops-repo)",
"ca043687143a5b47bd54e65a99ce8ee8": "vault-seed binary check loop start (forgejo + ops-repo)",
"aefd9f655411a955395e6e5995ddbe6f": "vault-seed binary check pattern (forgejo + ops-repo)",
"60f0c46deb5491599457efb4048918e5": "vault-seed VAULT_ADDR + hvault_token_lookup check (forgejo + ops-repo)",
"f6838f581ef6b4d82b55268389032769": "vault-seed VAULT_ADDR + hvault_token_lookup die (forgejo + ops-repo)",
} }
if not sh_files: if not sh_files:

View file

@ -0,0 +1,55 @@
# .woodpecker/edge-subpath.yml — Edge subpath routing smoke test
#
# Runs end-to-end smoke tests for Forgejo, Woodpecker, and chat subpath routing:
# - Forgejo at /forge/
# - Woodpecker at /ci/
# - Chat at /chat/
# - Staging at /staging/
#
# Tests:
# 1. Root / redirects to /forge/
# 2. Forgejo login at /forge/ completes without redirect loops
# 3. Forgejo OAuth callback for Woodpecker succeeds under subpath
# 4. Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS)
# 5. Chat OAuth login flow works at /chat/login
# 6. Forward_auth on /chat/* rejects unauthenticated requests with 401
# 7. Staging content loads at /staging/
#
# Triggers:
# - Pull requests that modify edge-related files
# - Manual trigger for on-demand testing
#
# Environment variables (set in CI or via pipeline):
# EDGE_BASE_URL — Edge proxy URL (default: http://localhost)
# EDGE_TIMEOUT — Request timeout in seconds (default: 30)
# EDGE_MAX_RETRIES — Max retries per request (default: 3)
#
# When to run:
# - Any change to edge.hcl, docker/edge/, tools/edge-control/
# - Any change to this pipeline file
# - Manual trigger for testing edge deployments
when:
event: [pull_request, manual]
path:
- "nomad/jobs/edge.hcl"
- "docker/edge/**"
- "tools/edge-control/**"
- ".woodpecker/edge-subpath.yml"
- "tests/smoke-edge-subpath.sh"
clone:
git:
image: alpine/git
commands:
- AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
- git clone --depth 1 "$AUTH_URL" .
- git fetch --depth 1 origin "$CI_COMMIT_REF"
- git checkout FETCH_HEAD
steps:
- name: edge-subpath-smoke-test
image: alpine:3.19
commands:
- apk add --no-cache bash curl jq
- bash tests/smoke-edge-subpath.sh

View file

@ -802,7 +802,6 @@ _disinto_init_nomad() {
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;; woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
agents) seed_name="agents" ;; agents) seed_name="agents" ;;
chat) seed_name="chat" ;; chat) seed_name="chat" ;;
edge) seed_name="ops-repo" ;;
esac esac
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh" local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
if [ -x "$seed_script" ]; then if [ -x "$seed_script" ]; then
@ -984,7 +983,6 @@ _disinto_init_nomad() {
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;; woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
agents) seed_name="agents" ;; agents) seed_name="agents" ;;
chat) seed_name="chat" ;; chat) seed_name="chat" ;;
edge) seed_name="ops-repo" ;;
esac esac
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh" local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
if [ -x "$seed_script" ]; then if [ -x "$seed_script" ]; then

View file

@ -173,15 +173,11 @@ PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
sleep 1200 # 20 minutes sleep 1200 # 20 minutes
done) & done) &
# ── Load optional secrets from secrets/*.enc (#777) ──────────────────── # ── Load required secrets from secrets/*.enc (#777) ────────────────────
# Engagement collection (collect-engagement.sh) requires CADDY_ secrets to # Edge container declares its required secrets; missing ones cause a hard fail.
# SCP access logs from a remote edge host. When age key or secrets dir is
# missing, or any secret fails to decrypt, log a warning and skip the cron.
# Caddy itself does not depend on these secrets.
_AGE_KEY_FILE="${HOME}/.config/sops/age/keys.txt" _AGE_KEY_FILE="${HOME}/.config/sops/age/keys.txt"
_SECRETS_DIR="/opt/disinto/secrets" _SECRETS_DIR="/opt/disinto/secrets"
EDGE_REQUIRED_SECRETS="CADDY_SSH_KEY CADDY_SSH_HOST CADDY_SSH_USER CADDY_ACCESS_LOG" EDGE_REQUIRED_SECRETS="CADDY_SSH_KEY CADDY_SSH_HOST CADDY_SSH_USER CADDY_ACCESS_LOG"
EDGE_ENGAGEMENT_READY=0 # Assume not ready until proven otherwise
_edge_decrypt_secret() { _edge_decrypt_secret() {
local enc_path="${_SECRETS_DIR}/${1}.enc" local enc_path="${_SECRETS_DIR}/${1}.enc"
@ -196,53 +192,47 @@ if [ -f "$_AGE_KEY_FILE" ] && [ -d "$_SECRETS_DIR" ]; then
export "$_secret_name=$_val" export "$_secret_name=$_val"
done done
if [ -n "$_missing" ]; then if [ -n "$_missing" ]; then
echo "WARN: required engagement secrets missing from secrets/*.enc:${_missing}" >&2 echo "FATAL: required secrets missing from secrets/*.enc:${_missing}" >&2
echo " collect-engagement cron will be skipped. Run 'disinto secrets add <NAME>' to enable." >&2 echo " Run 'disinto secrets add <NAME>' for each missing secret." >&2
EDGE_ENGAGEMENT_READY=0 echo " If migrating from .env.vault.enc, run 'disinto secrets migrate-from-vault' first." >&2
else exit 1
echo "edge: loaded required engagement secrets: ${EDGE_REQUIRED_SECRETS}" >&2
EDGE_ENGAGEMENT_READY=1
fi fi
echo "edge: loaded required secrets: ${EDGE_REQUIRED_SECRETS}" >&2
else else
echo "WARN: age key (${_AGE_KEY_FILE}) or secrets dir (${_SECRETS_DIR}) not found — engagement secrets unavailable" >&2 echo "FATAL: age key (${_AGE_KEY_FILE}) or secrets dir (${_SECRETS_DIR}) not found — cannot load required secrets" >&2
echo " collect-engagement cron will be skipped. Run 'disinto secrets add <NAME>' to enable." >&2 echo " Ensure age is installed and secrets/*.enc files are present." >&2
EDGE_ENGAGEMENT_READY=0 exit 1
fi fi
# Start daily engagement collection cron loop in background (#745) # Start daily engagement collection cron loop in background (#745)
# Runs collect-engagement.sh daily at ~23:50 UTC via a sleep loop that # Runs collect-engagement.sh daily at ~23:50 UTC via a sleep loop that
# calculates seconds until the next 23:50 window. SSH key from secrets/*.enc (#777). # calculates seconds until the next 23:50 window. SSH key from secrets/*.enc (#777).
# Guarded: only start if EDGE_ENGAGEMENT_READY=1. (while true; do
if [ "$EDGE_ENGAGEMENT_READY" -eq 1 ]; then # Calculate seconds until next 23:50 UTC
(while true; do _now=$(date -u +%s)
# Calculate seconds until next 23:50 UTC _target=$(date -u -d "today 23:50" +%s 2>/dev/null || date -u -d "23:50" +%s 2>/dev/null || echo 0)
_now=$(date -u +%s) if [ "$_target" -le "$_now" ]; then
_target=$(date -u -d "today 23:50" +%s 2>/dev/null || date -u -d "23:50" +%s 2>/dev/null || echo 0) _target=$(( _target + 86400 ))
if [ "$_target" -le "$_now" ]; then fi
_target=$(( _target + 86400 )) _sleep_secs=$(( _target - _now ))
fi echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2
_sleep_secs=$(( _target - _now )) sleep "$_sleep_secs"
echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2 _fetch_log="/tmp/caddy-access-log-fetch.log"
sleep "$_sleep_secs" _ssh_key_file=$(mktemp)
_fetch_log="/tmp/caddy-access-log-fetch.log" printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
_ssh_key_file=$(mktemp) chmod 0600 "$_ssh_key_file"
printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file" scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \
chmod 0600 "$_ssh_key_file" "${CADDY_SSH_USER}@${CADDY_SSH_HOST}:${CADDY_ACCESS_LOG}" \
scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \ "$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true
"${CADDY_SSH_USER}@${CADDY_SSH_HOST}:${CADDY_ACCESS_LOG}" \ rm -f "$_ssh_key_file"
"$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true if [ -s "$_fetch_log" ]; then
rm -f "$_ssh_key_file" CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \
if [ -s "$_fetch_log" ]; then | tee -a /opt/disinto-logs/collect-engagement.log || true
CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \ else
| tee -a /opt/disinto-logs/collect-engagement.log || true echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2
else fi
echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2 rm -f "$_fetch_log"
fi done) &
rm -f "$_fetch_log"
done) &
else
echo "edge: collect-engagement cron skipped (EDGE_ENGAGEMENT_READY=0)" >&2
fi
# Nomad template renders Caddyfile to /local/Caddyfile via service discovery; # Nomad template renders Caddyfile to /local/Caddyfile via service discovery;
# copy it into the expected location if present (compose uses the mounted path). # copy it into the expected location if present (compose uses the mounted path).

View file

@ -0,0 +1 @@
{"issue":850,"group":"lib/generators.sh","title":"compose dup-detection smoke CI failures","reason":"4+ consecutive ci_exhausted failures across PRs #872 #908 #971; planner flagged for human re-scope","ts":"2026-04-19T00:00:00Z"}

View file

@ -2,12 +2,7 @@
{ {
"action": "edit_body", "action": "edit_body",
"issue": 1025, "issue": 1025,
"body": "## Prior art: PR #1033 (open, branch `fix/issue-1025` retained)\n\nFirst attempt by dev-qwen2 (head `f692dd2`). Test script (`tests/smoke-edge-subpath.sh`, 13.8 KB) and pipeline (`.woodpecker/edge-subpath.yml`) both landed and look reasonable, but the **CI harness design is wrong**: the pipeline boots a bare `alpine:3.19` container and runs the smoke script directly against `BASE_URL=http://localhost`, with no stack to test against.\n\n**This is a harness design gap, not a script bug.** The smoke script itself is a reasonable post-deploy tool — the mistake was trying to exercise it as a hermetic CI step.\n\n**Approach (Option 1 — split the work):**\n\nKeep `tests/smoke-edge-subpath.sh` as an out-of-CI post-deploy tool (accepts `BASE_URL` env var). Replace the CI pipeline step that tries to curl a live stack with static checks only: `shellcheck`, `caddy validate` on the generated Caddyfile, and a template-substitution unit test that verifies routing block shape.\n\nBranch `fix/issue-1025` is preserved at `f692dd2` — the smoke script body is reusable; only the pipeline harness needs a rethink.\n\n**Timeline:**\n- 2026-04-19 09:14 — dev-qwen2 last pushed `f692dd2`\n- 3 pipelines (#1378/#1380/#1382) all fail: no service to curl (connection refused)\n\n## Acceptance criteria\n- [ ] `.woodpecker/edge-subpath.yml` pipeline runs `shellcheck` on `tests/smoke-edge-subpath.sh` with no live service curl\n- [ ] `caddy validate` runs on the generated Caddyfile in CI (template-substitution unit test)\n- [ ] A template-substitution test verifies the Caddyfile routing block shape (forge/ci/staging/chat paths)\n- [ ] `tests/smoke-edge-subpath.sh` accepts `BASE_URL` env var for post-deploy staging runs\n- [ ] CI green (no connection-refused failures on Woodpecker)\n\n## Affected files\n- `.woodpecker/edge-subpath.yml` — pipeline config (static checks only, no service curl)\n- `tests/smoke-edge-subpath.sh` — out-of-CI smoke script (reusable from PR #1033)\n\n## Dependencies\n- #1038 should land first to unblock local edge staging runs (optional — CI fix is independent)" "body": "## Goal\nVerify that Forgejo, Woodpecker, and chat all function correctly when served\nunder /forge/, /ci/, and /chat/ subpaths on a single domain. Catch redirect\nloops, OAuth callback failures, and asset 404s before they hit production.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] Forgejo login at /forge/ completes without redirect loops\n- [ ] Forgejo OAuth callback for Woodpecker succeeds under subpath\n- [ ] Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS)\n- [ ] Chat OAuth login flow works at /chat/login\n- [ ] Forward_auth on /chat/* rejects unauthenticated requests with 401\n- [ ] Staging content loads at /staging/\n- [ ] Root / redirects to /forge/\n- [ ] CI pipeline added to .woodpecker/ to run this test on edge-related changes\n\n## Affected files\n- `nomad/jobs/edge.hcl` — edge Caddy routing config under test\n- `docker/edge/` — edge container and Caddyfile template\n- `tools/edge-control/register.sh` — route registration\n- `.woodpecker/` — CI pipeline for edge smoke test\n\n## Dependencies\nNone — first issue in sprint.\n"
},
{
"action": "remove_label",
"issue": 1025,
"label": "blocked"
}, },
{ {
"action": "add_label", "action": "add_label",
@ -16,37 +11,32 @@
}, },
{ {
"action": "edit_body", "action": "edit_body",
"issue": 1038, "issue": 1026,
"body": "## Problem\n\n`disinto-edge` crashloops on any deployment that has not opted into the age-encrypted secret store (#777), because the edge entrypoint treats four secrets as unconditionally required:\n\n```\nFATAL: age key (/home/agent/.config/sops/age/keys.txt) or secrets dir (/opt/disinto/secrets) not found — cannot load required secrets\n```\n\nObserved on `disinto-dev-box` (container `disinto-edge`, restarting every ~30s), which blocks PR #1033 (edge-subpath smoke test) and any other work that depends on a running edge.\n\n## Root cause\n\n`docker/edge/entrypoint-edge.sh:176-205` requires:\n\n- `~/.config/sops/age/keys.txt`\n- `/opt/disinto/secrets/` with `.enc` files for `CADDY_SSH_KEY`, `CADDY_SSH_HOST`, `CADDY_SSH_USER`, `CADDY_ACCESS_LOG`.\n\nThese four secrets feed exactly one feature: the daily 23:50 UTC `collect-engagement.sh` cron (#745), which SCPs Caddy access logs from a **remote production edge host** for engagement parsing. On a local factory box or any deployment that has not set up a remote edge, this code path has no target — yet its absence kills the whole edge container.\n\n## Fix\n\nMake the secrets block **optional**. When age key or secrets dir is missing, or any of the four CADDY_ secrets fail to decrypt, log a warning and skip the `collect-engagement` cron loop. Caddy itself does not depend on these secrets and should start normally.\n\nThe concrete edit is around lines 176-205 of `docker/edge/entrypoint-edge.sh` — guard the secret-loading block with a check for the age key and secrets dir, set `EDGE_ENGAGEMENT_READY=0` on failure, and skip cron registration when `EDGE_ENGAGEMENT_READY != 1`.\n\n## Acceptance criteria\n- [ ] `docker/edge/entrypoint-edge.sh` loads CADDY_ secrets optionally — missing age key or secrets dir logs a warning and continues, does not FATAL\n- [ ] Caddy starts normally when CADDY_ secrets are absent\n- [ ] `collect-engagement` cron is skipped (not registered) when engagement secrets are unavailable\n- [ ] On deployments WITH secrets configured, behavior is unchanged (collect-engagement cron still fires at 23:50 UTC)\n- [ ] CI green\n\n## Affected files\n- `docker/edge/entrypoint-edge.sh` — lines 176-205, secrets loading block made optional" "body": "## Goal\nReplace the blocking one-shot claude --print invocation in the chat backend with\na WebSocket connection that streams tokens to the UI as they arrive.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] /chat/ws endpoint accepts WebSocket upgrade with valid session cookie\n- [ ] /chat/ws rejects upgrade if session cookie is missing or expired\n- [ ] Chat backend streams claude output over WebSocket as text frames\n- [ ] UI renders tokens incrementally as they arrive\n- [ ] Rate limiting still enforced on WebSocket messages\n- [ ] Caddy proxies WebSocket upgrade correctly through /chat/ws with forward_auth\n\n## Affected files\n- `docker/chat/server.py` — chat backend WebSocket endpoint\n- `docker/chat/ui/` — frontend WebSocket client rendering\n- `nomad/jobs/edge.hcl` — Caddy WebSocket proxy config\n- `nomad/jobs/chat.hcl` — chat Nomad job\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n"
},
{
"action": "remove_label",
"issue": 1038,
"label": "blocked"
}, },
{ {
"action": "add_label", "action": "add_label",
"issue": 1038, "issue": 1026,
"label": "backlog" "label": "backlog"
}, },
{ {
"action": "edit_body", "action": "edit_body",
"issue": 850, "issue": 1027,
"body": "## Problem\n\nWhen the compose generator emits the same service name twice — e.g. both the legacy `ENABLE_LLAMA_AGENT=1` branch and a matching `[agents.llama]` TOML block produce an `agents-llama:` key — the failure is deferred all the way to `docker compose` YAML parsing:\n\n```\nfailed to parse /home/johba/disinto/docker-compose.yml: yaml: construct errors:\n line 4: line 431: mapping key \"agents-llama\" already defined at line 155\n```\n\nBy then, the user has already paid the cost of: pre-build binary downloads, generator run, Caddyfile regeneration. The only hint about what went wrong is a line number in a generated file. Root cause (dual activation) is not surfaced.\n\n## Fix\n\nAdd a generate-time guard to `lib/generators.sh`:\n\n- After collecting all service blocks to emit, compare the set of service names against duplicates.\n- If a duplicate is detected, abort with a clear message naming both sources of truth (e.g. `\"agents-llama\" emitted twice — from ENABLE_LLAMA_AGENT=1 and from [agents.llama] in projects/disinto.toml; remove one`).\n\n## Prior art: PR #872 (closed, branch `fix/issue-850` retained)\n\ndev-qwen's first attempt (`db009e3`) landed the dup-detection logic in `lib/generators.sh` correctly (unit test `tests/test-duplicate-service-detection.sh` passes all 3 cases), but the smoke test fails on CI.\n\n**Why the smoke test fails:** sections 1-7 of `smoke-init.sh` already run `bin/disinto init`, materializing `docker-compose.yml`. Section 8 re-invokes `bin/disinto init` to verify the dup guard fires — but `_generate_compose_impl` early-returns with `\"Compose: already exists, skipping\"` before reaching the dup-check.\n\n**Suggested fix:** in `tests/smoke-init.sh` section 8 (around line 452, before the second `bin/disinto init` invocation), add:\n\n```bash\nrm -f \"${FACTORY_ROOT}/docker-compose.yml\"\n```\n\nso the generator actually runs and the dup-detection path is exercised. Do **not** hoist the dup-check above the early-return.\n\nThe branch `fix/issue-850` is preserved as a starting point — pick up from `db009e3` and patch the smoke-test cleanup.\n\nRelated: #846.\n\n## Acceptance criteria\n- [ ] `bin/disinto init` with a config that would produce duplicate service names aborts with a clear error message naming both sources (e.g. `ENABLE_LLAMA_AGENT=1` and `[agents.llama]` TOML block)\n- [ ] `tests/smoke-init.sh` section 8 removes `docker-compose.yml` before re-invoking `disinto init` so the dup guard is exercised\n- [ ] Unit test `tests/test-duplicate-service-detection.sh` passes all 3 cases\n- [ ] CI green (smoke-init.sh section 8 no longer skips dup detection)\n\n## Affected files\n- `lib/generators.sh` — duplicate service name check after collecting all service blocks\n- `tests/smoke-init.sh` — section 8: add `rm -f \\${FACTORY_ROOT}/docker-compose.yml` before second `disinto init`" "body": "## Goal\nGive the chat container Claude session read-write access to the project working\ntree so the operator can inspect, explain, or modify code — scoped to that tree\nonly, with no access to factory internals, secrets, or Docker socket.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] Chat container bind-mounts the project working tree as a named volume\n- [ ] Claude invocation in server.py sets cwd to the workspace directory\n- [ ] Claude permission mode is acceptEdits (not bypassPermissions)\n- [ ] verify-chat-sandbox.sh updated to assert workspace mount exists\n- [ ] Compose generator adds the workspace volume conditionally\n\n## Affected files\n- `docker/chat/server.py` — Claude invocation and cwd setup\n- `tools/edge-control/verify-chat-sandbox.sh` — sandbox verification\n- `lib/generators.sh` — Compose generator workspace volume\n- `nomad/jobs/chat.hcl` — chat container bind-mount config\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n"
},
{
"action": "remove_label",
"issue": 850,
"label": "blocked"
}, },
{ {
"action": "add_label", "action": "add_label",
"issue": 850, "issue": 1027,
"label": "backlog" "label": "backlog"
}, },
{ {
"action": "comment", "action": "edit_body",
"issue": 758, "issue": 1028,
"body": "This issue is the critical path blocker for #820 (ops repo re-seed) and #982 (collect-engagement commit fix). Both are in the backlog and ready to merge, but cannot run until ops repo branch protection is resolved. Needs admin/human action to change Forgejo branch protection settings on disinto-ops — no code change can unblock this." "body": "## Goal\nIf the smoke test reveals unfixable subpath issues, automate the pivot to\nper-service subdomains so the switch is a single config change.\n\n## Sprint\nPart of sprint [edge-subpath-chat](https://forgejo:3000/disinto-admin/disinto-ops/pulls/37) — vision issue #623.\n\n## Acceptance criteria\n- [ ] generators.sh _generate_caddyfile_impl accepts EDGE_ROUTING_MODE env var\n- [ ] In subdomain mode, Caddyfile emits four host blocks per edge-routing-fallback.md\n- [ ] register.sh registers additional subdomain routes when EDGE_ROUTING_MODE=subdomain\n- [ ] OAuth redirect URIs in ci-setup.sh respect routing mode\n- [ ] .env template documents EDGE_ROUTING_MODE with a comment referencing the fallback doc\n\n## Affected files\n- `lib/generators.sh` — _generate_caddyfile_impl routing mode switch\n- `tools/edge-control/register.sh` — subdomain route registration\n- `lib/ci-setup.sh` — OAuth redirect URI handling\n- `projects/*.toml.example` — .env template documentation\n\n## Dependencies\n- Depends on #1025 — subpath routing smoke test\n"
},
{
"action": "add_label",
"issue": 1028,
"label": "backlog"
} }
] ]

View file

@ -1,4 +1,4 @@
<!-- last-reviewed: 0bb04545d47fb43b2cab0a1f4406c2a2b57f4eba --> <!-- last-reviewed: a467d613a44b9b475a60c14c4162621e846969ea -->
# Shared Helpers (`lib/`) # Shared Helpers (`lib/`)
All agents source `lib/env.sh` as their first action. Additional helpers are All agents source `lib/env.sh` as their first action. Additional helpers are
@ -35,4 +35,4 @@ sourced as needed.
| `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) | | `lib/hire-agent.sh` | `disinto_hire_an_agent()` — user creation, `.profile` repo setup, formula copying, branch protection, and state marker creation for hiring a new agent. Requires `FORGE_URL`, `FORGE_TOKEN`, `FACTORY_ROOT`, `PROJECT_NAME`. Extracted from `bin/disinto`. | bin/disinto (hire) |
| `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) | | `lib/release.sh` | `disinto_release()` — vault TOML creation, branch setup on ops repo, PR creation, and auto-merge request for a versioned release. `_assert_release_globals()` validates required env vars. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_OPS_REPO`, `FACTORY_ROOT`, `PRIMARY_BRANCH`. Extracted from `bin/disinto`. | bin/disinto (release) |
| `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_get_or_empty(PATH)` — GET /v1/PATH; 200→raw body, 404→empty, else structured error + return 1 (used by sync scripts to distinguish "absent, create" from hard failure without tripping errexit, #881). `hvault_ensure_kv_v2(MOUNT, [LOG_PREFIX])` — idempotent KV v2 mount assertion: enables mount if absent, fails loudly if present as wrong type/version. Extracted from all `vault-seed-*.sh` scripts to eliminate dup-detector violations. Respects `DRY_RUN=1`. `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. `_hvault_seed_key(PATH, KEY, [GENERATOR])` — seed one KV key if absent; reads existing data and merges to preserve sibling keys (KV v2 replaces atomically); returns 0=created, 1=unchanged, 2=API error (#992). All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | `tools/vault-apply-policies.sh`, `tools/vault-apply-roles.sh`, `lib/init/nomad/vault-nomad-auth.sh`, `tools/vault-seed-*.sh` | | `lib/hvault.sh` | HashiCorp Vault helper module. `hvault_kv_get(PATH, [KEY])` — read KV v2 secret, optionally extract one key. `hvault_kv_put(PATH, KEY=VAL ...)` — write KV v2 secret. `hvault_kv_list(PATH)` — list keys at a KV path. `hvault_get_or_empty(PATH)` — GET /v1/PATH; 200→raw body, 404→empty, else structured error + return 1 (used by sync scripts to distinguish "absent, create" from hard failure without tripping errexit, #881). `hvault_ensure_kv_v2(MOUNT, [LOG_PREFIX])` — idempotent KV v2 mount assertion: enables mount if absent, fails loudly if present as wrong type/version. Extracted from all `vault-seed-*.sh` scripts to eliminate dup-detector violations. Respects `DRY_RUN=1`. `hvault_policy_apply(NAME, FILE)` — idempotent policy upsert. `hvault_jwt_login(ROLE, JWT)` — exchange JWT for short-lived token. `hvault_token_lookup()` — returns TTL/policies/accessor for current token. `_hvault_seed_key(PATH, KEY, [GENERATOR])` — seed one KV key if absent; reads existing data and merges to preserve sibling keys (KV v2 replaces atomically); returns 0=created, 1=unchanged, 2=API error (#992). All functions use `VAULT_ADDR` + `VAULT_TOKEN` from env (fallback: `/etc/vault.d/root.token`), emit structured JSON errors to stderr on failure. Tests: `tests/lib-hvault.bats` (requires `vault server -dev`). | `tools/vault-apply-policies.sh`, `tools/vault-apply-roles.sh`, `lib/init/nomad/vault-nomad-auth.sh`, `tools/vault-seed-*.sh` |
| `lib/init/nomad/` | Nomad+Vault installer scripts. `cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling; `HOST_VOLUME_DIRS` array now includes `/srv/disinto/docker` (for staging file-server, S5.2, #989, #992). `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. `deploy.sh` — S4 dependency-ordered Nomad job deploy + health-wait; takes a list of jobspec basenames, submits each to Nomad and polls until healthy before proceeding to the next; supports `--dry-run` and per-job timeout overrides via `JOB_READY_TIMEOUT_<JOBNAME>`; global default timeout `JOB_READY_TIMEOUT_SECS` is 360s (raised from 240s for chat cold-start, #1036); invoked by `bin/disinto --with <svc>` and `cluster-up.sh`; deploy order now covers staging, chat, edge (S5.5, #992). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. | `bin/disinto init --backend=nomad` | | `lib/init/nomad/` | Nomad+Vault installer scripts. `cluster-up.sh` — idempotent Step-0 orchestrator that runs all steps in order (installs packages, writes HCL, enables systemd units, unseals Vault); uses `poll_until_healthy()` helper for deduped readiness polling; `HOST_VOLUME_DIRS` array now includes `/srv/disinto/docker` (for staging file-server, S5.2, #989, #992). `install.sh` — installs pinned Nomad+Vault apt packages. `vault-init.sh` — initializes Vault (unseal keys → `/etc/vault.d/`), creates dev-persisted unseal unit. `lib-systemd.sh` — shared systemd unit helpers. `systemd-nomad.sh`, `systemd-vault.sh` — write and enable service units. `vault-nomad-auth.sh` — Step-2 script that enables Vault's JWT auth at path `jwt-nomad`, writes the JWKS/algs config pointing at Nomad's workload-identity signer, delegates role sync to `tools/vault-apply-roles.sh`, installs `/etc/nomad.d/server.hcl`, and SIGHUPs `nomad.service` if the file changed (#881). `wp-oauth-register.sh` — S3.3 script that creates the Woodpecker OAuth2 app in Forgejo and stores `forgejo_client`/`forgejo_secret` in Vault KV v2 at `kv/disinto/shared/woodpecker`; idempotent (skips if app or secrets already present); called by `bin/disinto --with woodpecker`. `deploy.sh` — S4 dependency-ordered Nomad job deploy + health-wait; takes a list of jobspec basenames, submits each to Nomad and polls until healthy before proceeding to the next; supports `--dry-run` and per-job timeout overrides via `JOB_READY_TIMEOUT_<JOBNAME>`; invoked by `bin/disinto --with <svc>` and `cluster-up.sh`; deploy order now covers staging, chat, edge (S5.5, #992). Idempotent: each step checks current state before acting. Sourced and called by `cluster-up.sh`; not sourced by agents. | `bin/disinto init --backend=nomad` |

View file

@ -16,7 +16,7 @@
# Environment: # Environment:
# REPO_ROOT — absolute path to repo root (defaults to parent of # REPO_ROOT — absolute path to repo root (defaults to parent of
# this script's parent directory) # this script's parent directory)
# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 360) # JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 240)
# JOB_READY_TIMEOUT_<JOBNAME> — per-job timeout override (e.g., # JOB_READY_TIMEOUT_<JOBNAME> — per-job timeout override (e.g.,
# JOB_READY_TIMEOUT_FORGEJO=300) # JOB_READY_TIMEOUT_FORGEJO=300)
# #
@ -33,7 +33,7 @@ set -euo pipefail
# ── Configuration ──────────────────────────────────────────────────────────── # ── Configuration ────────────────────────────────────────────────────────────
SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}" REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}"
JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-360}" JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-240}"
DRY_RUN=0 DRY_RUN=0

View file

@ -157,10 +157,9 @@ issue_claim() {
return 1 return 1
fi fi
local ip_id bl_id bk_id local ip_id bl_id
ip_id=$(_ilc_in_progress_id) ip_id=$(_ilc_in_progress_id)
bl_id=$(_ilc_backlog_id) bl_id=$(_ilc_backlog_id)
bk_id=$(_ilc_blocked_id)
if [ -n "$ip_id" ]; then if [ -n "$ip_id" ]; then
curl -sf -X POST \ curl -sf -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \ -H "Authorization: token ${FORGE_TOKEN}" \
@ -173,12 +172,6 @@ issue_claim() {
-H "Authorization: token ${FORGE_TOKEN}" \ -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true "${FORGE_API}/issues/${issue}/labels/${bl_id}" >/dev/null 2>&1 || true
fi fi
# Clear blocked label on re-claim — starting work is implicit resolution of prior block
if [ -n "$bk_id" ]; then
curl -sf -X DELETE \
-H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${issue}/labels/${bk_id}" >/dev/null 2>&1 || true
fi
_ilc_log "claimed issue #${issue}" _ilc_log "claimed issue #${issue}"
return 0 return 0
} }

View file

@ -1,4 +1,4 @@
<!-- last-reviewed: 0bb04545d47fb43b2cab0a1f4406c2a2b57f4eba --> <!-- last-reviewed: a467d613a44b9b475a60c14c4162621e846969ea -->
# nomad/ — Agent Instructions # nomad/ — Agent Instructions
Nomad + Vault HCL for the factory's single-node cluster. These files are Nomad + Vault HCL for the factory's single-node cluster. These files are
@ -21,7 +21,7 @@ see issues #821#992 for the step breakdown.
| `jobs/agents.hcl` | submitted via `lib/init/nomad/deploy.sh` | All 7 agent roles (dev, review, gardener, planner, predictor, supervisor, architect) + llama variant; Vault-templated bot tokens via `service-agents` policy; `force_pull = false` — image is built locally by `bin/disinto --with agents`, no registry (S4.1, S4-fix-2, S4-fix-5, #955, #972, #978) | | `jobs/agents.hcl` | submitted via `lib/init/nomad/deploy.sh` | All 7 agent roles (dev, review, gardener, planner, predictor, supervisor, architect) + llama variant; Vault-templated bot tokens via `service-agents` policy; `force_pull = false` — image is built locally by `bin/disinto --with agents`, no registry (S4.1, S4-fix-2, S4-fix-5, #955, #972, #978) |
| `jobs/staging.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy file-server mounting `docker/` as `/srv/site:ro`; no Vault integration; **dynamic host port** (no static 80 — edge owns 80/443, collision fixed in S5-fix-7 #1018); edge discovers via Nomad service registration (S5.2, #989) | | `jobs/staging.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy file-server mounting `docker/` as `/srv/site:ro`; no Vault integration; **dynamic host port** (no static 80 — edge owns 80/443, collision fixed in S5-fix-7 #1018); edge discovers via Nomad service registration (S5.2, #989) |
| `jobs/chat.hcl` | submitted via `lib/init/nomad/deploy.sh` | Claude chat UI; custom `disinto/chat:local` image; sandbox hardening (cap_drop ALL, **tmpfs via mount block** not `tmpfs=` arg — S5-fix-5 #1012, pids_limit 128); Vault-templated OAuth secrets via `service-chat` policy (S5.2, #989) | | `jobs/chat.hcl` | submitted via `lib/init/nomad/deploy.sh` | Claude chat UI; custom `disinto/chat:local` image; sandbox hardening (cap_drop ALL, **tmpfs via mount block** not `tmpfs=` arg — S5-fix-5 #1012, pids_limit 128); Vault-templated OAuth secrets via `service-chat` policy (S5.2, #989) |
| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; **both Caddy and dispatcher tasks use `network_mode = "host"`** — upstreams are `127.0.0.1:<port>` (forgejo :3000, woodpecker :8000, chat :8080), not Docker hostnames (#1031, #1034); `FORGE_URL` rendered via Nomad service discovery template (not static env) to handle bridge vs. host network differences (#1034); dispatcher Vault secret path changed to `kv/data/disinto/shared/ops-repo` (#1041); Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988) | | `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988) |
Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the
split between `server.hcl` and `client.hcl` is for readability, not split between `server.hcl` and `client.hcl` is for readability, not

View file

@ -123,19 +123,6 @@ job "edge" {
# Caddyfile via Nomad service discovery (S5-fix-7, issue #1018) # Caddyfile via Nomad service discovery (S5-fix-7, issue #1018)
# Renders staging upstream from Nomad service registration instead of # Renders staging upstream from Nomad service registration instead of
# hardcoded staging:80. Caddy picks up /local/Caddyfile via entrypoint. # hardcoded staging:80. Caddy picks up /local/Caddyfile via entrypoint.
# Forge URL via Nomad service discovery (issue #1034) resolves forgejo
# service address/port dynamically for bridge network compatibility.
template {
destination = "local/forge.env"
env = true
change_mode = "restart"
data = <<EOT
{{ range service "forgejo" -}}
FORGE_URL=http://{{ .Address }}:{{ .Port }}
{{- end }}
EOT
}
template { template {
destination = "local/Caddyfile" destination = "local/Caddyfile"
change_mode = "restart" change_mode = "restart"
@ -187,6 +174,7 @@ EOT
# Non-secret env # Non-secret env
env { env {
FORGE_URL = "http://127.0.0.1:3000"
FORGE_REPO = "disinto-admin/disinto" FORGE_REPO = "disinto-admin/disinto"
DISINTO_CONTAINER = "1" DISINTO_CONTAINER = "1"
PROJECT_NAME = "disinto" PROJECT_NAME = "disinto"
@ -225,21 +213,6 @@ EOT
read_only = false read_only = false
} }
# Forge URL via Nomad service discovery (issue #1034)
# Resolves forgejo service address/port dynamically for bridge network
# compatibility. Template-scoped to dispatcher task (Nomad doesn't
# propagate templates across tasks).
template {
destination = "local/forge.env"
env = true
change_mode = "restart"
data = <<EOT
{{ range service "forgejo" -}}
FORGE_URL=http://{{ .Address }}:{{ .Port }}
{{- end }}
EOT
}
# Vault-templated secrets (S5.1, issue #988) # Vault-templated secrets (S5.1, issue #988)
# Renders FORGE_TOKEN from Vault KV v2 for ops repo access. # Renders FORGE_TOKEN from Vault KV v2 for ops repo access.
template { template {
@ -248,10 +221,10 @@ EOT
change_mode = "restart" change_mode = "restart"
error_on_missing_key = false error_on_missing_key = false
data = <<EOT data = <<EOT
{{- with secret "kv/data/disinto/shared/ops-repo" -}} {{- with secret "kv/data/disinto/bots/vault" -}}
FORGE_TOKEN={{ .Data.data.token }} FORGE_TOKEN={{ .Data.data.token }}
{{- else -}} {{- else -}}
# WARNING: kv/disinto/shared/ops-repo is empty run tools/vault-seed-ops-repo.sh # WARNING: kv/disinto/bots/vault is empty run tools/vault-seed-agents.sh
FORGE_TOKEN=seed-me FORGE_TOKEN=seed-me
{{- end }} {{- end }}
EOT EOT
@ -260,6 +233,7 @@ EOT
# Non-secret env # Non-secret env
env { env {
DISPATCHER_BACKEND = "nomad" DISPATCHER_BACKEND = "nomad"
FORGE_URL = "http://127.0.0.1:3000"
FORGE_REPO = "disinto-admin/disinto" FORGE_REPO = "disinto-admin/disinto"
FORGE_OPS_REPO = "disinto-admin/disinto-ops" FORGE_OPS_REPO = "disinto-admin/disinto-ops"
PRIMARY_BRANCH = "main" PRIMARY_BRANCH = "main"

View file

@ -426,19 +426,3 @@ setup_file() {
[[ "$output" == *"services to deploy: forgejo,woodpecker-server,woodpecker-agent,agents"* ]] [[ "$output" == *"services to deploy: forgejo,woodpecker-server,woodpecker-agent,agents"* ]]
[[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents"* ]] [[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents"* ]]
} }
# S5.1 / #1035 — edge service seeds ops-repo (dispatcher FORGE_TOKEN)
@test "disinto init --backend=nomad --with edge deploys edge" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with edge --dry-run
[ "$status" -eq 0 ]
# edge depends on all backend services, so all are included
[[ "$output" == *"services to deploy: edge,forgejo"* ]]
[[ "$output" == *"deployment order: forgejo woodpecker-server woodpecker-agent agents staging chat edge"* ]]
[[ "$output" == *"[deploy] [dry-run] nomad job validate"*"edge.hcl"* ]]
}
@test "disinto init --backend=nomad --with edge seeds ops-repo" {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with edge --dry-run
[ "$status" -eq 0 ]
[[ "$output" == *"tools/vault-seed-ops-repo.sh --dry-run"* ]]
}

390
tests/smoke-edge-subpath.sh Executable file
View file

@ -0,0 +1,390 @@
#!/usr/bin/env bash
# =============================================================================
# smoke-edge-subpath.sh — End-to-end subpath routing smoke test
#
# Verifies Forgejo, Woodpecker, and chat function correctly under subpaths:
# - Forgejo at /forge/
# - Woodpecker at /ci/
# - Chat at /chat/
# - Staging at /staging/
#
# Acceptance criteria:
# 1. Forgejo login at /forge/ completes without redirect loops
# 2. Forgejo OAuth callback for Woodpecker succeeds under subpath
# 3. Woodpecker dashboard loads all assets at /ci/ (no 404s on JS/CSS)
# 4. Chat OAuth login flow works at /chat/login
# 5. Forward_auth on /chat/* rejects unauthenticated requests with 401
# 6. Staging content loads at /staging/
# 7. Root / redirects to /forge/
#
# Usage:
# smoke-edge-subpath.sh [--base-url BASE_URL]
#
# Environment variables:
# BASE_URL — Edge proxy URL (default: http://localhost)
# EDGE_TIMEOUT — Request timeout in seconds (default: 30)
# EDGE_MAX_RETRIES — Max retries per request (default: 3)
#
# Exit codes:
# 0 — All checks passed
# 1 — One or more checks failed
# =============================================================================
set -euo pipefail
# Script directory for relative paths
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Source common helpers
source "${SCRIPT_DIR}/../lib/env.sh" 2>/dev/null || true
# ─────────────────────────────────────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────────────────────────────────────
BASE_URL="${BASE_URL:-http://localhost}"
EDGE_TIMEOUT="${EDGE_TIMEOUT:-30}"
EDGE_MAX_RETRIES="${EDGE_MAX_RETRIES:-3}"
# Subpaths to test
FORGE_PATH="/forge/"
CI_PATH="/ci/"
CHAT_PATH="/chat/"
STAGING_PATH="/staging/"
# Track overall test status
FAILED=0
PASSED=0
SKIPPED=0
# ─────────────────────────────────────────────────────────────────────────────
# Logging helpers
# ─────────────────────────────────────────────────────────────────────────────
log_info() {
echo "[INFO] $*"
}
log_pass() {
echo "[PASS] $*"
((PASSED++)) || true
}
log_fail() {
echo "[FAIL] $*"
((FAILED++)) || true
}
log_skip() {
echo "[SKIP] $*"
((SKIPPED++)) || true
}
log_section() {
echo ""
echo "=== $* ==="
echo ""
}
# ─────────────────────────────────────────────────────────────────────────────
# HTTP helpers
# ─────────────────────────────────────────────────────────────────────────────
# Make an HTTP request with retry logic
# Usage: http_request <method> <url> [options...]
# Returns: HTTP status code on stdout, body on stderr
http_request() {
local method="$1"
local url="$2"
shift 2
local retries=0
local response status
while [ "$retries" -lt "$EDGE_MAX_RETRIES" ]; do
response=$(curl -sS -w '\n%{http_code}' -X "$method" \
--max-time "$EDGE_TIMEOUT" \
-o /tmp/edge-response-$$ \
"$@" "$url" 2>&1) || {
retries=$((retries + 1))
log_info "Retry $retries/$EDGE_MAX_RETRIES for $url"
sleep 1
continue
}
status=$(echo "$response" | tail -n1)
echo "$status"
return 0
done
log_fail "Max retries exceeded for $url"
return 1
}
# Make a GET request and return status code
# Usage: http_get <url> [curl_options...]
# Returns: HTTP status code
http_get() {
local url="$1"
shift
http_request "GET" "$url" "$@"
}
# Make a HEAD request (no body)
# Usage: http_head <url> [curl_options...]
# Returns: HTTP status code
http_head() {
local url="$1"
shift
http_request "HEAD" "$url" "$@"
}
# ─────────────────────────────────────────────────────────────────────────────
# Test checkers
# ─────────────────────────────────────────────────────────────────────────────
# Check if a URL returns a valid response (2xx or 3xx)
# Usage: check_http_status <url> <expected_pattern>
check_http_status() {
local url="$1"
local expected_pattern="$2"
local description="$3"
local status
status=$(http_get "$url")
if echo "$status" | grep -qE "$expected_pattern"; then
log_pass "$description: $url$status"
return 0
else
log_fail "$description: $url$status (expected: $expected_pattern)"
return 1
fi
}
# Check that a URL does NOT redirect in a loop
# Usage: check_no_redirect_loop <url> [max_redirects]
check_no_redirect_loop() {
local url="$1"
local max_redirects="${2:-10}"
local description="$3"
# Use curl with max redirects and check the final status
local response status follow_location
response=$(curl -sS -w '\n%{http_code}\n%{redirect_url}' \
--max-time "$EDGE_TIMEOUT" \
--max-redirs "$max_redirects" \
-o /tmp/edge-response-$$ \
"$url" 2>&1) || {
log_fail "$description: curl failed ($?)"
return 1
}
status=$(echo "$response" | sed -n '$p')
follow_location=$(echo "$response" | sed -n "$((NR-1))p")
# If we hit max redirects, the last redirect is still in follow_location
if [ "$status" = "000" ] && [ -n "$follow_location" ]; then
log_fail "$description: possible redirect loop detected (last location: $follow_location)"
return 1
fi
# Check final status is in valid range
if echo "$status" | grep -qE '^(2|3)[0-9][0-9]$'; then
log_pass "$description: no redirect loop ($status)"
return 0
else
log_fail "$description: unexpected status $status"
return 1
fi
}
# Check that specific assets load without 404
# Usage: check_assets_no_404 <base_url> <pattern>
check_assets_no_404() {
local base_url="$1"
local _pattern="$2"
local description="$3"
local assets_found=0
local assets_404=0
# Fetch the main page and extract asset URLs
local main_page
main_page=$(curl -sS --max-time "$EDGE_TIMEOUT" "$base_url" 2>/dev/null) || {
log_skip "$description: could not fetch main page"
return 0
}
# Extract URLs matching the pattern (e.g., .js, .css files)
local assets
assets=$(echo "$main_page" | grep -oE 'https?://[^"'"'"']+\.(js|css|woff|woff2|ttf|eot|svg|png|jpg|jpeg|gif|ico)' | sort -u || true)
if [ -z "$assets" ]; then
log_skip "$description: no assets found to check"
return 0
fi
assets_found=$(echo "$assets" | wc -l)
# Check each asset
while IFS= read -r asset; do
local status
status=$(http_head "$asset")
if [ "$status" = "404" ]; then
log_fail "$description: asset 404: $asset"
assets_404=$((assets_404 + 1))
fi
done <<< "$assets"
if [ $assets_404 -eq 0 ]; then
log_pass "$description: all $assets_found assets loaded (0 404s)"
return 0
else
log_fail "$description: $assets_404/$assets_found assets returned 404"
return 1
fi
}
# Check that a path returns 401 (unauthorized)
# Usage: check_returns_401 <url> <description>
check_returns_401() {
local url="$1"
local description="$2"
local status
status=$(http_get "$url")
if [ "$status" = "401" ]; then
log_pass "$description: $url → 401 (as expected)"
return 0
else
log_fail "$description: $url$status (expected 401)"
return 1
fi
}
# Check that a path returns 302 redirect to expected location
# Usage: check_redirects_to <url> <expected_target> <description>
check_redirects_to() {
local url="$1"
local expected_target="$2"
local description="$3"
local response status location
response=$(curl -sS -w '\n%{http_code}\n%{redirect_url}' \
--max-time "$EDGE_TIMEOUT" \
--max-redirs 1 \
-o /tmp/edge-response-$$ \
"$url" 2>&1) || {
log_fail "$description: curl failed"
return 1
}
status=$(echo "$response" | sed -n '$p')
location=$(echo "$response" | sed -n "$((NR-1))p")
if [ "$status" = "302" ] && echo "$location" | grep -qF "$expected_target"; then
log_pass "$description: redirects to $location"
return 0
else
log_fail "$description: status=$status, location=$location (expected 302 → $expected_target)"
return 1
fi
}
# ─────────────────────────────────────────────────────────────────────────────
# Main test suite
# ─────────────────────────────────────────────────────────────────────────────
main() {
log_section "Edge Subpath Routing Smoke Test"
log_info "Base URL: $BASE_URL"
log_info "Timeout: ${EDGE_TIMEOUT}s, Max retries: $EDGE_MAX_RETRIES"
# ─── Test 1: Root redirects to /forge/ ──────────────────────────────────
log_section "Test 1: Root redirects to /forge/"
check_redirects_to "$BASE_URL" "$FORGE_PATH" "Root redirect" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 2: Forgejo login at /forge/ without redirect loops ────────────
log_section "Test 2: Forgejo login at /forge/"
check_no_redirect_loop "$BASE_URL$FORGE_PATH" 10 "Forgejo root" || FAILED=1
check_http_status "$BASE_URL$FORGE_PATH" "^(2|3)[0-9][0-9]$" "Forgejo root status" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 3: Forgejo OAuth callback at /forge/_oauth/callback ───────────
log_section "Test 3: Forgejo OAuth callback at /forge/_oauth/callback"
check_http_status "$BASE_URL/forge/_oauth/callback" "^(2|3|4|5)[0-9][0-9]$" "Forgejo OAuth callback" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 4: Woodpecker dashboard at /ci/ ───────────────────────────────
log_section "Test 4: Woodpecker dashboard at /ci/"
check_no_redirect_loop "$BASE_URL$CI_PATH" 10 "Woodpecker root" || FAILED=1
check_http_status "$BASE_URL$CI_PATH" "^(2|3)[0-9][0-9]$" "Woodpecker root status" || FAILED=1
check_assets_no_404 "$BASE_URL$CI_PATH" "\.(js|css)" "Woodpecker assets" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 5: Chat OAuth login at /chat/login ────────────────────────────
log_section "Test 5: Chat OAuth login at /chat/login"
check_http_status "$BASE_URL$CHAT_PATH/login" "^(2|3)[0-9][0-9]$" "Chat login page" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 6: Chat OAuth callback at /chat/oauth/callback ────────────────
log_section "Test 6: Chat OAuth callback at /chat/oauth/callback"
check_http_status "$BASE_URL/chat/oauth/callback" "^(2|3)[0-9][0-9]$" "Chat OAuth callback" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 7: Forward_auth on /chat/* returns 401 for unauthenticated ────
log_section "Test 7: Forward_auth on /chat/* returns 401"
# Test a protected chat endpoint (chat dashboard)
check_returns_401 "$BASE_URL$CHAT_PATH/" "Chat root (unauthenticated)" || FAILED=1
check_returns_401 "$BASE_URL$CHAT_PATH/dashboard" "Chat dashboard (unauthenticated)" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 8: Staging at /staging/ ───────────────────────────────────────
log_section "Test 8: Staging at /staging/"
check_http_status "$BASE_URL$STAGING_PATH" "^(2|3)[0-9][0-9]$" "Staging root" || FAILED=1
if [ "$FAILED" -eq 0 ]; then ((PASSED++)) || true; fi
# ─── Test 9: Caddy admin API health ─────────────────────────────────────
log_section "Test 9: Caddy admin API health"
# Caddy admin API is typically on port 2019 locally
if curl -sS --max-time 5 "http://127.0.0.1:2019/" >/dev/null 2>&1; then
log_pass "Caddy admin API reachable"
((PASSED++))
else
log_skip "Caddy admin API not reachable (expected if edge is remote)"
fi
# ─── Summary ────────────────────────────────────────────────────────────
log_section "Test Summary"
log_info "Passed: $PASSED"
log_info "Failed: $FAILED"
log_info "Skipped: $SKIPPED"
if [ $FAILED -gt 0 ]; then
log_section "TEST FAILED"
exit 1
fi
log_section "TEST PASSED"
exit 0
}
# Run main
main "$@"

View file

@ -1,149 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# tools/vault-seed-ops-repo.sh — Idempotent seed for kv/disinto/shared/ops-repo
#
# Part of the Nomad+Vault migration (S5.1, issue #1035). Populates the KV v2
# path that nomad/jobs/edge.hcl dispatcher task reads from, so the edge
# proxy has FORGE_TOKEN for ops repo access.
#
# Seeds from kv/disinto/bots/vault (the vault bot credentials) — copies the
# token field to kv/disinto/shared/ops-repo. This is the "service" path that
# dispatcher uses, distinct from the "agent" path (bots/vault) used by
# agent tasks under the service-agents policy.
#
# Idempotency contract:
# - Key present with non-empty value → leave untouched, log "token unchanged".
# - Key missing or empty → copy from bots/vault, log "token copied".
# - If bots/vault is also empty → generate a random value, log "token generated".
#
# Preconditions:
# - Vault reachable + unsealed at $VAULT_ADDR.
# - VAULT_TOKEN set (env) or /etc/vault.d/root.token readable.
# - The `kv/` mount is enabled as KV v2.
#
# Requires:
# - VAULT_ADDR (e.g. http://127.0.0.1:8200)
# - VAULT_TOKEN (env OR /etc/vault.d/root.token, resolved by lib/hvault.sh)
# - curl, jq, openssl
#
# Usage:
# tools/vault-seed-ops-repo.sh
# tools/vault-seed-ops-repo.sh --dry-run
#
# Exit codes:
# 0 success (seed applied, or already applied)
# 1 precondition / API / mount-mismatch failure
# =============================================================================
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# shellcheck source=../lib/hvault.sh
source "${REPO_ROOT}/lib/hvault.sh"
# KV v2 mount + logical paths
KV_MOUNT="kv"
OPS_REPO_PATH="disinto/shared/ops-repo"
VAULT_BOT_PATH="disinto/bots/vault"
OPS_REPO_API="${KV_MOUNT}/data/${OPS_REPO_PATH}"
VAULT_BOT_API="${KV_MOUNT}/data/${VAULT_BOT_PATH}"
log() { printf '[vault-seed-ops-repo] %s\n' "$*"; }
die() { printf '[vault-seed-ops-repo] ERROR: %s\n' "$*" >&2; exit 1; }
# ── Flag parsing ─────────────────────────────────────────────────────────────
DRY_RUN=0
case "$#:${1-}" in
0:)
;;
1:--dry-run)
DRY_RUN=1
;;
1:-h|1:--help)
printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")"
printf 'Seed kv/disinto/shared/ops-repo with FORGE_TOKEN.\n\n'
printf 'Copies token from kv/disinto/bots/vault if present;\n'
printf 'otherwise generates a random value. Idempotent:\n'
printf 'existing non-empty values are left untouched.\n\n'
printf ' --dry-run Print planned actions without writing.\n'
exit 0
;;
*)
die "invalid arguments: $* (try --help)"
;;
esac
# ── Preconditions ────────────────────────────────────────────────────────────
for bin in curl jq openssl; do
command -v "$bin" >/dev/null 2>&1 \
|| die "required binary not found: ${bin}"
done
[ -n "${VAULT_ADDR:-}" ] \
|| die "VAULT_ADDR unset — e.g. export VAULT_ADDR=http://127.0.0.1:8200"
hvault_token_lookup >/dev/null \
|| die "Vault auth probe failed — check VAULT_ADDR + VAULT_TOKEN"
# ── Step 1/2: ensure kv/ mount exists and is KV v2 ───────────────────────────
log "── Step 1/2: ensure ${KV_MOUNT}/ is KV v2 ──"
export DRY_RUN
hvault_ensure_kv_v2 "$KV_MOUNT" "[vault-seed-ops-repo]" \
|| die "KV mount check failed"
# ── Step 2/2: seed ops-repo from vault bot ───────────────────────────────────
log "── Step 2/2: seed ${OPS_REPO_API} ──"
# Read existing ops-repo value
existing_raw="$(hvault_get_or_empty "${OPS_REPO_API}")" \
|| die "failed to read ${OPS_REPO_API}"
existing_token=""
if [ -n "$existing_raw" ]; then
existing_token="$(printf '%s' "$existing_raw" | jq -r '.data.data.token // ""')"
fi
desired_token="$existing_token"
action=""
if [ -z "$existing_token" ]; then
# Token missing — try to copy from vault bot
bot_raw="$(hvault_get_or_empty "${VAULT_BOT_API}")" || true
if [ -n "$bot_raw" ]; then
bot_token="$(printf '%s' "$bot_raw" | jq -r '.data.data.token // ""')"
if [ -n "$bot_token" ]; then
desired_token="$bot_token"
action="copied"
fi
fi
# If still no token, generate one
if [ -z "$desired_token" ]; then
if [ "$DRY_RUN" -eq 1 ]; then
action="generated (dry-run)"
else
desired_token="$(openssl rand -hex 32)"
action="generated"
fi
fi
fi
if [ -z "$action" ]; then
log "all keys present at ${OPS_REPO_API} — no-op"
log "token unchanged"
exit 0
fi
if [ "$DRY_RUN" -eq 1 ]; then
log "[dry-run] ${OPS_REPO_PATH}: would ${action} token"
exit 0
fi
# Write the token
payload="$(jq -n --arg t "$desired_token" '{data: {token: $t}}')"
_hvault_request POST "${OPS_REPO_API}" "$payload" >/dev/null \
|| die "failed to write ${OPS_REPO_API}"
log "${OPS_REPO_PATH}: ${action} token"
log "done — ${OPS_REPO_API} seeded"