From 2ad4bdc624de9adca4016ac79c969b8ca3dc9f9c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 16 Apr 2026 09:55:35 +0000 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20[nomad-step-1]=20S1.1=20=E2=80=94=20?= =?UTF-8?q?add=20nomad/jobs/forgejo.hcl=20(service=20job,=20host=5Fvolume,?= =?UTF-8?q?=20port=203000)=20(#840)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First Nomad jobspec to land under nomad/jobs/ as part of the Nomad+Vault migration. Proves the docker driver + host_volume plumbing wired up in Step 0 (client.hcl) by defining a real factory service: - job type=service, datacenters=["dc1"], 1 group × 1 task - docker driver, image pinned to codeberg.org/forgejo/forgejo:11.0 (matches docker-compose.yml) - network port "http" static=3000, to=3000 (same host:port as compose, so agents/woodpecker/caddy reach forgejo unchanged across cutover) - mounts the forgejo-data host_volume from nomad/client.hcl at /data - non-secret env subset from docker-compose's forgejo service (DB type, ROOT_URL, HTTP_PORT, INSTALL_LOCK, DISABLE_REGISTRATION, webhook allow-list); OAuth/secret env vars land in Step 2 via Vault - Nomad-native service discovery (provider="nomad", no Consul) with HTTP check on /api/v1/version (10s interval, 3s timeout). No initial_status override — Nomad waits for first probe to pass. - restart: 3 attempts / 5m / 15s delay / mode=delay - resources: cpu=300 memory=512 baseline No changes to docker-compose.yml — the docker stack remains the factory's runtime until cutover. CI integration (`nomad job validate`) is tracked by #843. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- nomad/jobs/forgejo.hcl | 113 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 nomad/jobs/forgejo.hcl diff --git a/nomad/jobs/forgejo.hcl b/nomad/jobs/forgejo.hcl new file mode 100644 index 0000000..b2c057f --- /dev/null +++ b/nomad/jobs/forgejo.hcl @@ -0,0 +1,113 @@ +# ============================================================================= +# nomad/jobs/forgejo.hcl — Forgejo git server (Nomad service job) +# +# Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to +# land under nomad/jobs/ — proves the docker driver + host_volume plumbing +# from Step 0 (client.hcl) by running a real factory service. +# +# Host_volume contract: +# This job mounts the `forgejo-data` host_volume declared in +# nomad/client.hcl. That volume is backed by /srv/disinto/forgejo-data on +# the factory box, created by lib/init/nomad/cluster-up.sh before any job +# references it. Keep the `source = "forgejo-data"` below in sync with the +# host_volume stanza in client.hcl — drift = scheduling failures. +# +# No Vault integration yet — Step 2 (#...) templates in OAuth secrets and +# replaces the inline FORGEJO__oauth2__* bits. The env vars below are the +# subset of docker-compose.yml's forgejo service that does NOT depend on +# secrets: DB type, public URL, install lock, registration lockdown, webhook +# allow-list. OAuth app registration lands later, per-service. +# +# Not the runtime yet: docker-compose.yml is still the factory's live stack +# until cutover. This file exists so CI can validate it and S1.3 can wire +# `disinto init --backend=nomad --with forgejo` to `nomad job run` it. 
+# ============================================================================= + +job "forgejo" { + type = "service" + datacenters = ["dc1"] + + group "forgejo" { + count = 1 + + # Static :3000 matches docker-compose's published port so the rest of + # the factory (agents, woodpecker, caddy) keeps reaching forgejo at the + # same host:port during and after cutover. `to = 3000` maps the host + # port into the container's :3000 listener. + network { + port "http" { + static = 3000 + to = 3000 + } + } + + # Host-volume mount: declared in nomad/client.hcl, path + # /srv/disinto/forgejo-data on the factory box. + volume "forgejo-data" { + type = "host" + source = "forgejo-data" + read_only = false + } + + # Restart policy — 3 attempts per 5m window, 15s between tries. Note + # that mode = "delay" never fails the task back to the scheduler: after + # the attempts are exhausted Nomad waits out the delay and keeps + # retrying. If fail-fast on a broken image/config is the intent, this + # should be mode = "fail" instead. + restart { + attempts = 3 + interval = "5m" + delay = "15s" + mode = "delay" + } + + # Native Nomad service discovery (no Consul in this factory cluster). + # Health check gates the service as healthy only after the API is up; + # initial_status is deliberately unset so Nomad waits for the first + # probe to pass before marking the allocation healthy on boot. + service { + name = "forgejo" + port = "http" + provider = "nomad" + + check { + type = "http" + path = "/api/v1/version" + interval = "10s" + timeout = "3s" + } + } + + task "forgejo" { + driver = "docker" + + config { + image = "codeberg.org/forgejo/forgejo:11.0" + ports = ["http"] + } + + volume_mount { + volume = "forgejo-data" + destination = "/data" + read_only = false + } + + # Mirrors the non-secret env set from docker-compose.yml's forgejo + # service. OAuth/secret-bearing env vars land in Step 2 via Vault + # templates — do NOT add them here. 
+ env { + FORGEJO__database__DB_TYPE = "sqlite3" + FORGEJO__server__ROOT_URL = "http://forgejo:3000/" + FORGEJO__server__HTTP_PORT = "3000" + FORGEJO__security__INSTALL_LOCK = "true" + FORGEJO__service__DISABLE_REGISTRATION = "true" + FORGEJO__webhook__ALLOWED_HOST_LIST = "private" + } + + # Baseline — tune once we have real usage numbers under nomad. The + # docker-compose stack runs forgejo uncapped; these limits exist so + # an unhealthy forgejo can't starve the rest of the node. + resources { + cpu = 300 + memory = 512 + } + } + } +} -- 2.49.1 From db64f2fdae2b3fd0d7d0c2abc38c8b904c98819d Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 16 Apr 2026 10:11:34 +0000 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20renam?= =?UTF-8?q?e=20forgejo.nomad.hcl=20+=20wire=20nomad=20job=20validate=20CI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two blockers from the #844 review: 1. Rename nomad/jobs/forgejo.hcl → nomad/jobs/forgejo.nomad.hcl to match the convention documented in nomad/AGENTS.md:38 (*.nomad.hcl suffix). First jobspec sets the pattern for all future ones; keeps any glob- based tooling over nomad/jobs/*.nomad.hcl working. 2. Add a dedicated `nomad-job-validate` step to .woodpecker/nomad-validate.yml. `nomad config validate` (step 1) parses agent configs only — it rejects jobspec HCL as "unknown block 'job'". `nomad job validate` is the correct validator for jobspec HCL (exit 0 clean, 1 on syntax/semantic error). NOTE: verify the command works without a reachable Nomad agent — it has historically POSTed the parsed job to the agent's /v1/validate/job endpoint; if that still holds, the CI step must start `nomad agent -dev` first or it will fail with connection-refused. New jobspecs will add an explicit line alongside forgejo's, matching step 1's enumeration pattern and this file's "no-ad-hoc-steps" principle. Also updated the file header comment and the pipeline's top-of-file step index to reflect the new step ordering (2. nomad-job-validate inserted; old 2-4 renumbered to 3-5). 
Refs: #840 (S1.1), PR #844 --- .woodpecker/nomad-validate.yml | 30 +++++++++++++++---- nomad/jobs/{forgejo.hcl => forgejo.nomad.hcl} | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) rename nomad/jobs/{forgejo.hcl => forgejo.nomad.hcl} (98%) diff --git a/.woodpecker/nomad-validate.yml b/.woodpecker/nomad-validate.yml index 6cd616f..83946c3 100644 --- a/.woodpecker/nomad-validate.yml +++ b/.woodpecker/nomad-validate.yml @@ -15,9 +15,10 @@ # # Steps (all fail-closed — any error blocks merge): # 1. nomad-config-validate — `nomad config validate` on server + client HCL -# 2. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl -# 3. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto -# 4. bats-init-nomad — `disinto init --backend=nomad --dry-run` smoke tests +# 2. nomad-job-validate — `nomad job validate` on every nomad/jobs/*.nomad.hcl +# 3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl +# 4. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto +# 5. bats-init-nomad — `disinto init --backend=nomad --dry-run` smoke tests # # Pinned image versions match lib/init/nomad/install.sh (nomad 1.9.5 / # vault 1.18.5). Bump there AND here together — drift = CI passing on @@ -56,7 +57,24 @@ steps: commands: - nomad config validate nomad/server.hcl nomad/client.hcl - # ── 2. Vault HCL syntax check ──────────────────────────────────────────── + # ── 2. Nomad jobspec HCL syntax check ──────────────────────────────────── + # `nomad job validate` is a *different* tool from `nomad config validate` — + # the former parses jobspec HCL (job/group/task blocks, driver config, + # volume refs, network ports), the latter parses agent config HCL + # (server/client blocks). Running step 1 on a jobspec would reject it + # with "unknown block 'job'", and vice versa. Hence two separate steps. 
+ # + # NOTE(review): confirm `nomad job validate` really runs agent-less in + # this image — the command has historically POSTed the job to the + # agent's /v1/validate/job endpoint, in which case this step needs a + # background `nomad agent -dev` (exit 0 on valid HCL, 1 on + # syntax/semantic error). One invocation per file — the + # CLI takes a single path argument. New jobspecs get explicit lines here + # so bringing one up is a conscious CI edit, matching step 1's pattern + # and this file's "no-ad-hoc-steps" principle. + - name: nomad-job-validate + image: hashicorp/nomad:1.9.5 + commands: + - nomad job validate nomad/jobs/forgejo.nomad.hcl + + # ── 3. Vault HCL syntax check ──────────────────────────────────────────── # `vault operator diagnose` loads the config and runs a suite of checks. # Exit codes: # 0 — all checks green @@ -81,7 +99,7 @@ *) echo "vault config: hard failure (rc=$rc)" >&2; exit "$rc" ;; esac - # ── 3. Shellcheck ──────────────────────────────────────────────────────── + # ── 4. Shellcheck ──────────────────────────────────────────────────────── # Covers the new lib/init/nomad/*.sh scripts plus bin/disinto (which owns # the backend dispatcher). bin/disinto has no .sh extension so the # repo-wide shellcheck in .woodpecker/ci.yml skips it — this step is the @@ -91,7 +109,7 @@ commands: - shellcheck --severity=warning lib/init/nomad/*.sh bin/disinto - # ── 4. bats: `disinto init --backend=nomad --dry-run` ──────────────────── + # ── 5. bats: `disinto init --backend=nomad --dry-run` ──────────────────── # Smoke-tests the CLI dispatcher: both --backend=nomad variants exit 0 # with the expected step list, and --backend=docker stays on the docker # path (regression guard). Pure dry-run — no sudo, no network. 
diff --git a/nomad/jobs/forgejo.hcl b/nomad/jobs/forgejo.nomad.hcl similarity index 98% rename from nomad/jobs/forgejo.hcl rename to nomad/jobs/forgejo.nomad.hcl index b2c057f..c7a0326 100644 --- a/nomad/jobs/forgejo.hcl +++ b/nomad/jobs/forgejo.nomad.hcl @@ -1,5 +1,5 @@ # ============================================================================= -# nomad/jobs/forgejo.hcl — Forgejo git server (Nomad service job) +# nomad/jobs/forgejo.nomad.hcl — Forgejo git server (Nomad service job) # # Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to # land under nomad/jobs/ — proves the docker driver + host_volume plumbing -- 2.49.1