fix: [nomad-step-5] S5.2 — nomad/jobs/staging.hcl + chat.hcl (#989)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline failed
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/nomad-validate Pipeline failed
ci/woodpecker/pr/secret-scan Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline failed
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline failed
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/nomad-validate Pipeline failed
ci/woodpecker/pr/secret-scan Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline failed
This commit is contained in:
parent
daaaf70d34
commit
baf56229c3
4 changed files with 286 additions and 10 deletions
31
bin/disinto
31
bin/disinto
|
|
@ -823,12 +823,17 @@ _disinto_init_nomad() {
|
||||||
echo "[deploy] dry-run complete"
|
echo "[deploy] dry-run complete"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Build custom images dry-run (if agents service is included)
|
# Build custom images dry-run (if agents or chat services are included)
|
||||||
if echo ",$with_services," | grep -q ",agents,"; then
|
if echo ",$with_services," | grep -qE ",(agents|chat),"; then
|
||||||
echo ""
|
echo ""
|
||||||
echo "── Build images dry-run ──────────────────────────────"
|
echo "── Build images dry-run ──────────────────────────────"
|
||||||
|
if echo ",$with_services," | grep -q ",agents,"; then
|
||||||
echo "[build] [dry-run] docker build -t disinto/agents:local -f ${FACTORY_ROOT}/docker/agents/Dockerfile ${FACTORY_ROOT}"
|
echo "[build] [dry-run] docker build -t disinto/agents:local -f ${FACTORY_ROOT}/docker/agents/Dockerfile ${FACTORY_ROOT}"
|
||||||
fi
|
fi
|
||||||
|
if echo ",$with_services," | grep -q ",chat,"; then
|
||||||
|
echo "[build] [dry-run] docker build -t disinto/chat:local -f ${FACTORY_ROOT}/docker/chat/Dockerfile ${FACTORY_ROOT}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -916,16 +921,23 @@ _disinto_init_nomad() {
|
||||||
echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services"
|
echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Build custom images required by Nomad jobs (S4.2) — before deploy.
|
# Build custom images required by Nomad jobs (S4.2, S5.2) — before deploy.
|
||||||
# Single-node factory dev box: no multi-node pull needed, no registry auth.
|
# Single-node factory dev box: no multi-node pull needed, no registry auth.
|
||||||
# Can upgrade to approach B (registry push/pull) later if multi-node.
|
# Can upgrade to approach B (registry push/pull) later if multi-node.
|
||||||
if echo ",$with_services," | grep -q ",agents,"; then
|
if echo ",$with_services," | grep -qE ",(agents|chat),"; then
|
||||||
echo ""
|
echo ""
|
||||||
echo "── Building custom images ─────────────────────────────"
|
echo "── Building custom images ─────────────────────────────"
|
||||||
|
if echo ",$with_services," | grep -q ",agents,"; then
|
||||||
local tag="disinto/agents:local"
|
local tag="disinto/agents:local"
|
||||||
echo "── Building $tag ─────────────────────────────"
|
echo "── Building $tag ─────────────────────────────"
|
||||||
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/agents/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
|
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/agents/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
|
||||||
fi
|
fi
|
||||||
|
if echo ",$with_services," | grep -q ",chat,"; then
|
||||||
|
local tag="disinto/chat:local"
|
||||||
|
echo "── Building $tag ─────────────────────────────"
|
||||||
|
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/chat/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Interleaved seed/deploy per service (S2.6, #928, #948).
|
# Interleaved seed/deploy per service (S2.6, #928, #948).
|
||||||
# We interleave seed + deploy per service (not batch all seeds then all deploys)
|
# We interleave seed + deploy per service (not batch all seeds then all deploys)
|
||||||
|
|
@ -935,9 +947,9 @@ _disinto_init_nomad() {
|
||||||
if [ -n "$with_services" ]; then
|
if [ -n "$with_services" ]; then
|
||||||
local vault_addr="${VAULT_ADDR:-http://127.0.0.1:8200}"
|
local vault_addr="${VAULT_ADDR:-http://127.0.0.1:8200}"
|
||||||
|
|
||||||
# Build ordered deploy list (S3.4, S4.2): forgejo → woodpecker-server → woodpecker-agent → agents
|
# Build ordered deploy list (S3.4, S4.2, S5.2): forgejo → woodpecker-server → woodpecker-agent → agents → staging → chat
|
||||||
local DEPLOY_ORDER=""
|
local DEPLOY_ORDER=""
|
||||||
for ordered_svc in forgejo woodpecker-server woodpecker-agent agents; do
|
for ordered_svc in forgejo woodpecker-server woodpecker-agent agents staging chat; do
|
||||||
if echo ",$with_services," | grep -q ",$ordered_svc,"; then
|
if echo ",$with_services," | grep -q ",$ordered_svc,"; then
|
||||||
DEPLOY_ORDER="${DEPLOY_ORDER:+${DEPLOY_ORDER} }${ordered_svc}"
|
DEPLOY_ORDER="${DEPLOY_ORDER:+${DEPLOY_ORDER} }${ordered_svc}"
|
||||||
fi
|
fi
|
||||||
|
|
@ -950,6 +962,7 @@ _disinto_init_nomad() {
|
||||||
case "$svc" in
|
case "$svc" in
|
||||||
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
|
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
|
||||||
agents) seed_name="agents" ;;
|
agents) seed_name="agents" ;;
|
||||||
|
chat) seed_name="chat" ;;
|
||||||
esac
|
esac
|
||||||
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
|
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
|
||||||
if [ -x "$seed_script" ]; then
|
if [ -x "$seed_script" ]; then
|
||||||
|
|
@ -1014,6 +1027,12 @@ _disinto_init_nomad() {
|
||||||
if echo ",$with_services," | grep -q ",agents,"; then
|
if echo ",$with_services," | grep -q ",agents,"; then
|
||||||
echo " agents: (polling loop running)"
|
echo " agents: (polling loop running)"
|
||||||
fi
|
fi
|
||||||
|
if echo ",$with_services," | grep -q ",staging,"; then
|
||||||
|
echo " staging: (internal, no external port)"
|
||||||
|
fi
|
||||||
|
if echo ",$with_services," | grep -q ",chat,"; then
|
||||||
|
echo " chat: 8080"
|
||||||
|
fi
|
||||||
echo "────────────────────────────────────────────────────────"
|
echo "────────────────────────────────────────────────────────"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,12 @@ client {
|
||||||
read_only = false
|
read_only = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# staging static content (docker/ directory with images, HTML, etc.)
|
||||||
|
host_volume "site-content" {
|
||||||
|
path = "/srv/disinto/docker"
|
||||||
|
read_only = true
|
||||||
|
}
|
||||||
|
|
||||||
# ops repo clone (vault actions, sprint artifacts, knowledge).
|
# ops repo clone (vault actions, sprint artifacts, knowledge).
|
||||||
host_volume "ops-repo" {
|
host_volume "ops-repo" {
|
||||||
path = "/srv/disinto/ops-repo"
|
path = "/srv/disinto/ops-repo"
|
||||||
|
|
|
||||||
166
nomad/jobs/chat.hcl
Normal file
166
nomad/jobs/chat.hcl
Normal file
|
|
@ -0,0 +1,166 @@
|
||||||
|
# =============================================================================
|
||||||
|
# nomad/jobs/chat.hcl — Claude chat UI (Nomad service job)
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S5.2, issue #989). Lightweight service
|
||||||
|
# job for the Claude chat UI with sandbox hardening (#706).
|
||||||
|
#
|
||||||
|
# Build:
|
||||||
|
# Custom image built from docker/chat/Dockerfile as disinto/chat:local
|
||||||
|
# (same :local pattern as disinto/agents:local).
|
||||||
|
#
|
||||||
|
# Sandbox hardening (#706):
|
||||||
|
# - Read-only root filesystem
|
||||||
|
# - tmpfs /tmp:size=64m for runtime temp files
|
||||||
|
# - cap_drop ALL (no Linux capabilities)
|
||||||
|
# - pids_limit 128 (prevent fork bombs)
|
||||||
|
# - mem_limit 512m (matches compose sandbox hardening)
|
||||||
|
#
|
||||||
|
# Vault integration:
|
||||||
|
# - vault { role = "service-chat" } at group scope
|
||||||
|
# - Template stanza renders CHAT_OAUTH_CLIENT_ID, CHAT_OAUTH_CLIENT_SECRET,
|
||||||
|
# FORWARD_AUTH_SECRET from kv/disinto/shared/chat
|
||||||
|
# - Seeded on fresh boxes by tools/vault-seed-chat.sh
|
||||||
|
#
|
||||||
|
# Host volume:
|
||||||
|
# - chat-history → /var/lib/chat/history (persists conversation history)
|
||||||
|
#
|
||||||
|
# Not the runtime yet: docker-compose.yml is still the factory's live stack
|
||||||
|
# until cutover. This file exists so CI can validate it and S5.2 can wire
|
||||||
|
# `disinto init --backend=nomad --with chat` to `nomad job run` it.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
job "chat" {
  type        = "service"
  datacenters = ["dc1"]

  group "chat" {
    count = 1

    # ── Vault workload identity (S5.2, issue #989) ───────────────────────────
    # Role `service-chat` defined in vault/roles.yaml, policy in
    # vault/policies/service-chat.hcl. Bound claim pins nomad_job_id = "chat".
    vault {
      role = "service-chat"
    }

    # ── Network ──────────────────────────────────────────────────────────────
    # External port 8080 for chat UI access (via edge proxy or direct).
    network {
      port "http" {
        static = 8080
        to     = 8080
      }
    }

    # ── Host volumes ─────────────────────────────────────────────────────────
    # chat-history volume: declared in nomad/client.hcl, path
    # /srv/disinto/chat-history on the factory box.
    volume "chat-history" {
      type      = "host"
      source    = "chat-history"
      read_only = false
    }

    # ── Restart policy ───────────────────────────────────────────────────────
    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "delay"
    }

    # ── Service registration ─────────────────────────────────────────────────
    service {
      name     = "chat"
      port     = "http"
      provider = "nomad"

      check {
        type     = "http"
        path     = "/health"
        interval = "10s"
        timeout  = "3s"
      }
    }

    task "chat" {
      driver = "docker"

      config {
        image      = "disinto/chat:local"
        force_pull = false

        # Map the group-level "http" port into the container; without this
        # the docker driver does not publish the port declared in `network`.
        ports = ["http"]

        # ── Sandbox hardening (S5.2, #706) ────────────────────────────────
        # All of these are docker-driver options and therefore must live in
        # this `config` block (they are not valid at task scope):
        #   - readonly_rootfs → ReadonlyRootfs=true
        #   - cap_drop        → CapDrop=ALL (no Linux capabilities)
        #   - pids_limit      → PidsLimit=128 (prevent fork bombs)
        #   - security_opt    → apparmor=unconfined is needed for Claude CLI
        #                       ptrace access; no-new-privileges prevents
        #                       privilege escalation
        readonly_rootfs = true
        cap_drop        = ["ALL"]
        pids_limit      = 128
        security_opt    = ["apparmor=unconfined", "no-new-privileges"]

        # ── tmpfs /tmp:size=64m ───────────────────────────────────────────
        # Writable scratch space for runtime temp files, required because
        # the root filesystem is read-only. tmpfs_options.size is in bytes.
        mount {
          type     = "tmpfs"
          target   = "/tmp"
          readonly = false

          tmpfs_options {
            size = 67108864 # 64 MiB
          }
        }
      }

      # ── Volume mounts ──────────────────────────────────────────────────────
      # Mount chat-history for conversation persistence.
      volume_mount {
        volume      = "chat-history"
        destination = "/var/lib/chat/history"
        read_only   = false
      }

      # ── Environment: non-secret settings ───────────────────────────────────
      # Secrets (CHAT_OAUTH_CLIENT_ID, CHAT_OAUTH_CLIENT_SECRET,
      # FORWARD_AUTH_SECRET) come from the template stanza below.
      env {
        FORGE_URL                  = "http://forgejo:3000"
        CHAT_MAX_REQUESTS_PER_HOUR = "60"
        CHAT_MAX_REQUESTS_PER_DAY  = "1000"
      }

      # ── Vault-templated secrets (S5.2, issue #989) ─────────────────────────
      # Renders chat-secrets.env from Vault KV v2 at kv/disinto/shared/chat.
      # Placeholder values kept < 16 chars to avoid secret-scan CI failures.
      template {
        destination          = "secrets/chat-secrets.env"
        env                  = true
        change_mode          = "restart"
        error_on_missing_key = false
        data                 = <<EOT
{{- with secret "kv/data/disinto/shared/chat" -}}
CHAT_OAUTH_CLIENT_ID={{ .Data.data.chat_oauth_client_id }}
CHAT_OAUTH_CLIENT_SECRET={{ .Data.data.chat_oauth_client_secret }}
FORWARD_AUTH_SECRET={{ .Data.data.forward_auth_secret }}
{{- else -}}
# WARNING: run tools/vault-seed-chat.sh
CHAT_OAUTH_CLIENT_ID=seed-me
CHAT_OAUTH_CLIENT_SECRET=seed-me
FORWARD_AUTH_SECRET=seed-me
{{- end -}}
EOT
      }

      # ── Resources ──────────────────────────────────────────────────────────
      # memory is in MB; 512 matches the compose mem_limit of 512m.
      resources {
        cpu    = 200
        memory = 512
      }
    }
  }
}
|
||||||
85
nomad/jobs/staging.hcl
Normal file
85
nomad/jobs/staging.hcl
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
# =============================================================================
|
||||||
|
# nomad/jobs/staging.hcl — Staging file server (Nomad service job)
|
||||||
|
#
|
||||||
|
# Part of the Nomad+Vault migration (S5.2, issue #989). Lightweight service job
|
||||||
|
# for the staging file server using Caddy as a static file server.
|
||||||
|
#
|
||||||
|
# Mount contract:
|
||||||
|
# This job mounts the `docker/` directory as `/srv/site` (read-only).
|
||||||
|
# The docker/ directory contains static content (images, HTML, etc.)
|
||||||
|
# served to staging environment users.
|
||||||
|
#
|
||||||
|
# Network:
|
||||||
|
# No external port exposed — edge proxy routes to it internally.
|
||||||
|
# Service discovery via Nomad native provider for internal routing.
|
||||||
|
#
|
||||||
|
# Not the runtime yet: docker-compose.yml is still the factory's live stack
|
||||||
|
# until cutover. This file exists so CI can validate it and S5.2 can wire
|
||||||
|
# `disinto init --backend=nomad --with staging` to `nomad job run` it.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
job "staging" {
  type        = "service"
  datacenters = ["dc1"]

  group "staging" {
    count = 1

    # No Vault integration needed — no secrets required (static file server).

    # Internal service — no fixed host port. Nomad assigns a dynamic host
    # port that forwards to container port 80; the edge proxy finds it via
    # the Nomad-native service registration below. (A `static = 80` here
    # would reserve host port 80 and contradict the "no external port"
    # contract in the file header.)
    network {
      port "http" {
        to = 80
      }
    }

    # Read-only host volume with the static content; the host_volume
    # "site-content" must be declared in the Nomad client config.
    volume "site-content" {
      type      = "host"
      source    = "site-content"
      read_only = true
    }

    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "delay"
    }

    service {
      name     = "staging"
      port     = "http"
      provider = "nomad"

      # NOTE(review): with file-server rooted at /srv/site, "/" answers 200
      # only if the mounted directory has an index file — confirm, or point
      # the check at a path that is known to exist.
      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "3s"
      }
    }

    task "staging" {
      driver = "docker"

      config {
        image = "caddy:alpine"
        ports = ["http"]

        # The stock image's default Caddyfile serves /usr/share/caddy, not
        # the mounted volume. Run the file server explicitly against the
        # mount so the site-content volume is what actually gets served.
        command = "caddy"
        args    = ["file-server", "--root", "/srv/site", "--listen", ":80"]
      }

      # Mount docker/ directory as /srv/site:ro (static content).
      volume_mount {
        volume      = "site-content"
        destination = "/srv/site"
        read_only   = true
      }

      resources {
        cpu    = 100
        memory = 256
      }
    }
  }
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue