fix: [nomad-step-5] S5.2 — nomad/jobs/staging.hcl + chat.hcl (#989) #999

Merged
dev-qwen2 merged 2 commits from fix/issue-989-2 into main 2026-04-18 08:28:41 +00:00
7 changed files with 300 additions and 14 deletions
Showing only changes of commit da93748fee - Show all commits

View file

@ -787,7 +787,7 @@ _disinto_init_nomad() {
# real-run path so dry-run output accurately represents execution order. # real-run path so dry-run output accurately represents execution order.
# Build ordered deploy list: only include services present in with_services # Build ordered deploy list: only include services present in with_services
local DEPLOY_ORDER="" local DEPLOY_ORDER=""
for ordered_svc in forgejo woodpecker-server woodpecker-agent agents; do for ordered_svc in forgejo woodpecker-server woodpecker-agent agents staging chat; do
if echo ",$with_services," | grep -q ",$ordered_svc,"; then if echo ",$with_services," | grep -q ",$ordered_svc,"; then
DEPLOY_ORDER="${DEPLOY_ORDER:+${DEPLOY_ORDER} }${ordered_svc}" DEPLOY_ORDER="${DEPLOY_ORDER:+${DEPLOY_ORDER} }${ordered_svc}"
fi fi
@ -801,6 +801,7 @@ _disinto_init_nomad() {
case "$svc" in case "$svc" in
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;; woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
agents) seed_name="agents" ;; agents) seed_name="agents" ;;
chat) seed_name="chat" ;;
esac esac
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh" local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
if [ -x "$seed_script" ]; then if [ -x "$seed_script" ]; then
@ -823,11 +824,16 @@ _disinto_init_nomad() {
echo "[deploy] dry-run complete" echo "[deploy] dry-run complete"
fi fi
# Build custom images dry-run (if agents service is included) # Build custom images dry-run (if agents or chat services are included)
if echo ",$with_services," | grep -q ",agents,"; then if echo ",$with_services," | grep -qE ",(agents|chat),"; then
echo "" echo ""
echo "── Build images dry-run ──────────────────────────────" echo "── Build images dry-run ──────────────────────────────"
echo "[build] [dry-run] docker build -t disinto/agents:local -f ${FACTORY_ROOT}/docker/agents/Dockerfile ${FACTORY_ROOT}" if echo ",$with_services," | grep -q ",agents,"; then
echo "[build] [dry-run] docker build -t disinto/agents:local -f ${FACTORY_ROOT}/docker/agents/Dockerfile ${FACTORY_ROOT}"
fi
if echo ",$with_services," | grep -q ",chat,"; then
echo "[build] [dry-run] docker build -t disinto/chat:local -f ${FACTORY_ROOT}/docker/chat/Dockerfile ${FACTORY_ROOT}"
fi
fi fi
exit 0 exit 0
fi fi
@ -916,15 +922,22 @@ _disinto_init_nomad() {
echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services" echo "[import] no --import-env/--import-sops — skipping; set them or seed kv/disinto/* manually before deploying secret-dependent services"
fi fi
# Build custom images required by Nomad jobs (S4.2) — before deploy. # Build custom images required by Nomad jobs (S4.2, S5.2) — before deploy.
# Single-node factory dev box: no multi-node pull needed, no registry auth. # Single-node factory dev box: no multi-node pull needed, no registry auth.
# Can upgrade to approach B (registry push/pull) later if multi-node. # Can upgrade to approach B (registry push/pull) later if multi-node.
if echo ",$with_services," | grep -q ",agents,"; then if echo ",$with_services," | grep -qE ",(agents|chat),"; then
echo "" echo ""
echo "── Building custom images ─────────────────────────────" echo "── Building custom images ─────────────────────────────"
local tag="disinto/agents:local" if echo ",$with_services," | grep -q ",agents,"; then
echo "── Building $tag ─────────────────────────────" local tag="disinto/agents:local"
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/agents/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5 echo "── Building $tag ─────────────────────────────"
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/agents/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
fi
if echo ",$with_services," | grep -q ",chat,"; then
local tag="disinto/chat:local"
echo "── Building $tag ─────────────────────────────"
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/chat/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
fi
fi fi
# Interleaved seed/deploy per service (S2.6, #928, #948). # Interleaved seed/deploy per service (S2.6, #928, #948).
@ -935,9 +948,9 @@ _disinto_init_nomad() {
if [ -n "$with_services" ]; then if [ -n "$with_services" ]; then
local vault_addr="${VAULT_ADDR:-http://127.0.0.1:8200}" local vault_addr="${VAULT_ADDR:-http://127.0.0.1:8200}"
# Build ordered deploy list (S3.4, S4.2): forgejo → woodpecker-server → woodpecker-agent → agents # Build ordered deploy list (S3.4, S4.2, S5.2): forgejo → woodpecker-server → woodpecker-agent → agents → staging → chat
local DEPLOY_ORDER="" local DEPLOY_ORDER=""
for ordered_svc in forgejo woodpecker-server woodpecker-agent agents; do for ordered_svc in forgejo woodpecker-server woodpecker-agent agents staging chat; do
if echo ",$with_services," | grep -q ",$ordered_svc,"; then if echo ",$with_services," | grep -q ",$ordered_svc,"; then
DEPLOY_ORDER="${DEPLOY_ORDER:+${DEPLOY_ORDER} }${ordered_svc}" DEPLOY_ORDER="${DEPLOY_ORDER:+${DEPLOY_ORDER} }${ordered_svc}"
fi fi
@ -950,6 +963,7 @@ _disinto_init_nomad() {
case "$svc" in case "$svc" in
woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;; woodpecker-server|woodpecker-agent) seed_name="woodpecker" ;;
agents) seed_name="agents" ;; agents) seed_name="agents" ;;
chat) seed_name="chat" ;;
esac esac
local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh" local seed_script="${FACTORY_ROOT}/tools/vault-seed-${seed_name}.sh"
if [ -x "$seed_script" ]; then if [ -x "$seed_script" ]; then
@ -1014,6 +1028,12 @@ _disinto_init_nomad() {
if echo ",$with_services," | grep -q ",agents,"; then if echo ",$with_services," | grep -q ",agents,"; then
echo " agents: (polling loop running)" echo " agents: (polling loop running)"
fi fi
if echo ",$with_services," | grep -q ",staging,"; then
echo " staging: (internal, no external port)"
fi
if echo ",$with_services," | grep -q ",chat,"; then
echo " chat: 8080"
fi
echo "────────────────────────────────────────────────────────" echo "────────────────────────────────────────────────────────"
fi fi
@ -1142,9 +1162,9 @@ disinto_init() {
for _svc in $with_services; do for _svc in $with_services; do
_svc=$(echo "$_svc" | xargs) _svc=$(echo "$_svc" | xargs)
case "$_svc" in case "$_svc" in
forgejo|woodpecker-server|woodpecker-agent|agents) ;; forgejo|woodpecker-server|woodpecker-agent|agents|staging|chat) ;;
*) *)
echo "Error: unknown service '${_svc}' — known: forgejo, woodpecker-server, woodpecker-agent, agents" >&2 echo "Error: unknown service '${_svc}' — known: forgejo, woodpecker-server, woodpecker-agent, agents, staging, chat" >&2
exit 1 exit 1
;; ;;
esac esac

View file

@ -49,6 +49,12 @@ client {
read_only = false read_only = false
} }
# Static content for the staging file server (docker/ directory with images,
# HTML, etc.). Claimed by the `site-content` volume in nomad/jobs/staging.hcl,
# which mounts it at /srv/site inside the task. Declared read-only here, and
# the job requests it read-only as well.
host_volume "site-content" {
path = "/srv/disinto/docker"
read_only = true
}
# disinto chat transcripts + attachments. # disinto chat transcripts + attachments.
host_volume "chat-history" { host_volume "chat-history" {
path = "/srv/disinto/chat-history" path = "/srv/disinto/chat-history"

152
nomad/jobs/chat.hcl Normal file
View file

@ -0,0 +1,152 @@
# =============================================================================
# nomad/jobs/chat.hcl — Claude chat UI (Nomad service job)
#
# Part of the Nomad+Vault migration (S5.2, issue #989). Lightweight service
# job for the Claude chat UI with sandbox hardening (#706).
#
# Build:
# Custom image built from docker/chat/Dockerfile as disinto/chat:local
# (same :local pattern as disinto/agents:local).
#
# Sandbox hardening (#706):
# - Read-only root filesystem (enforced via entrypoint)
# - tmpfs /tmp:size=64m for runtime temp files
# - cap_drop ALL (no Linux capabilities)
# - pids_limit 128 (prevent fork bombs)
# - mem_limit 512m (matches compose sandbox hardening)
#
# Vault integration:
# - vault { role = "service-chat" } at group scope
# - Template stanza renders CHAT_OAUTH_CLIENT_ID, CHAT_OAUTH_CLIENT_SECRET,
# FORWARD_AUTH_SECRET from kv/disinto/shared/chat
# - Seeded on fresh boxes by tools/vault-seed-chat.sh
#
# Host volume:
# - chat-history → /var/lib/chat/history (persists conversation history)
#
# Not the runtime yet: docker-compose.yml is still the factory's live stack
# until cutover. This file exists so CI can validate it and S5.2 can wire
# `disinto init --backend=nomad --with chat` to `nomad job run` it.
# =============================================================================
# Nomad service job for the Claude chat UI: one docker task running the locally
# built disinto/chat:local image, with Vault-rendered OAuth secrets, a
# persistent chat-history host volume and sandbox hardening (#706).
job "chat" {
  type        = "service"
  datacenters = ["dc1"]

  group "chat" {
    count = 1

    # ── Vault workload identity (S5.2, issue #989) ──────────────────────────
    # Role `service-chat` defined in vault/roles.yaml, policy in
    # vault/policies/service-chat.hcl. Bound claim pins nomad_job_id = "chat".
    vault {
      role = "service-chat"
    }

    # ── Network ─────────────────────────────────────────────────────────────
    # Host port 8080 for chat UI access (via edge proxy or direct), forwarded
    # to port 8080 inside the container. The task must list this label in its
    # docker `ports` for the mapping to actually be published.
    network {
      port "http" {
        static = 8080
        to     = 8080
      }
    }

    # ── Host volumes ────────────────────────────────────────────────────────
    # chat-history volume: declared in nomad/client.hcl, path
    # /srv/disinto/chat-history on the factory box.
    volume "chat-history" {
      type      = "host"
      source    = "chat-history"
      read_only = false
    }

    # ── Restart policy ──────────────────────────────────────────────────────
    # Up to 3 restarts per 5-minute window, 15s apart; "delay" mode waits for
    # the next window instead of failing the allocation outright.
    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "delay"
    }

    # ── Service registration ────────────────────────────────────────────────
    # Registered with Nomad's native service provider; health is probed over
    # HTTP on the "http" port.
    service {
      name     = "chat"
      port     = "http"
      provider = "nomad"

      check {
        type     = "http"
        path     = "/health"
        interval = "10s"
        timeout  = "3s"
      }
    }

    task "chat" {
      driver = "docker"

      config {
        image      = "disinto/chat:local"
        # Image is built locally by `disinto init`; never try to pull it.
        force_pull = false

        # Publish the group's "http" port into the container. Without this the
        # docker driver maps nothing and the /health check above cannot reach
        # the task (nomad/jobs/staging.hcl sets the same option).
        ports = ["http"]

        # Sandbox hardening (#706): cap_drop ALL (no Linux capabilities),
        # pids_limit 128 (prevent fork bombs).
        # ReadonlyRootfs enforced via entrypoint script (fails if running as root)
        cap_drop   = ["ALL"]
        pids_limit = 128

        # tmpfs /tmp for runtime files (64MB). The docker driver expresses
        # tmpfs mounts through a `mount` block; size is given in bytes
        # (64 * 1024 * 1024).
        mount {
          type     = "tmpfs"
          target   = "/tmp"
          readonly = false

          tmpfs_options {
            size = 67108864
          }
        }

        # Security options for sandbox hardening
        # apparmor=unconfined needed for Claude CLI ptrace access
        # no-new-privileges prevents privilege escalation
        security_opt = ["apparmor=unconfined", "no-new-privileges"]
      }

      # ── Volume mounts ─────────────────────────────────────────────────────
      # Mount chat-history for conversation persistence
      volume_mount {
        volume      = "chat-history"
        destination = "/var/lib/chat/history"
        read_only   = false
      }

      # ── Environment ───────────────────────────────────────────────────────
      # Non-secret settings only. CHAT_OAUTH_CLIENT_ID,
      # CHAT_OAUTH_CLIENT_SECRET and FORWARD_AUTH_SECRET are rendered from
      # kv/disinto/shared/chat by the template stanza below.
      env {
        FORGE_URL                  = "http://forgejo:3000"
        CHAT_MAX_REQUESTS_PER_HOUR = "60"
        CHAT_MAX_REQUESTS_PER_DAY  = "1000"
      }

      # ── Vault-templated secrets (S5.2, issue #989) ────────────────────────
      # Renders chat-secrets.env from Vault KV v2 at kv/disinto/shared/chat and
      # exports it into the task environment (env = true). A change to the
      # rendered content restarts the task.
      # Placeholder values kept < 16 chars to avoid secret-scan CI failures.
      template {
        destination          = "secrets/chat-secrets.env"
        env                  = true
        change_mode          = "restart"
        error_on_missing_key = false
        data                 = <<EOT
{{- with secret "kv/data/disinto/shared/chat" -}}
CHAT_OAUTH_CLIENT_ID={{ .Data.data.chat_oauth_client_id }}
CHAT_OAUTH_CLIENT_SECRET={{ .Data.data.chat_oauth_client_secret }}
FORWARD_AUTH_SECRET={{ .Data.data.forward_auth_secret }}
{{- else -}}
# WARNING: run tools/vault-seed-chat.sh
CHAT_OAUTH_CLIENT_ID=seed-me
CHAT_OAUTH_CLIENT_SECRET=seed-me
FORWARD_AUTH_SECRET=seed-me
{{- end -}}
EOT
      }

      # ── Resources (S5.2, #706) ────────────────────────────────────────────
      # Memory = 512MB (matches docker-compose sandbox hardening)
      resources {
        cpu    = 200
        memory = 512
      }
    }
  }
}

86
nomad/jobs/staging.hcl Normal file
View file

@ -0,0 +1,86 @@
# =============================================================================
# nomad/jobs/staging.hcl — Staging file server (Nomad service job)
#
# Part of the Nomad+Vault migration (S5.2, issue #989). Lightweight service job
# for the staging file server using Caddy as a static file server.
#
# Mount contract:
# This job mounts the `docker/` directory as `/srv/site` (read-only).
# The docker/ directory contains static content (images, HTML, etc.)
# served to staging environment users.
#
# Network:
# No external port exposed — edge proxy routes to it internally.
# Service discovery via Nomad native provider for internal routing.
#
# Not the runtime yet: docker-compose.yml is still the factory's live stack
# until cutover. This file exists so CI can validate it and S5.2 can wire
# `disinto init --backend=nomad --with staging` to `nomad job run` it.
# =============================================================================
# Nomad service job for the staging file server: a single Caddy container that
# serves the read-only `site-content` host volume from /srv/site.
job "staging" {
  type        = "service"
  datacenters = ["dc1"]

  group "staging" {
    count = 1

    # No Vault integration needed — no secrets required (static file server).

    # ── Network ─────────────────────────────────────────────────────────────
    # Intended as an internal service that the edge proxy routes to.
    # NOTE(review): `static = 80` reserves host port 80 on the client node,
    # which does not match the "no external port" intent stated in this file —
    # confirm whether a dynamic host port (drop `static`, keep `to = 80`) is
    # what was meant.
    network {
      port "http" {
        static = 80
        to     = 80
      }
    }

    # ── Host volumes ────────────────────────────────────────────────────────
    # Static content directory; requested read-only.
    volume "site-content" {
      type      = "host"
      source    = "site-content"
      read_only = true
    }

    # ── Restart policy ──────────────────────────────────────────────────────
    # Up to 3 restarts per 5-minute window, 15s apart; "delay" mode waits for
    # the next window instead of failing the allocation outright.
    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "delay"
    }

    # ── Service registration ────────────────────────────────────────────────
    # Registered with Nomad's native service provider so other jobs can
    # discover it; health is probed with an HTTP GET on "/".
    service {
      name     = "staging"
      port     = "http"
      provider = "nomad"

      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "3s"
      }
    }

    task "staging" {
      driver = "docker"

      config {
        image = "caddy:alpine"
        ports = ["http"]

        # The caddy image defines only a default CMD, so `args` on its own
        # would replace that CMD and the container would try to exec
        # "file-server". Name the binary explicitly and pass the subcommand
        # as arguments.
        command = "caddy"
        args    = ["file-server", "--root", "/srv/site"]
      }

      # Mount docker/ directory as /srv/site:ro (static content)
      volume_mount {
        volume      = "site-content"
        destination = "/srv/site"
        read_only   = true
      }

      resources {
        cpu    = 100
        memory = 256
      }
    }
  }
}

View file

@ -215,7 +215,7 @@ setup_file() {
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *"unknown service"* ]] [[ "$output" == *"unknown service"* ]]
[[ "$output" == *"known: forgejo, woodpecker-server, woodpecker-agent, agents"* ]] [[ "$output" == *"known: forgejo, woodpecker-server, woodpecker-agent, agents, staging, chat"* ]]
} }
# S3.4: woodpecker auto-expansion and forgejo auto-inclusion # S3.4: woodpecker auto-expansion and forgejo auto-inclusion

View file

@ -0,0 +1,15 @@
# vault/policies/service-chat.hcl
#
# Read-only access to shared Chat secrets (OAuth client config, forward auth
# secret). Attached to the Chat Nomad job via workload identity (S5.2).
#
# Scope: kv/disinto/shared/chat — entries owned by the operator and
# shared between the chat service and edge proxy.
# KV v2 data path: read-only access to the secret values. This is the path the
# chat job's template reads (`secret "kv/data/disinto/shared/chat"`).
path "kv/data/disinto/shared/chat" {
capabilities = ["read"]
}
# KV v2 metadata path for the same entry: list/read only, no write or delete.
path "kv/metadata/disinto/shared/chat" {
capabilities = ["list", "read"]
}

View file

@ -70,6 +70,13 @@ roles:
namespace: default namespace: default
job_id: agents job_id: agents
# ── Chat UI (nomad/jobs/chat.hcl — S5.2) ─────────────────────────────────
# Claude chat UI service with OAuth secrets. Uses vault/policies/service-chat.hcl.
- name: service-chat
policy: service-chat
namespace: default
job_id: chat
# ── Per-agent bots (nomad/jobs/bot-<role>.hcl — land in later steps) ─────── # ── Per-agent bots (nomad/jobs/bot-<role>.hcl — land in later steps) ───────
# job_id placeholders match the policy name 1:1 until each bot's jobspec # job_id placeholders match the policy name 1:1 until each bot's jobspec
# lands. When a bot's jobspec is added under nomad/jobs/, update the # lands. When a bot's jobspec is added under nomad/jobs/, update the