Compare commits
7 commits
e611288b80
...
ffcadbfee0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ffcadbfee0 | ||
| 3465319ac5 | |||
|
|
c5a7b89a39 | ||
|
|
a835517aea | ||
|
|
d898741283 | ||
|
|
dfe61b55fc | ||
|
|
719fdaeac4 |
6 changed files with 327 additions and 96 deletions
|
|
@ -16,7 +16,7 @@
|
||||||
# Steps (all fail-closed — any error blocks merge):
|
# Steps (all fail-closed — any error blocks merge):
|
||||||
# 1. nomad-config-validate — `nomad config validate` on server + client HCL
|
# 1. nomad-config-validate — `nomad config validate` on server + client HCL
|
||||||
# 2. nomad-job-validate — `nomad job validate` looped over every
|
# 2. nomad-job-validate — `nomad job validate` looped over every
|
||||||
# nomad/jobs/*.nomad.hcl (new jobspecs get
|
# nomad/jobs/*.hcl (new jobspecs get
|
||||||
# CI coverage automatically)
|
# CI coverage automatically)
|
||||||
# 3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl
|
# 3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl
|
||||||
# 4. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto
|
# 4. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto
|
||||||
|
|
@ -57,6 +57,7 @@ steps:
|
||||||
- name: nomad-config-validate
|
- name: nomad-config-validate
|
||||||
image: hashicorp/nomad:1.9.5
|
image: hashicorp/nomad:1.9.5
|
||||||
commands:
|
commands:
|
||||||
|
- nomad version
|
||||||
- nomad config validate nomad/server.hcl nomad/client.hcl
|
- nomad config validate nomad/server.hcl nomad/client.hcl
|
||||||
|
|
||||||
# ── 2. Nomad jobspec HCL syntax check ────────────────────────────────────
|
# ── 2. Nomad jobspec HCL syntax check ────────────────────────────────────
|
||||||
|
|
@ -68,15 +69,15 @@ steps:
|
||||||
#
|
#
|
||||||
# Validation is offline: no running Nomad server is required (exit 0 on
|
# Validation is offline: no running Nomad server is required (exit 0 on
|
||||||
# valid HCL, 1 on syntax/semantic error). The CLI takes a single path
|
# valid HCL, 1 on syntax/semantic error). The CLI takes a single path
|
||||||
# argument so we loop over every `*.nomad.hcl` file under nomad/jobs/ —
|
# argument so we loop over every `*.hcl` file under nomad/jobs/ —
|
||||||
# that way a new jobspec PR gets CI coverage automatically (no separate
|
# that way a new jobspec PR gets CI coverage automatically (no separate
|
||||||
# "edit the pipeline" step to forget). The `.nomad.hcl` suffix is the
|
# "edit the pipeline" step to forget). The `.hcl` suffix is the naming
|
||||||
# naming convention documented in nomad/AGENTS.md; anything else in
|
# convention; anything else in nomad/jobs/ is deliberately not validated
|
||||||
# nomad/jobs/ is deliberately not validated by this step.
|
# by this step.
|
||||||
#
|
#
|
||||||
# `[ -f "$f" ]` guards against the no-match case: POSIX sh does not
|
# `[ -f "$f" ]` guards against the no-match case: POSIX sh does not
|
||||||
# nullglob, so an empty jobs/ directory would leave the literal glob in
|
# nullglob, so an empty jobs/ directory would leave the literal glob in
|
||||||
# "$f" and fail. Today forgejo.nomad.hcl exists, but the guard keeps the
|
# "$f" and fail. Today forgejo.hcl exists, but the guard keeps the
|
||||||
# step safe during any future transient empty state.
|
# step safe during any future transient empty state.
|
||||||
#
|
#
|
||||||
# Scope note: offline validate catches jobspec-level errors (unknown
|
# Scope note: offline validate catches jobspec-level errors (unknown
|
||||||
|
|
@ -91,7 +92,7 @@ steps:
|
||||||
commands:
|
commands:
|
||||||
- |
|
- |
|
||||||
set -e
|
set -e
|
||||||
for f in nomad/jobs/*.nomad.hcl; do
|
for f in nomad/jobs/*.hcl; do
|
||||||
[ -f "$f" ] || continue
|
[ -f "$f" ] || continue
|
||||||
echo "validating jobspec: $f"
|
echo "validating jobspec: $f"
|
||||||
nomad job validate "$f"
|
nomad job validate "$f"
|
||||||
|
|
|
||||||
131
bin/disinto
131
bin/disinto
|
|
@ -82,6 +82,7 @@ Init options:
|
||||||
--ci-id <n> Woodpecker CI repo ID (default: 0 = no CI)
|
--ci-id <n> Woodpecker CI repo ID (default: 0 = no CI)
|
||||||
--forge-url <url> Forge base URL (default: http://localhost:3000)
|
--forge-url <url> Forge base URL (default: http://localhost:3000)
|
||||||
--backend <value> Orchestration backend: docker (default) | nomad
|
--backend <value> Orchestration backend: docker (default) | nomad
|
||||||
|
--with <services> (nomad) Deploy services: forgejo[,...] (S1.3)
|
||||||
--empty (nomad) Bring up cluster only, no jobs (S0.4)
|
--empty (nomad) Bring up cluster only, no jobs (S0.4)
|
||||||
--bare Skip compose generation (bare-metal setup)
|
--bare Skip compose generation (bare-metal setup)
|
||||||
--build Use local docker build instead of registry images (dev mode)
|
--build Use local docker build instead of registry images (dev mode)
|
||||||
|
|
@ -662,14 +663,20 @@ prompt_admin_password() {
|
||||||
# init run); operators running without sudo-NOPASSWD should invoke
|
# init run); operators running without sudo-NOPASSWD should invoke
|
||||||
# `sudo disinto init ...` directly.
|
# `sudo disinto init ...` directly.
|
||||||
_disinto_init_nomad() {
|
_disinto_init_nomad() {
|
||||||
local dry_run="${1:-false}" empty="${2:-false}"
|
local dry_run="${1:-false}" empty="${2:-false}" with_services="${3:-}"
|
||||||
local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh"
|
local cluster_up="${FACTORY_ROOT}/lib/init/nomad/cluster-up.sh"
|
||||||
|
local deploy_sh="${FACTORY_ROOT}/lib/init/nomad/deploy.sh"
|
||||||
|
|
||||||
if [ ! -x "$cluster_up" ]; then
|
if [ ! -x "$cluster_up" ]; then
|
||||||
echo "Error: ${cluster_up} not found or not executable" >&2
|
echo "Error: ${cluster_up} not found or not executable" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -n "$with_services" ] && [ ! -x "$deploy_sh" ]; then
|
||||||
|
echo "Error: ${deploy_sh} not found or not executable" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# --empty and default both invoke cluster-up today. Log the requested
|
# --empty and default both invoke cluster-up today. Log the requested
|
||||||
# mode so the dispatch is visible in factory bootstrap logs — Step 1
|
# mode so the dispatch is visible in factory bootstrap logs — Step 1
|
||||||
# will branch on $empty to gate the job-deployment path.
|
# will branch on $empty to gate the job-deployment path.
|
||||||
|
|
@ -679,31 +686,105 @@ _disinto_init_nomad() {
|
||||||
echo "nomad backend: default (cluster-up; jobs deferred to Step 1)"
|
echo "nomad backend: default (cluster-up; jobs deferred to Step 1)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Dry-run forwards straight through; cluster-up.sh prints its own step
|
# Dry-run: print cluster-up plan + deploy.sh plan
|
||||||
# list and exits 0 without touching the box.
|
|
||||||
local -a cmd=("$cluster_up")
|
|
||||||
if [ "$dry_run" = "true" ]; then
|
if [ "$dry_run" = "true" ]; then
|
||||||
cmd+=("--dry-run")
|
echo ""
|
||||||
"${cmd[@]}"
|
echo "── Cluster-up dry-run ─────────────────────────────────"
|
||||||
exit $?
|
local -a cmd=("$cluster_up" "--dry-run")
|
||||||
|
"${cmd[@]}" || true
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if [ -n "$with_services" ]; then
|
||||||
|
echo "── Deploy services dry-run ────────────────────────────"
|
||||||
|
echo "[deploy] services to deploy: ${with_services}"
|
||||||
|
local IFS=','
|
||||||
|
for svc in $with_services; do
|
||||||
|
svc=$(echo "$svc" | xargs) # trim whitespace
|
||||||
|
# Validate known services first
|
||||||
|
case "$svc" in
|
||||||
|
forgejo) ;;
|
||||||
|
*)
|
||||||
|
echo "Error: unknown service '${svc}' — known: forgejo" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl"
|
||||||
|
if [ ! -f "$jobspec_path" ]; then
|
||||||
|
echo "Error: jobspec not found: ${jobspec_path}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "[deploy] [dry-run] nomad job validate ${jobspec_path}"
|
||||||
|
echo "[deploy] [dry-run] nomad job run -detach ${jobspec_path}"
|
||||||
|
done
|
||||||
|
echo "[deploy] dry-run complete"
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Real run — needs root. Invoke via sudo if we're not already root so
|
# Real run: cluster-up + deploy services
|
||||||
# the command's exit code propagates directly. We don't distinguish
|
local -a cluster_cmd=("$cluster_up")
|
||||||
# "sudo denied" from "cluster-up.sh failed" here; both surface as a
|
|
||||||
# non-zero exit, and cluster-up.sh's own error messages cover the
|
|
||||||
# latter case.
|
|
||||||
local rc=0
|
|
||||||
if [ "$(id -u)" -eq 0 ]; then
|
if [ "$(id -u)" -eq 0 ]; then
|
||||||
"${cmd[@]}" || rc=$?
|
"${cluster_cmd[@]}" || exit $?
|
||||||
else
|
else
|
||||||
if ! command -v sudo >/dev/null 2>&1; then
|
if ! command -v sudo >/dev/null 2>&1; then
|
||||||
echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2
|
echo "Error: cluster-up.sh must run as root and sudo is not installed" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
sudo -n -- "${cmd[@]}" || rc=$?
|
sudo -n -- "${cluster_cmd[@]}" || exit $?
|
||||||
fi
|
fi
|
||||||
exit "$rc"
|
|
||||||
|
# Deploy services if requested
|
||||||
|
if [ -n "$with_services" ]; then
|
||||||
|
echo ""
|
||||||
|
echo "── Deploying services ─────────────────────────────────"
|
||||||
|
local -a deploy_cmd=("$deploy_sh")
|
||||||
|
# Split comma-separated service list into positional args
|
||||||
|
local IFS=','
|
||||||
|
for svc in $with_services; do
|
||||||
|
svc=$(echo "$svc" | xargs) # trim whitespace
|
||||||
|
if ! echo "$svc" | grep -qE '^[a-zA-Z0-9_-]+$'; then
|
||||||
|
echo "Error: invalid service name '${svc}' — must match ^[a-zA-Z0-9_-]+$" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# Validate known services FIRST (before jobspec check)
|
||||||
|
case "$svc" in
|
||||||
|
forgejo) ;;
|
||||||
|
*)
|
||||||
|
echo "Error: unknown service '${svc}' — known: forgejo" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
# Check jobspec exists
|
||||||
|
local jobspec_path="${FACTORY_ROOT}/nomad/jobs/${svc}.hcl"
|
||||||
|
if [ ! -f "$jobspec_path" ]; then
|
||||||
|
echo "Error: jobspec not found: ${jobspec_path}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
deploy_cmd+=("$svc")
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$(id -u)" -eq 0 ]; then
|
||||||
|
"${deploy_cmd[@]}" || exit $?
|
||||||
|
else
|
||||||
|
if ! command -v sudo >/dev/null 2>&1; then
|
||||||
|
echo "Error: deploy.sh must run as root and sudo is not installed" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sudo -n -- "${deploy_cmd[@]}" || exit $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Print final summary
|
||||||
|
echo ""
|
||||||
|
echo "── Summary ────────────────────────────────────────────"
|
||||||
|
echo "Cluster: Nomad+Vault cluster is up"
|
||||||
|
echo "Deployed: ${with_services}"
|
||||||
|
if echo "$with_services" | grep -q "forgejo"; then
|
||||||
|
echo "Ports: forgejo: 3000"
|
||||||
|
fi
|
||||||
|
echo "────────────────────────────────────────────────────────"
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
}
|
}
|
||||||
|
|
||||||
disinto_init() {
|
disinto_init() {
|
||||||
|
|
@ -721,7 +802,7 @@ disinto_init() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Parse flags
|
# Parse flags
|
||||||
local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false
|
local branch="" repo_root="" ci_id="0" auto_yes=false forge_url_flag="" bare=false rotate_tokens=false use_build=false dry_run=false backend="docker" empty=false with_services=""
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--branch) branch="$2"; shift 2 ;;
|
--branch) branch="$2"; shift 2 ;;
|
||||||
|
|
@ -730,6 +811,8 @@ disinto_init() {
|
||||||
--forge-url) forge_url_flag="$2"; shift 2 ;;
|
--forge-url) forge_url_flag="$2"; shift 2 ;;
|
||||||
--backend) backend="$2"; shift 2 ;;
|
--backend) backend="$2"; shift 2 ;;
|
||||||
--backend=*) backend="${1#--backend=}"; shift ;;
|
--backend=*) backend="${1#--backend=}"; shift ;;
|
||||||
|
--with) with_services="$2"; shift 2 ;;
|
||||||
|
--with=*) with_services="${1#--with=}"; shift ;;
|
||||||
--bare) bare=true; shift ;;
|
--bare) bare=true; shift ;;
|
||||||
--build) use_build=true; shift ;;
|
--build) use_build=true; shift ;;
|
||||||
--empty) empty=true; shift ;;
|
--empty) empty=true; shift ;;
|
||||||
|
|
@ -764,11 +847,23 @@ disinto_init() {
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# --with requires --backend=nomad
|
||||||
|
if [ -n "$with_services" ] && [ "$backend" != "nomad" ]; then
|
||||||
|
echo "Error: --with requires --backend=nomad" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --empty and --with are mutually exclusive
|
||||||
|
if [ "$empty" = true ] && [ -n "$with_services" ]; then
|
||||||
|
echo "Error: --empty and --with are mutually exclusive" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# Dispatch on backend — the nomad path runs lib/init/nomad/cluster-up.sh
|
# Dispatch on backend — the nomad path runs lib/init/nomad/cluster-up.sh
|
||||||
# (S0.4). The default and --empty variants are identical today; Step 1
|
# (S0.4). The default and --empty variants are identical today; Step 1
|
||||||
# will branch on $empty to add job deployment to the default path.
|
# will branch on $empty to add job deployment to the default path.
|
||||||
if [ "$backend" = "nomad" ]; then
|
if [ "$backend" = "nomad" ]; then
|
||||||
_disinto_init_nomad "$dry_run" "$empty"
|
_disinto_init_nomad "$dry_run" "$empty" "$with_services"
|
||||||
# shellcheck disable=SC2317 # _disinto_init_nomad always exits today;
|
# shellcheck disable=SC2317 # _disinto_init_nomad always exits today;
|
||||||
# `return` is defensive against future refactors.
|
# `return` is defensive against future refactors.
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -1,54 +1,94 @@
|
||||||
# agents-llama — Local-Qwen Agents
|
# Local-Model Agents
|
||||||
|
|
||||||
The `agents-llama` service is an optional compose service that runs agents
|
Local-model agents run the same agent code as the Claude-backed agents, but
|
||||||
backed by a local llama-server instance (e.g. Qwen) instead of the Anthropic
|
connect to a local llama-server (or compatible OpenAI-API endpoint) instead of
|
||||||
API. It uses the same Docker image as the main `agents` service but connects to
|
the Anthropic API. This document describes the current activation flow using
|
||||||
a local inference endpoint via `ANTHROPIC_BASE_URL`.
|
`disinto hire-an-agent` and `[agents.X]` TOML configuration.
|
||||||
|
|
||||||
Two profiles are available:
|
## Overview
|
||||||
|
|
||||||
| Profile | Service | Roles | Use case |
|
Local-model agents are configured via `[agents.<name>]` sections in
|
||||||
|---------|---------|-------|----------|
|
`projects/<project>.toml`. Each agent gets:
|
||||||
| _(default)_ | `agents-llama` | `dev` only | Conservative: single-role soak test |
|
- Its own Forgejo bot user with dedicated API token and password
|
||||||
| `agents-llama-all` | `agents-llama-all` | all 7 (review, dev, gardener, architect, planner, predictor, supervisor) | Pre-migration: validate every role on llama before Nomad cutover |
|
- A dedicated compose service `agents-<name>`
|
||||||
|
- Isolated credentials stored as `FORGE_TOKEN_<USER_UPPER>` and `FORGE_PASS_<USER_UPPER>` in `.env`
|
||||||
|
|
||||||
## Enabling
|
## Prerequisites
|
||||||
|
|
||||||
Set `ENABLE_LLAMA_AGENT=1` in `.env` (or `.env.enc`) and provide the required
|
- **llama-server** (or compatible OpenAI-API endpoint) running on the host,
|
||||||
credentials:
|
reachable from inside Docker at the URL you will configure.
|
||||||
|
- A disinto factory already initialized (`disinto init` completed).
|
||||||
|
|
||||||
```env
|
## Hiring a local-model agent
|
||||||
ENABLE_LLAMA_AGENT=1
|
|
||||||
FORGE_TOKEN_LLAMA=<dev-qwen API token>
|
|
||||||
FORGE_PASS_LLAMA=<dev-qwen password>
|
|
||||||
ANTHROPIC_BASE_URL=http://host.docker.internal:8081 # llama-server endpoint
|
|
||||||
```
|
|
||||||
|
|
||||||
Then regenerate the compose file (`disinto init ...`) and bring the stack up.
|
Use `disinto hire-an-agent` with `--local-model` to create a bot user and
|
||||||
|
configure the agent:
|
||||||
## Hiring a new agent
|
|
||||||
|
|
||||||
Use `disinto hire-an-agent` to create a Forgejo user, API token, and password,
|
|
||||||
and write all required credentials to `.env`:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Local model agent
|
# Hire a local-model agent for the dev role
|
||||||
disinto hire-an-agent dev-qwen dev \
|
disinto hire-an-agent dev-qwen dev \
|
||||||
--local-model http://10.10.10.1:8081 \
|
--local-model http://10.10.10.1:8081 \
|
||||||
--model unsloth/Qwen3.5-35B-A3B
|
--model unsloth/Qwen3.5-35B-A3B
|
||||||
|
|
||||||
# Anthropic backend agent (requires ANTHROPIC_API_KEY in environment)
|
|
||||||
disinto hire-an-agent dev-qwen dev
|
|
||||||
```
|
```
|
||||||
|
|
||||||
The command writes the following to `.env`:
|
The command performs these steps:
|
||||||
- `FORGE_TOKEN_<USER_UPPER>` — derived from the agent's Forgejo username (e.g., `FORGE_TOKEN_DEV_QWEN`)
|
|
||||||
- `FORGE_PASS_<USER_UPPER>` — the agent's Forgejo password
|
|
||||||
- `ANTHROPIC_BASE_URL` (local model) or `ANTHROPIC_API_KEY` (Anthropic backend)
|
|
||||||
|
|
||||||
## Rotation
|
1. **Creates a Forgejo user** `dev-qwen` with a random password
|
||||||
|
2. **Generates an API token** for the user
|
||||||
|
3. **Writes credentials to `.env`**:
|
||||||
|
- `FORGE_TOKEN_DEV_QWEN` — the API token
|
||||||
|
- `FORGE_PASS_DEV_QWEN` — the password
|
||||||
|
- `ANTHROPIC_BASE_URL` — the llama endpoint (required by the agent)
|
||||||
|
4. **Writes `[agents.dev-qwen]` to `projects/<project>.toml`** with:
|
||||||
|
- `base_url`, `model`, `api_key`
|
||||||
|
- `roles = ["dev"]`
|
||||||
|
- `forge_user = "dev-qwen"`
|
||||||
|
- `compact_pct = 60`
|
||||||
|
- `poll_interval = 60`
|
||||||
|
5. **Regenerates `docker-compose.yml`** to include the `agents-dev-qwen` service
|
||||||
|
|
||||||
Re-running `disinto hire-an-agent <same-name>` rotates credentials idempotently:
|
### Anthropic backend agents
|
||||||
|
|
||||||
|
For agents that use the Anthropic API instead of a local model, omit `--local-model`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Anthropic backend agent (requires ANTHROPIC_API_KEY in environment)
|
||||||
|
export ANTHROPIC_API_KEY="sk-..."
|
||||||
|
disinto hire-an-agent dev-claude dev
|
||||||
|
```
|
||||||
|
|
||||||
|
This writes `ANTHROPIC_API_KEY` to `.env` instead of `ANTHROPIC_BASE_URL`.
|
||||||
|
|
||||||
|
## Activation and running
|
||||||
|
|
||||||
|
Once hired, the agent service is added to `docker-compose.yml`. Start the
|
||||||
|
service with `docker compose up -d`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start all agent services
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# Start a single named agent service
|
||||||
|
docker compose up -d agents-dev-qwen
|
||||||
|
|
||||||
|
# Start multiple named agent services
|
||||||
|
docker compose up -d agents-dev-qwen agents-planner
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stopping agents
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop a specific agent service
|
||||||
|
docker compose down agents-dev-qwen
|
||||||
|
|
||||||
|
# Stop all agent services
|
||||||
|
docker compose down
|
||||||
|
```
|
||||||
|
|
||||||
|
## Credential rotation
|
||||||
|
|
||||||
|
Re-running `disinto hire-an-agent <same-name>` with the same parameters rotates
|
||||||
|
credentials idempotently:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Re-hire the same agent to rotate token and password
|
# Re-hire the same agent to rotate token and password
|
||||||
|
|
@ -66,39 +106,86 @@ disinto hire-an-agent dev-qwen dev \
|
||||||
This is the recommended way to rotate agent credentials. The `.env` file is
|
This is the recommended way to rotate agent credentials. The `.env` file is
|
||||||
updated in place, so no manual editing is required.
|
updated in place, so no manual editing is required.
|
||||||
|
|
||||||
If you need to manually rotate credentials, you can:
|
If you need to manually rotate credentials:
|
||||||
1. Generate a new token in Forgejo admin UI
|
1. Generate a new token in Forgejo admin UI
|
||||||
2. Edit `.env` and replace `FORGE_TOKEN_<USER_UPPER>` and `FORGE_PASS_<USER_UPPER>`
|
2. Edit `.env` and replace `FORGE_TOKEN_<USER_UPPER>` and `FORGE_PASS_<USER_UPPER>`
|
||||||
3. Restart the agent service: `docker compose restart disinto-agents-<name>`
|
3. Restart the agent service: `docker compose restart agents-<name>`
|
||||||
|
|
||||||
### Running all 7 roles (agents-llama-all)
|
## Configuration reference
|
||||||
|
|
||||||
```bash
|
### Environment variables (`.env`)
|
||||||
docker compose --profile agents-llama-all up -d
|
|
||||||
|
| Variable | Description | Example |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `FORGE_TOKEN_<USER_UPPER>` | Forgejo API token for the bot user | `FORGE_TOKEN_DEV_QWEN` |
|
||||||
|
| `FORGE_PASS_<USER_UPPER>` | Forgejo password for the bot user | `FORGE_PASS_DEV_QWEN` |
|
||||||
|
| `ANTHROPIC_BASE_URL` | Local llama endpoint (local model agents) | `http://host.docker.internal:8081` |
|
||||||
|
| `ANTHROPIC_API_KEY` | Anthropic API key (Anthropic backend agents) | `sk-...` |
|
||||||
|
|
||||||
|
### Project TOML (`[agents.<name>]` section)
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[agents.dev-qwen]
|
||||||
|
base_url = "http://10.10.10.1:8081"
|
||||||
|
model = "unsloth/Qwen3.5-35B-A3B"
|
||||||
|
api_key = "sk-no-key-required"
|
||||||
|
roles = ["dev"]
|
||||||
|
forge_user = "dev-qwen"
|
||||||
|
compact_pct = 60
|
||||||
|
poll_interval = 60
|
||||||
```
|
```
|
||||||
|
|
||||||
This starts the `agents-llama-all` container with all 7 bot roles against the
|
| Field | Description |
|
||||||
local llama endpoint. The per-role forge tokens (`FORGE_REVIEW_TOKEN`,
|
|-------|-------------|
|
||||||
`FORGE_GARDENER_TOKEN`, etc.) must be set in `.env` — they are the same tokens
|
| `base_url` | llama-server endpoint |
|
||||||
used by the Claude-backed `agents` container.
|
| `model` | Model name (for logging/identification) |
|
||||||
|
| `api_key` | Required by API; set to placeholder for llama |
|
||||||
## Prerequisites
|
| `roles` | Agent roles this instance handles |
|
||||||
|
| `forge_user` | Forgejo bot username |
|
||||||
- **llama-server** (or compatible OpenAI-API endpoint) running on the host,
|
| `compact_pct` | Context compaction threshold (lower = more aggressive) |
|
||||||
reachable from inside Docker at the URL set in `ANTHROPIC_BASE_URL`.
|
| `poll_interval` | Seconds between polling cycles |
|
||||||
- A Forgejo bot user (e.g. `dev-qwen`) with its own API token and password,
|
|
||||||
stored as `FORGE_TOKEN_LLAMA` / `FORGE_PASS_LLAMA`.
|
|
||||||
|
|
||||||
## Behaviour
|
## Behaviour
|
||||||
|
|
||||||
- `agents-llama`: `AGENT_ROLES=dev` — only picks up dev work.
|
- Each agent runs with `AGENT_ROLES` set to its configured roles
|
||||||
- `agents-llama-all`: `AGENT_ROLES=review,dev,gardener,architect,planner,predictor,supervisor` — runs all 7 roles.
|
|
||||||
- `CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60` — more aggressive compaction for smaller
|
- `CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=60` — more aggressive compaction for smaller
|
||||||
context windows.
|
context windows
|
||||||
- Serialises on the llama-server's single KV cache (AD-002).
|
- Agents serialize on the llama-server's single KV cache (AD-002)
|
||||||
|
|
||||||
## Disabling
|
## Troubleshooting
|
||||||
|
|
||||||
Set `ENABLE_LLAMA_AGENT=0` (or leave it unset) and regenerate. The service
|
### Agent service not starting
|
||||||
block is omitted entirely from `docker-compose.yml`; the stack starts cleanly
|
|
||||||
without it.
|
Check that the service was created by `disinto hire-an-agent`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose config | grep -A5 "agents-dev-qwen"
|
||||||
|
```
|
||||||
|
|
||||||
|
If the service is missing, re-run `disinto hire-an-agent dev-qwen dev` to
|
||||||
|
regenerate `docker-compose.yml`.
|
||||||
|
|
||||||
|
### Model endpoint unreachable
|
||||||
|
|
||||||
|
Verify llama-server is accessible from inside Docker:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.yml exec agents curl -sf http://host.docker.internal:8081/health
|
||||||
|
```
|
||||||
|
|
||||||
|
If using a custom host IP, update `ANTHROPIC_BASE_URL` in `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Update the base URL
|
||||||
|
sed -i 's|^ANTHROPIC_BASE_URL=.*|ANTHROPIC_BASE_URL=http://192.168.1.100:8081|' .env
|
||||||
|
|
||||||
|
# Restart the agent
|
||||||
|
docker compose restart agents-dev-qwen
|
||||||
|
```
|
||||||
|
|
||||||
|
### Invalid agent name
|
||||||
|
|
||||||
|
Agent names must match `^[a-z]([a-z0-9]|-[a-z0-9])*$` (lowercase letters, digits,
|
||||||
|
hyphens; starts with letter, ends with alphanumeric). A trailing digit is fine
|
||||||
|
(`dev-qwen2` is valid); names like `dev-` (trailing hyphen) or `dev--qwen`
|
||||||
|
(consecutive hyphens) will be rejected.
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ it owns.
|
||||||
## What does NOT live here yet
|
## What does NOT live here yet
|
||||||
|
|
||||||
- **Jobspecs.** Step 0 brings up an *empty* cluster. Step 1 (and later)
|
- **Jobspecs.** Step 0 brings up an *empty* cluster. Step 1 (and later)
|
||||||
adds `*.nomad.hcl` job files for forgejo, woodpecker, agents, caddy,
|
adds `*.hcl` job files for forgejo, woodpecker, agents, caddy,
|
||||||
etc. When that lands, jobspecs will live in `nomad/jobs/` and each
|
etc. When that lands, jobspecs will live in `nomad/jobs/` and each
|
||||||
will get its own header comment pointing to the `host_volume` names
|
will get its own header comment pointing to the `host_volume` names
|
||||||
it consumes (`volume = "forgejo-data"`, etc. — declared in
|
it consumes (`volume = "forgejo-data"`, etc. — declared in
|
||||||
|
|
@ -35,11 +35,11 @@ it owns.
|
||||||
|
|
||||||
## Adding a jobspec (Step 1 and later)
|
## Adding a jobspec (Step 1 and later)
|
||||||
|
|
||||||
1. Drop a file in `nomad/jobs/<service>.nomad.hcl`. The `.nomad.hcl`
|
1. Drop a file in `nomad/jobs/<service>.hcl`. The `.hcl` suffix is
|
||||||
suffix is load-bearing: `.woodpecker/nomad-validate.yml` globs on
|
load-bearing: `.woodpecker/nomad-validate.yml` globs on exactly that
|
||||||
exactly that suffix to auto-pick up new jobspecs (see step 2 in
|
suffix to auto-pick up new jobspecs (see step 2 in "How CI validates
|
||||||
"How CI validates these files" below). Anything else in
|
these files" below). Anything else in `nomad/jobs/` is silently
|
||||||
`nomad/jobs/` is silently skipped by CI.
|
skipped by CI.
|
||||||
2. If it needs persistent state, reference a `host_volume` already
|
2. If it needs persistent state, reference a `host_volume` already
|
||||||
declared in `client.hcl` — *don't* add ad-hoc host paths in the
|
declared in `client.hcl` — *don't* add ad-hoc host paths in the
|
||||||
jobspec. If a new volume is needed, add it to **both**:
|
jobspec. If a new volume is needed, add it to **both**:
|
||||||
|
|
@ -52,9 +52,9 @@ it owns.
|
||||||
rejects the mismatch at placement time instead.
|
rejects the mismatch at placement time instead.
|
||||||
3. Pin image tags — `image = "forgejo/forgejo:1.22.5"`, not `:latest`.
|
3. Pin image tags — `image = "forgejo/forgejo:1.22.5"`, not `:latest`.
|
||||||
4. No pipeline edit required — step 2 of `nomad-validate.yml` globs
|
4. No pipeline edit required — step 2 of `nomad-validate.yml` globs
|
||||||
over `nomad/jobs/*.nomad.hcl` and validates every match. Just make
|
over `nomad/jobs/*.hcl` and validates every match. Just make sure
|
||||||
sure the existing `nomad/**` trigger path still covers your file
|
the existing `nomad/**` trigger path still covers your file (it
|
||||||
(it does for anything under `nomad/jobs/`).
|
does for anything under `nomad/jobs/`).
|
||||||
|
|
||||||
## How CI validates these files
|
## How CI validates these files
|
||||||
|
|
||||||
|
|
@ -67,7 +67,7 @@ fail-closed steps:
|
||||||
driver config. Vault HCL is excluded (different tool). Jobspecs are
|
driver config. Vault HCL is excluded (different tool). Jobspecs are
|
||||||
excluded too — agent-config and jobspec are disjoint HCL grammars;
|
excluded too — agent-config and jobspec are disjoint HCL grammars;
|
||||||
running this step on a jobspec rejects it with "unknown block 'job'".
|
running this step on a jobspec rejects it with "unknown block 'job'".
|
||||||
2. **`nomad job validate nomad/jobs/*.nomad.hcl`** (loop, one call per file)
|
2. **`nomad job validate nomad/jobs/*.hcl`** (loop, one call per file)
|
||||||
— parses each jobspec's HCL, fails on unknown stanzas, missing
|
— parses each jobspec's HCL, fails on unknown stanzas, missing
|
||||||
required fields, wrong value types, invalid driver config. Runs
|
required fields, wrong value types, invalid driver config. Runs
|
||||||
offline (no Nomad server needed) so CI exit 0 ≠ "this will schedule
|
offline (no Nomad server needed) so CI exit 0 ≠ "this will schedule
|
||||||
|
|
@ -79,7 +79,7 @@ fail-closed steps:
|
||||||
- image reachability — `image = "codeberg.org/forgejo/forgejo:11.0"`
|
- image reachability — `image = "codeberg.org/forgejo/forgejo:11.0"`
|
||||||
is accepted even if the registry is down or the tag is wrong.
|
is accepted even if the registry is down or the tag is wrong.
|
||||||
New jobspecs are picked up automatically by the glob — no pipeline
|
New jobspecs are picked up automatically by the glob — no pipeline
|
||||||
edit needed as long as the file is named `<name>.nomad.hcl`.
|
edit needed as long as the file is named `<name>.hcl`.
|
||||||
3. **`vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener`**
|
3. **`vault operator diagnose -config=nomad/vault.hcl -skip=storage -skip=listener`**
|
||||||
— Vault's equivalent syntax + schema check. `-skip=storage/listener`
|
— Vault's equivalent syntax + schema check. `-skip=storage/listener`
|
||||||
disables the runtime checks (CI containers don't have
|
disables the runtime checks (CI containers don't have
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# nomad/jobs/forgejo.nomad.hcl — Forgejo git server (Nomad service job)
|
# nomad/jobs/forgejo.hcl — Forgejo git server (Nomad service job)
|
||||||
#
|
#
|
||||||
# Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to
|
# Part of the Nomad+Vault migration (S1.1, issue #840). First jobspec to
|
||||||
# land under nomad/jobs/ — proves the docker driver + host_volume plumbing
|
# land under nomad/jobs/ — proves the docker driver + host_volume plumbing
|
||||||
|
|
@ -143,3 +143,51 @@ setup_file() {
|
||||||
[[ "$output" == *"repo URL required"* ]]
|
[[ "$output" == *"repo URL required"* ]]
|
||||||
[[ "$output" != *"Unknown option"* ]]
|
[[ "$output" != *"Unknown option"* ]]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── --with flag tests ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --with forgejo --dry-run prints deploy plan" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"services to deploy: forgejo"* ]]
|
||||||
|
[[ "$output" == *"[deploy] [dry-run] nomad job validate"* ]]
|
||||||
|
[[ "$output" == *"[deploy] [dry-run] nomad job run -detach"* ]]
|
||||||
|
[[ "$output" == *"[deploy] dry-run complete"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --with forgejo,forgejo --dry-run handles comma-separated services" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with forgejo,forgejo --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"services to deploy: forgejo,forgejo"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "disinto init --backend=docker --with forgejo errors with '--with requires --backend=nomad'" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=docker --with forgejo
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"--with requires --backend=nomad"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --empty --with forgejo errors with mutually exclusive" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --empty --with forgejo
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --with unknown-service errors with unknown service" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with unknown-service --dry-run
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"unknown service"* ]]
|
||||||
|
[[ "$output" == *"known: forgejo"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --with forgejo (flag=value syntax) works" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --backend=nomad --with=forgejo --dry-run
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"services to deploy: forgejo"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "disinto init --backend=nomad --with forgejo --empty --dry-run rejects in any order" {
|
||||||
|
run "$DISINTO_BIN" init placeholder/repo --with forgejo --backend=nomad --empty --dry-run
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"--empty and --with are mutually exclusive"* ]]
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue