Compare commits

...

5 commits

Author SHA1 Message Date
dev-qwen2
23e47e3820 fix: bug: disinto init --backend=nomad — does not bootstrap Forgejo admin user (#1069)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
ci/woodpecker/pr/ci Pipeline failed
ci/woodpecker/pr/nomad-validate Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful
2026-04-20 08:06:06 +00:00
49190359b8 Merge pull request 'fix: bug: deploy.sh 360s still too tight for chat cold-start + cascade-skip masks edge/vault-runner (#1070)' (#1071) from fix/issue-1070 into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
2026-04-20 08:04:27 +00:00
f89d22dd39 Merge pull request 'fix: bug: disinto backup import — schema mismatch with create; 0 issues imported (#1068)' (#1072) from fix/issue-1068 into main
Some checks are pending
ci/woodpecker/push/ci Pipeline is pending
2026-04-20 08:01:51 +00:00
Agent
4c6d545060 fix: bug: disinto backup import — schema mismatch with create; 0 issues imported (#1068)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
2026-04-20 07:58:25 +00:00
Claude
d1a026c702 fix: deploy.sh 360s still too tight for chat cold-start + cascade-skip masks edge/vault-runner (#1070)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/nomad-validate Pipeline was successful
Two changes:
- Set JOB_READY_TIMEOUT_CHAT=600 (chat cold-start takes ~5-6 min on fresh LXC)
- On deploy timeout/failure, log WARNING and continue submitting remaining jobs
  instead of dying immediately; print final health summary with failed jobs list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 07:56:30 +00:00
4 changed files with 332 additions and 26 deletions

View file

@ -838,6 +838,11 @@ _disinto_init_nomad() {
fi
echo "[deploy] [dry-run] nomad job validate ${jobspec_path}"
echo "[deploy] [dry-run] nomad job run -detach ${jobspec_path}"
# Post-deploy: forgejo-bootstrap
if [ "$svc" = "forgejo" ]; then
local bootstrap_script="${FACTORY_ROOT}/lib/init/nomad/forgejo-bootstrap.sh"
echo "[deploy] [dry-run] [post-deploy] would run ${bootstrap_script}"
fi
done
echo "[deploy] dry-run complete"
fi
@ -1054,6 +1059,26 @@ _disinto_init_nomad() {
fi
sudo -n -- "${deploy_cmd[@]}" || exit $?
fi
# Post-deploy: bootstrap Forgejo admin user after forgejo deployment
if [ "$svc" = "forgejo" ]; then
echo ""
echo "── Bootstrapping Forgejo admin user ───────────────────────"
local bootstrap_script="${FACTORY_ROOT}/lib/init/nomad/forgejo-bootstrap.sh"
if [ -x "$bootstrap_script" ]; then
if [ "$(id -u)" -eq 0 ]; then
"$bootstrap_script" || exit $?
else
if ! command -v sudo >/dev/null 2>&1; then
echo "Error: forgejo-bootstrap.sh must run as root and sudo is not installed" >&2
exit 1
fi
sudo -n -- "$bootstrap_script" || exit $?
fi
else
echo "warning: forgejo-bootstrap.sh not found or not executable" >&2
fi
fi
done
# Run vault-runner (unconditionally, not gated by --with) — infrastructure job

View file

@ -252,32 +252,33 @@ backup_import_disinto_ops_repo() {
}
# ── Step 4: Import issues from backup ────────────────────────────────────────
# Usage: backup_import_issues <slug> <issues_dir>
# Usage: backup_import_issues <slug> <issues_file>
# issues_file is a JSON array of issues (per create schema)
# Returns: 0 on success
backup_import_issues() {
local slug="$1"
local issues_dir="$2"
local issues_file="$2"
if [ ! -d "$issues_dir" ]; then
backup_log "No issues directory found, skipping"
if [ ! -f "$issues_file" ]; then
backup_log "No issues file found, skipping"
return 0
fi
local count
count=$(jq 'length' "$issues_file")
backup_log "Importing ${count} issues from ${issues_file}"
local created=0
local skipped=0
for issue_file in "${issues_dir}"/*.json; do
[ -f "$issue_file" ] || continue
backup_log "Processing issue file: $(basename "$issue_file")"
for i in $(seq 0 $((count - 1))); do
local issue_num title body
issue_num=$(jq -r '.number // empty' "$issue_file")
title=$(jq -r '.title // empty' "$issue_file")
body=$(jq -r '.body // empty' "$issue_file")
issue_num=$(jq -r ".[${i}].number" "$issues_file")
title=$(jq -r ".[${i}].title" "$issues_file")
body=$(jq -r ".[${i}].body" "$issues_file")
if [ -z "$issue_num" ] || [ "$issue_num" = "null" ]; then
backup_log "WARNING: skipping issue without number: $(basename "$issue_file")"
backup_log "WARNING: skipping issue without number at index ${i}"
continue
fi
@ -292,7 +293,7 @@ backup_import_issues() {
local -a labels=()
while IFS= read -r label; do
[ -n "$label" ] && labels+=("$label")
done < <(jq -r '.labels[]? // empty' "$issue_file")
done < <(jq -r ".[${i}].labels[]? // empty" "$issues_file")
# Create issue
local new_num
@ -345,19 +346,24 @@ backup_import() {
exit 1
fi
# Step 4: Import issues for each repo with issues/*.json
for repo_dir in "${BACKUP_TEMP_DIR}/repos"/*/; do
[ -d "$repo_dir" ] || continue
# Step 4: Import issues — iterate issues/<slug>.json files, each is a JSON array
for issues_file in "${BACKUP_TEMP_DIR}/issues"/*.json; do
[ -f "$issues_file" ] || continue
local slug_filename
slug_filename=$(basename "$issues_file" .json)
# Map slug-filename → forgejo-slug: "disinto" → "disinto-admin/disinto",
# "disinto-ops" → "disinto-admin/disinto-ops"
local slug
slug=$(basename "$repo_dir")
case "$slug_filename" in
"disinto") slug="${FORGE_REPO}" ;;
"disinto-ops") slug="${FORGE_OPS_REPO}" ;;
*) slug="disinto-admin/${slug_filename}" ;;
esac
backup_log "Processing repo: ${slug}"
local issues_dir="${repo_dir}issues"
if [ -d "$issues_dir" ]; then
backup_import_issues "$slug" "$issues_dir"
fi
backup_log "Processing issues from ${slug_filename}.json (${slug})"
backup_import_issues "$slug" "$issues_file"
done
# Summary

View file

@ -19,10 +19,12 @@
# JOB_READY_TIMEOUT_SECS — poll timeout in seconds (default: 360)
# JOB_READY_TIMEOUT_<JOBNAME> — per-job timeout override (e.g.,
# JOB_READY_TIMEOUT_FORGEJO=300)
# Built-in: JOB_READY_TIMEOUT_CHAT=600
#
# Exit codes:
# 0 success (all jobs deployed and healthy, or dry-run completed)
# 1 failure (validation error, timeout, or nomad command failure)
# 1 failure (validation error, or one or more jobs unhealthy after all
# jobs submitted — deploy does NOT cascade-skip on timeout)
#
# Idempotency:
# Running twice back-to-back on a healthy cluster is a no-op. Jobs that are
@ -35,7 +37,11 @@ SCRIPT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_ROOT}/../../.." && pwd)}"
JOB_READY_TIMEOUT_SECS="${JOB_READY_TIMEOUT_SECS:-360}"
# Per-job built-in defaults (override with JOB_READY_TIMEOUT_<JOBNAME> env var)
JOB_READY_TIMEOUT_CHAT="${JOB_READY_TIMEOUT_CHAT:-600}"
DRY_RUN=0
FAILED_JOBS=() # jobs that timed out or failed deployment
log() { printf '[deploy] %s\n' "$*" >&2; }
die() { printf '[deploy] ERROR: %s\n' "$*" >&2; exit 1; }
@ -168,6 +174,43 @@ _wait_job_running() {
return 1
}
# ── Helper: _run_post_deploy <job_name> ─────────────────────────────────────
# Runs post-deploy scripts for a job after it becomes healthy.
# Currently supports: forgejo → run forgejo-bootstrap.sh
#
# Args:
# job_name — name of the deployed job
#
# Returns:
# 0 on success (script ran or not applicable)
# 1 on failure
# ─────────────────────────────────────────────────────────────────────────────
_run_post_deploy() {
local job_name="$1"
local post_deploy_script
case "$job_name" in
forgejo)
post_deploy_script="${SCRIPT_ROOT}/forgejo-bootstrap.sh"
if [ -x "$post_deploy_script" ]; then
log "running post-deploy script for ${job_name}"
if ! "$post_deploy_script"; then
log "ERROR: post-deploy script failed for ${job_name}"
return 1
fi
log "post-deploy script completed for ${job_name}"
else
log "no post-deploy script found for ${job_name}, skipping"
fi
;;
*)
log "no post-deploy script for ${job_name}, skipping"
;;
esac
return 0
}
# ── Main: deploy each job in order ───────────────────────────────────────────
for job_name in "${JOBS[@]}"; do
jobspec_path="${REPO_ROOT}/nomad/jobs/${job_name}.hcl"
@ -186,6 +229,9 @@ for job_name in "${JOBS[@]}"; do
log "[dry-run] nomad job validate ${jobspec_path}"
log "[dry-run] nomad job run -detach ${jobspec_path}"
log "[dry-run] (would wait for '${job_name}' to become healthy for ${job_timeout}s)"
case "$job_name" in
forgejo) log "[dry-run] [post-deploy] would run forgejo-bootstrap.sh" ;;
esac
continue
fi
@ -215,7 +261,13 @@ for job_name in "${JOBS[@]}"; do
# 4. Wait for healthy state
if ! _wait_job_running "$job_name" "$job_timeout"; then
die "deployment for job '${job_name}' did not reach successful state"
log "WARNING: deployment for job '${job_name}' did not reach successful state — continuing with remaining jobs"
FAILED_JOBS+=("$job_name")
fi
# 5. Run post-deploy scripts
if ! _run_post_deploy "$job_name"; then
die "post-deploy script failed for job '${job_name}'"
fi
done
@ -223,4 +275,17 @@ if [ "$DRY_RUN" -eq 1 ]; then
log "dry-run complete"
fi
# ── Final health summary ─────────────────────────────────────────────────────
if [ "${#FAILED_JOBS[@]}" -gt 0 ]; then
log ""
log "=== DEPLOY SUMMARY ==="
log "The following jobs did NOT reach healthy state:"
for failed in "${FAILED_JOBS[@]}"; do
log " - ${failed}"
done
log "All other jobs were submitted and healthy."
log "======================"
exit 1
fi
exit 0

View file

@ -0,0 +1,210 @@
#!/usr/bin/env bash
# =============================================================================
# lib/init/nomad/forgejo-bootstrap.sh — Bootstrap Forgejo admin user
#
# Part of the Nomad+Vault migration (S2.4, issue #1069). Creates the
# disinto-admin user in Forgejo if it doesn't exist, enabling:
# - First-login success without manual intervention
# - PAT generation via API (required for disinto backup import #1058)
#
# The script is idempotent — re-running after success is a no-op.
#
# Scope:
# - Checks if user 'disinto-admin' exists via GET /api/v1/users/search
# - If not: POST /api/v1/admin/users to create admin user
# - Uses FORGE_ADMIN_PASS from environment (required)
#
# Idempotency contract:
# - User 'disinto-admin' exists → skip creation, log
# "[forgejo-bootstrap] admin user already exists"
# - User creation fails with "user already exists" → treat as success
#
# Preconditions:
# - Forgejo reachable at $FORGE_URL (default: http://127.0.0.1:3000)
# - Forgejo admin token at $FORGE_TOKEN (from Vault or env)
# - FORGE_ADMIN_PASS set (env var with admin password)
#
# Requires:
# - curl, jq
#
# Usage:
# lib/init/nomad/forgejo-bootstrap.sh
# lib/init/nomad/forgejo-bootstrap.sh --dry-run
#
# Exit codes:
# 0 success (user created + ready, or already exists)
# 1 precondition / API failure
# =============================================================================
set -euo pipefail
# ── Configuration ────────────────────────────────────────────────────────────
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
# shellcheck source=../../../lib/hvault.sh
source "${REPO_ROOT}/lib/hvault.sh"
# Configuration
FORGE_URL="${FORGE_URL:-http://127.0.0.1:3000}"
FORGE_TOKEN="${FORGE_TOKEN:-}"
FORGE_ADMIN_USER="${DISINTO_ADMIN_USER:-disinto-admin}"
FORGE_ADMIN_EMAIL="${DISINTO_ADMIN_EMAIL:-admin@disinto.local}"
# Derive FORGE_ADMIN_PASS from common env var patterns
# Priority: explicit FORGE_ADMIN_PASS > DISINTO_FORGE_ADMIN_PASS > FORGEJO_ADMIN_PASS
FORGE_ADMIN_PASS="${FORGE_ADMIN_PASS:-${DISINTO_FORGE_ADMIN_PASS:-${FORGEJO_ADMIN_PASS:-}}}"
LOG_TAG="[forgejo-bootstrap]"
log() { printf '%s %s\n' "$LOG_TAG" "$*" >&2; }
die() { printf '%s ERROR: %s\n' "$LOG_TAG" "$*" >&2; exit 1; }
# ── Flag parsing ─────────────────────────────────────────────────────────────
DRY_RUN="${DRY_RUN:-0}"
for arg in "$@"; do
case "$arg" in
--dry-run) DRY_RUN=1 ;;
-h|--help)
printf 'Usage: %s [--dry-run]\n\n' "$(basename "$0")"
printf 'Bootstrap Forgejo admin user if it does not exist.\n'
printf 'Idempotent: re-running is a no-op.\n\n'
printf 'Environment:\n'
printf ' FORGE_URL Forgejo base URL (default: http://127.0.0.1:3000)\n'
printf ' FORGE_TOKEN Forgejo admin token (from Vault or env)\n'
printf ' FORGE_ADMIN_PASS Admin password (required)\n'
printf ' DISINTO_ADMIN_USER Username for admin account (default: disinto-admin)\n'
printf ' DISINTO_ADMIN_EMAIL Admin email (default: admin@disinto.local)\n\n'
printf ' --dry-run Print planned actions without modifying Forgejo.\n'
exit 0
;;
*) die "invalid argument: ${arg} (try --help)" ;;
esac
done
# ── Precondition checks ──────────────────────────────────────────────────────
log "── Precondition check ──"
if [ -z "$FORGE_URL" ]; then
die "FORGE_URL is not set"
fi
if [ -z "$FORGE_ADMIN_PASS" ]; then
die "FORGE_ADMIN_PASS is not set (required for admin user creation)"
fi
# Resolve FORGE_TOKEN from Vault if not set in env
if [ -z "$FORGE_TOKEN" ]; then
log "reading FORGE_TOKEN from Vault at kv/disinto/shared/forge/token"
_hvault_default_env
token_raw="$(hvault_get_or_empty "kv/data/disinto/shared/forge/token" 2>/dev/null) || true"
if [ -n "$token_raw" ]; then
FORGE_TOKEN="$(printf '%s' "$token_raw" | jq -r '.data.data.token // empty' 2>/dev/null)" || true
fi
if [ -z "$FORGE_TOKEN" ]; then
die "FORGE_TOKEN not set and not found in Vault"
fi
log "forge token loaded from Vault"
fi
# ── Step 1/2: Check if admin user already exists ─────────────────────────────
log "── Step 1/2: check if admin user '${FORGE_ADMIN_USER}' exists ──"
# Search for the user via the public API (no auth needed for search)
user_search_raw=$(curl -sf --max-time 10 \
"${FORGE_URL}/api/v1/users/search?q=${FORGE_ADMIN_USER}&limit=1" 2>/dev/null) || {
# If search fails (e.g., Forgejo not ready yet), we'll handle it
log "warning: failed to search users (Forgejo may not be ready yet)"
user_search_raw=""
}
admin_user_exists=false
user_id=""
if [ -n "$user_search_raw" ]; then
user_id=$(printf '%s' "$user_search_raw" | jq -r '.data[0].id // empty' 2>/dev/null) || true
if [ -n "$user_id" ]; then
admin_user_exists=true
log "admin user '${FORGE_ADMIN_USER}' already exists (user_id: ${user_id})"
fi
fi
# ── Step 2/2: Create admin user if needed ────────────────────────────────────
if [ "$admin_user_exists" = false ]; then
log "creating admin user '${FORGE_ADMIN_USER}'"
if [ "$DRY_RUN" -eq 1 ]; then
log "[dry-run] would create admin user with:"
log "[dry-run] username: ${FORGE_ADMIN_USER}"
log "[dry-run] email: ${FORGE_ADMIN_EMAIL}"
log "[dry-run] admin: true"
log "[dry-run] must_change_password: false"
else
# Create the admin user via the admin API
create_response=$(curl -sf --max-time 30 -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_URL}/api/v1/admin/users" \
-d "{
\"username\": \"${FORGE_ADMIN_USER}\",
\"email\": \"${FORGE_ADMIN_EMAIL}\",
\"password\": \"${FORGE_ADMIN_PASS}\",
\"admin\": true,
\"must_change_password\": false
}" 2>/dev/null) || {
# Check if the error is "user already exists" (race condition on re-run)
error_body=$(curl -s --max-time 30 -X POST \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_URL}/api/v1/admin/users" \
-d "{\"username\": \"${FORGE_ADMIN_USER}\", \"email\": \"${FORGE_ADMIN_EMAIL}\", \"password\": \"${FORGE_ADMIN_PASS}\", \"admin\": true, \"must_change_password\": false}" 2>/dev/null) || error_body=""
if echo "$error_body" | grep -q '"message".*"user already exists"'; then
log "admin user '${FORGE_ADMIN_USER}' already exists (race condition handled)"
admin_user_exists=true
else
die "failed to create admin user in Forgejo: ${error_body:-unknown error}"
fi
}
# Extract user_id from response
user_id=$(printf '%s' "$create_response" | jq -r '.id // empty' 2>/dev/null) || true
if [ -n "$user_id" ]; then
admin_user_exists=true
log "admin user '${FORGE_ADMIN_USER}' created (user_id: ${user_id})"
else
die "failed to extract user_id from Forgejo response"
fi
fi
else
log "admin user '${FORGE_ADMIN_USER}' already exists — skipping creation"
fi
# ── Step 3/3: Verify user was created and is admin ───────────────────────────
log "── Step 3/3: verify admin user is properly configured ──"
if [ "$DRY_RUN" -eq 1 ]; then
log "[dry-run] would verify admin user configuration"
log "done — [dry-run] complete"
else
# Verify the user exists and is admin
verify_response=$(curl -sf --max-time 10 \
-u "${FORGE_ADMIN_USER}:${FORGE_ADMIN_PASS}" \
"${FORGE_URL}/api/v1/user" 2>/dev/null) || {
die "failed to verify admin user credentials"
}
is_admin=$(printf '%s' "$verify_response" | jq -r '.is_admin // false' 2>/dev/null) || true
login=$(printf '%s' "$verify_response" | jq -r '.login // empty' 2>/dev/null) || true
if [ "$is_admin" != "true" ]; then
die "admin user '${FORGE_ADMIN_USER}' is not marked as admin"
fi
if [ "$login" != "$FORGE_ADMIN_USER" ]; then
die "admin user login mismatch: expected '${FORGE_ADMIN_USER}', got '${login}'"
fi
log "admin user verified: login=${login}, is_admin=${is_admin}"
log "done — Forgejo admin user is ready"
fi
exit 0