From 2c7c8d0b3843d7585108fb4538dd8f324c31a1e3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 20:50:45 +0000 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20docs:=20nomad-cutover-runbook.md=20?= =?UTF-8?q?=E2=80=94=20end-to-end=20cutover=20procedure=20(#1060)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/nomad-cutover-runbook.md | 183 ++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 docs/nomad-cutover-runbook.md diff --git a/docs/nomad-cutover-runbook.md b/docs/nomad-cutover-runbook.md new file mode 100644 index 0000000..e0956cc --- /dev/null +++ b/docs/nomad-cutover-runbook.md @@ -0,0 +1,183 @@ +# Nomad Cutover Runbook + +End-to-end procedure to cut over the disinto factory from docker-compose on +disinto-dev-box to Nomad on disinto-nomad-box. + +**Target**: disinto-nomad-box (10.10.10.216) becomes production; disinto-dev-box +stays warm for rollback. + +**Downtime budget**: <5 min blue-green flip. + +**Data scope**: Forgejo issues + disinto-ops git bundle only. Everything else is +regenerated or discarded. OAuth secrets are regenerated on fresh init (all +sessions invalidated). + +--- + +## 1. Pre-cutover readiness checklist + +- [ ] Nomad + Vault stack healthy on a fresh wipe+init (step 5 verified) +- [ ] Codeberg mirror current — `git log` parity between dev-box Forgejo and + Codeberg +- [ ] SSH key pair generated for nomad-box, registered on DO edge (see §4.6) +- [ ] Companion tools landed: + - `disinto backup create` (#1057) + - `disinto backup import` (#1058) +- [ ] Backup tarball produced and tested against a scratch LXC (see §3) + +--- + +## 2. 
Pre-cutover artifact: backup + +On disinto-dev-box: + +```bash +./bin/disinto backup create /tmp/disinto-backup-$(date +%Y%m%d).tar.gz +``` + +Copy the tarball to nomad-box (and optionally to a local workstation for +safekeeping): + +```bash +scp /tmp/disinto-backup-*.tar.gz nomad-box:/tmp/ +``` + +--- + +## 3. Pre-cutover dry-run + +On a throwaway LXC: + +```bash +lxc launch ubuntu:24.04 cutover-dryrun +# inside the container: +disinto init --backend=nomad --import-env .env --with edge +./bin/disinto backup import /tmp/disinto-backup-*.tar.gz +``` + +Verify: + +- Issue count matches source Forgejo +- disinto-ops repo refs match source bundle + +Destroy the LXC once satisfied: + +```bash +lxc delete cutover-dryrun --force +``` + +--- + +## 4. Cutover T-0 (operator executes; <5 min target) + +### 4.1 Stop dev-box services + +```bash +# On disinto-dev-box — stop, do NOT remove volumes (rollback needs them) +docker-compose stop +``` + +### 4.2 Provision nomad-box (if not already done) + +```bash +# On disinto-nomad-box +disinto init --backend=nomad --import-env .env --with edge +``` + +### 4.3 Import backup + +```bash +# On disinto-nomad-box +./bin/disinto backup import /tmp/disinto-backup-*.tar.gz +``` + +### 4.4 Configure Codeberg pull mirror + +Manual, one-time step in the new Forgejo UI: + +1. Create a mirror repository pointing at the Codeberg upstream +2. Confirm initial sync completes + +### 4.5 Claude login + +```bash +# On disinto-nomad-box +claude login +``` + +Set up Anthropic OAuth so agents can authenticate. + +### 4.6 Autossh tunnel swap + +> **Operator step** — cross-host, no dev-agent involvement. Do NOT automate. + +1. Stop the tunnel on dev-box: + ```bash + # On disinto-dev-box + systemctl stop reverse-tunnel + ``` + +2. Copy or regenerate the tunnel unit on nomad-box: + ```bash + # Copy from dev-box, or let init regenerate it + scp dev-box:/etc/systemd/system/reverse-tunnel.service \ + nomad-box:/etc/systemd/system/ + ``` + +3. 
Register nomad-box's public key on DO edge: + ```bash + # On DO edge box — same restricted-command as the dev-box key + # Replace both placeholders; copy the options prefix from the existing dev-box entry + echo "<restricted-command-options> <nomad-box-public-key>" >> /home/johba/.ssh/authorized_keys + ``` + +4. Start the tunnel on nomad-box: + ```bash + # On disinto-nomad-box + systemctl enable --now reverse-tunnel + ``` + +5. Verify end-to-end: + ```bash + curl https://self.disinto.ai/api/v1/version + # Should return the new box's Forgejo version + ``` + +--- + +## 5. Post-cutover smoke + +- [ ] `curl https://self.disinto.ai` → Forgejo welcome page +- [ ] Create a test PR → Woodpecker pipeline runs → agents assign and work +- [ ] Claude chat login via Forgejo OAuth succeeds + +--- + +## 6. Rollback (if any step 4 gate fails) + +1. Stop the tunnel on nomad-box: + ```bash + systemctl stop reverse-tunnel # on nomad-box + ``` + +2. Restore the tunnel on dev-box: + ```bash + systemctl start reverse-tunnel # on dev-box + ``` + +3. Bring dev-box services back up: + ```bash + docker-compose up -d # on dev-box + ``` + +4. DO Caddy config is unchanged — traffic restores in <5 min. + +5. File a post-mortem issue. Keep nomad-box state intact for debugging. + +--- + +## 7. 
Post-stable cleanup (T+1 week) + +- `docker-compose down -v` on dev-box +- Archive `/var/lib/docker/volumes/disinto_*` to cold storage +- Delete disinto-dev-box LXC or keep as permanent rollback reserve (operator + decision) From 99fe90ae2770cbe7f62f6b3a6cca4d3b4ff595f8 Mon Sep 17 00:00:00 2001 From: Agent Date: Sun, 19 Apr 2026 20:31:40 +0000 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20tool:=20disinto=20backup=20import=20?= =?UTF-8?q?=E2=80=94=20idempotent=20restore=20on=20fresh=20Nomad=20cluster?= =?UTF-8?q?=20(#1058)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/disinto | 28 ++- lib/disinto/backup.sh | 385 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 411 insertions(+), 2 deletions(-) create mode 100644 lib/disinto/backup.sh diff --git a/bin/disinto b/bin/disinto index 3740898..05e766f 100755 --- a/bin/disinto +++ b/bin/disinto @@ -42,6 +42,7 @@ source "${FACTORY_ROOT}/lib/ci-setup.sh" source "${FACTORY_ROOT}/lib/release.sh" source "${FACTORY_ROOT}/lib/backup.sh" source "${FACTORY_ROOT}/lib/claude-config.sh" +source "${FACTORY_ROOT}/lib/disinto/backup.sh" # backup create/import # ── Helpers ────────────────────────────────────────────────────────────────── @@ -66,6 +67,7 @@ Usage: disinto agent Manage agent state (enable/disable) disinto backup create Export factory state (issues + ops bundle) disinto edge [options] Manage edge tunnel registrations + disinto backup Backup and restore factory state Edge subcommands: register [project] Register a new tunnel (generates keypair if needed) @@ -104,6 +106,18 @@ Hire an agent options: CI logs options: --step Filter logs to a specific step (e.g., smoke-init) + +Backup subcommands: + create Create backup of factory state to tarball + import Restore factory state from backup tarball + +Import behavior: + - Unpacks tarball to temp directory + - Creates disinto repo via Forgejo API (mirror config is manual) + - Creates disinto-ops repo and pushes refs from bundle 
+ - Imports issues from issues/*.json (idempotent - skips existing) + - Logs issue number mapping (Forgejo auto-assigns numbers) + - Prints summary: created X repos, pushed Y refs, imported Z issues, skipped W EOF exit 1 } @@ -2897,7 +2911,10 @@ EOF } # ── backup command ──────────────────────────────────────────────────────────── -# Usage: disinto backup create +# Usage: disinto backup [args] +# Subcommands: +# create Create backup of factory state +# import Restore factory state from backup disinto_backup() { local subcmd="${1:-}" shift || true @@ -2906,8 +2923,15 @@ disinto_backup() { create) backup_create "$@" ;; + import) + backup_import "$@" + ;; *) - echo "Usage: disinto backup create " >&2 + echo "Usage: disinto backup [args]" >&2 + echo "" >&2 + echo "Subcommands:" >&2 + echo " create Create backup of factory state" >&2 + echo " import Restore factory state from backup" >&2 exit 1 ;; esac diff --git a/lib/disinto/backup.sh b/lib/disinto/backup.sh new file mode 100644 index 0000000..2c34bba --- /dev/null +++ b/lib/disinto/backup.sh @@ -0,0 +1,385 @@ +#!/usr/bin/env bash +# ============================================================================= +# backup.sh — backup/restore utilities for disinto factory state +# +# Subcommands: +# create Create backup of factory state +# import Restore factory state from backup +# +# Usage: +# source "${FACTORY_ROOT}/lib/disinto/backup.sh" +# backup_import +# +# Environment: +# FORGE_URL - Forgejo instance URL (target) +# FORGE_TOKEN - Admin token for target Forgejo +# +# Idempotency: +# - Repos: created via API if missing +# - Issues: check if exists by number, skip if present +# - Runs twice = same end state, no errors +# ============================================================================= +set -euo pipefail + +# ── Helper: log with timestamp ─────────────────────────────────────────────── +backup_log() { + local msg="$1" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $msg" +} + +# ── Helper: create repo if it 
# doesn't exist ─────────────────────────────────
# Ensure the repository <org>/<repo> exists on the target Forgejo.
#
# Usage:   backup_create_repo_if_missing <org>/<repo>
# Globals: FORGE_URL, FORGE_TOKEN (read)
#          BACKUP_CREATED_REPOS (incremented when a repo is actually created;
#          treated as 0 if unset)
# Returns: 0 if repo exists or was created, 1 on error
backup_create_repo_if_missing() {
  local slug="${1:-}"

  # Without both halves, org and repo would collapse to the same string and
  # every API path below would be wrong — reject early.
  case "$slug" in
    ?*/?*) ;;
    *)
      backup_log "ERROR: invalid repo slug '${slug}' (expected <org>/<repo>)" >&2
      return 1
      ;;
  esac

  local org_name="${slug%%/*}"
  local repo_name="${slug##*/}"
  local api="${FORGE_URL}/api/v1"
  local -a auth=(-H "Authorization: token ${FORGE_TOKEN}")
  local -a json=(-H "Content-Type: application/json")

  # Check if repo exists
  if curl -sf --max-time 5 "${auth[@]}" "${api}/repos/${slug}" >/dev/null 2>&1; then
    backup_log "Repo ${slug} already exists"
    return 0
  fi

  backup_log "Creating repo ${slug}..."

  # Best effort: create the org. A failure here is deliberately ignored — the
  # repo-creation attempts below decide whether the operation succeeded.
  curl -sf -X POST "${auth[@]}" "${json[@]}" "${api}/orgs" \
    -d "{\"username\":\"${org_name}\",\"visibility\":\"public\"}" \
    >/dev/null 2>&1 || true

  local payload="{\"name\":\"${repo_name}\",\"auto_init\":false,\"default_branch\":\"main\"}"
  local endpoint response note=""

  # Try the org endpoint first, then fall back to the admin endpoint.
  for endpoint in "orgs/${org_name}/repos" "admin/users/${org_name}/repos"; do
    response=$(curl -sf -X POST "${auth[@]}" "${json[@]}" \
      "${api}/${endpoint}" -d "$payload" 2>/dev/null) || response=""

    # Success only if the body carries a numeric "id" field. (Matching any
    # digit anywhere would accept error bodies that merely contain a number.)
    if grep -Eq '"id"[[:space:]]*:[[:space:]]*[0-9]+' <<<"$response"; then
      backup_log "Created repo ${slug}${note}"
      BACKUP_CREATED_REPOS=$(( ${BACKUP_CREATED_REPOS:-0} + 1 ))
      return 0
    fi
    note=" (via admin API)"
  done

  backup_log "ERROR: failed to create repo ${slug}" >&2
  return 1
}

# ── Helper: check if issue exists by number ──────────────────────────────────
# Usage: backup_issue_exists <slug> <issue_number>
# Returns: 0 if exists,
#          1 if not
# The probe is a GET on the issue's API URL; curl exits non-zero on HTTP
# errors (-f) and on transport errors, and both are reported as "not found".
backup_issue_exists() {
  local slug="$1"
  local issue_num="$2"

  curl -sf --max-time 5 \
    -H "Authorization: token ${FORGE_TOKEN}" \
    "${FORGE_URL}/api/v1/repos/${slug}/issues/${issue_num}" >/dev/null 2>&1
}

# ── Helper: create issue (Forgejo assigns the number) ────────────────────────
# Note: Forgejo API auto-assigns next integer; we accept renumbering and log
# the mapping.
#
# Usage:   backup_create_issue <slug> <original_number> <title> <body> [labels...]
# Globals: FORGE_URL, FORGE_TOKEN (read)
#          BACKUP_MAPPING_FILE (appended to when set: "<slug> <orig> <new>")
# Outputs: the new issue number on stdout and nothing else — callers capture
#          it with $(…), so all log lines from this function go to stderr.
# Returns: 0 on success, 1 on failure
backup_create_issue() {
  local slug="$1"
  local original_num="$2"
  local title="$3"
  local body="$4"
  shift 4

  # Resolve label names to IDs. The repo's label list is fetched once (not
  # once per label) and names are passed to jq as data, never spliced into
  # the jq program. Names with no match on the target are silently dropped.
  # NOTE(review): the label list endpoint may be paginated — confirm that all
  # labels of the target repo fit in a single response.
  local labels_json="[]"
  if [ "$#" -gt 0 ]; then
    local repo_labels wanted
    repo_labels=$(curl -sf --max-time 5 \
      -H "Authorization: token ${FORGE_TOKEN}" \
      "${FORGE_URL}/api/v1/repos/${slug}/labels" 2>/dev/null) || repo_labels="[]"
    wanted=$(printf '%s\n' "$@" | jq -R . | jq -s -c .) || wanted="[]"
    # IDs are emitted as JSON numbers (the issue-create API takes integer
    # label IDs, not strings).
    labels_json=$(printf '%s' "$repo_labels" \
      | jq -c --argjson wanted "$wanted" \
        '[.[] | select(.name as $n | any($wanted[]; . == $n)) | .id]' \
        2>/dev/null) || labels_json="[]"
    [ -n "$labels_json" ] || labels_json="[]"
  fi

  # Build payload
  local payload
  payload=$(jq -n \
    --arg title "$title" \
    --arg body "$body" \
    --argjson labels "$labels_json" \
    '{title: $title, body: $body, labels: $labels}') || {
    backup_log "ERROR: failed to build payload for issue '${title}'" >&2
    return 1
  }

  local response
  response=$(curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_URL}/api/v1/repos/${slug}/issues" \
    -d "$payload" 2>/dev/null) || {
    backup_log "ERROR: failed to create issue '${title}'" >&2
    return 1
  }

  local new_num
  new_num=$(printf '%s' "$response" | jq -r '.number // empty' 2>/dev/null) \
    || new_num=""
  if [ -z "$new_num" ]; then
    backup_log "ERROR: no issue number in response for '${title}'" >&2
    return 1
  fi

  # Log the mapping (plain text, one line per issue)
  if [ -n "${BACKUP_MAPPING_FILE:-}" ]; then
    printf '%s %s %s\n' "$slug" "$original_num" "$new_num" \
      >> "$BACKUP_MAPPING_FILE"
  fi

  backup_log "Created issue '${title}' as #${new_num} (original: #${original_num})" >&2
  printf '%s\n' "$new_num"
}

# ── Step 1: Unpack tarball to temp dir ───────────────────────────────────────
# Usage:   backup_unpack_tarball <tarball>
# Globals: BACKUP_TEMP_DIR (written: path of the unpacked tree)
# Returns: 0 on success; 1 if the tarball is missing, cannot be unpacked, or
#          has no top-level 'repos/' directory (temp dir is removed then)
backup_unpack_tarball() {
  local tarball="${1:-}"

  if [ ! -f "$tarball" ]; then
    backup_log "ERROR: tarball not found: ${tarball}" >&2
    return 1
  fi

  BACKUP_TEMP_DIR=$(mktemp -d -t disinto-backup.XXXXXX) || {
    backup_log "ERROR: failed to create temp directory" >&2
    return 1
  }
  backup_log "Unpacking ${tarball} to ${BACKUP_TEMP_DIR}"

  if ! tar -xzf "$tarball" -C "$BACKUP_TEMP_DIR"; then
    backup_log "ERROR: failed to unpack tarball" >&2
    rm -rf -- "${BACKUP_TEMP_DIR:?}"
    return 1
  fi

  # Verify expected structure
  if [ ! -d "${BACKUP_TEMP_DIR}/repos" ]; then
    backup_log "ERROR: tarball missing 'repos/' directory" >&2
    rm -rf -- "${BACKUP_TEMP_DIR:?}"
    return 1
  fi

  backup_log "Tarball unpacked successfully"
}

# ── Step 2: disinto repo — create via Forgejo API, trigger sync (manual) ─────
# Usage:   backup_import_disinto_repo
# Returns: 0 on success, 1 if the repo could not be created
backup_import_disinto_repo() {
  backup_log "Step 2: Configuring disinto repo..."

  # Create disinto repo if missing. The explicit "|| return" matters: this
  # function is called from an `if !` condition, where `set -e` is inactive,
  # so a bare failing command would be silently ignored.
  backup_create_repo_if_missing "disinto-admin/disinto" || return 1

  # Note: Manual mirror configuration recommended (avoids SSH deploy-key handling)
  backup_log "Note: Configure Codeberg → Forgejo pull mirror manually"
  backup_log "  Run on Forgejo admin panel: Repository Settings → Repository Mirroring"
  backup_log "  Source: ssh://git@codeberg.org/johba/disinto.git"
  backup_log "  Mirror: disinto-admin/disinto"
  backup_log "  Or use: git clone --mirror ssh://git@codeberg.org/johba/disinto.git"
  backup_log "          cd disinto.git && git push --mirror ${FORGE_URL}/disinto-admin/disinto.git"

  return 0
}

# ── Step 3: disinto-ops repo — create empty, push from bundle ────────────────
# Usage:   backup_import_disinto_ops_repo
# Globals: BACKUP_TEMP_DIR, FORGE_URL (read); BACKUP_PUSHED_REFS (incremented)
# Returns: 0 on success or when the bundle is absent (skipped), 1 on failure
backup_import_disinto_ops_repo() {
  backup_log "Step 3: Configuring disinto-ops repo from bundle..."

  local bundle_path="${BACKUP_TEMP_DIR}/repos/disinto-ops.bundle"

  if [ ! -f "$bundle_path" ]; then
    backup_log "WARNING: Bundle not found at ${bundle_path}, skipping"
    return 0
  fi

  # Create ops repo if missing (explicit check: `set -e` is inactive here
  # because the caller invokes this function inside an `if !` condition)
  backup_create_repo_if_missing "disinto-admin/disinto-ops" || return 1

  # Clone bundle and push to Forgejo
  local clone_dir
  clone_dir=$(mktemp -d -t disinto-ops-clone.XXXXXX) || {
    backup_log "ERROR: failed to create temp directory" >&2
    return 1
  }
  local bare_repo="${clone_dir}/disinto-ops.git"
  backup_log "Cloning bundle to ${clone_dir}"

  if ! git clone --bare "$bundle_path" "$bare_repo"; then
    backup_log "ERROR: failed to clone bundle" >&2
    rm -rf -- "${clone_dir:?}"
    return 1
  fi

  # Push all refs to Forgejo. `git -C` is used instead of `cd … && git push`:
  # in `if ! cd X && git push`, the `!` negates only `cd`, so a successful cd
  # short-circuits the `&&` and the push is never executed. `git -C` also
  # leaves the caller's working directory untouched.
  # NOTE(review): the push URL carries no credentials — presumably a git
  # credential helper or equivalent is configured on the host; confirm.
  backup_log "Pushing refs to Forgejo..."
  if ! git -C "$bare_repo" push --mirror \
      "${FORGE_URL}/disinto-admin/disinto-ops.git" 2>&1; then
    backup_log "ERROR: failed to push refs" >&2
    rm -rf -- "${clone_dir:?}"
    return 1
  fi

  # for-each-ref exits 0 even with no refs (show-ref exits 1, which would
  # fail the pipeline under pipefail)
  local ref_count
  ref_count=$(git -C "$bare_repo" for-each-ref | wc -l) || ref_count=0
  ref_count=$((ref_count + 0)) # normalise whitespace padding from wc
  BACKUP_PUSHED_REFS=$(( ${BACKUP_PUSHED_REFS:-0} + ref_count ))

  backup_log "Pushed ${ref_count} refs to disinto-ops"
  rm -rf -- "${clone_dir:?}"

  return 0
}

# ── Step 4: Import issues from backup ────────────────────────────────────────
# Usage:   backup_import_issues <slug> <issues_dir>
# Globals: BACKUP_CREATED_ISSUES, BACKUP_SKIPPED_ISSUES (incremented)
# Returns: 0 (per-issue failures are logged and do not abort the import)
#
# NOTE(review): "already imported" is decided by probing the ORIGINAL issue
# number on the target. Since Forgejo renumbers on create, a second run is
# only a no-op when target numbers happen to line up with the originals.
backup_import_issues() {
  local slug="$1"
  local issues_dir="$2"

  if [ ! -d "$issues_dir" ]; then
    backup_log "No issues directory found, skipping"
    return 0
  fi

  local created=0
  local skipped=0
  local failed=0
  local issue_file issue_num title body label new_num
  local -a labels

  for issue_file in "${issues_dir}"/*.json; do
    [ -f "$issue_file" ] || continue

    backup_log "Processing issue file: $(basename "$issue_file")"

    # Unreadable/invalid JSON yields an empty number and is skipped below
    # instead of aborting the whole import under `set -e`.
    issue_num=$(jq -r '.number // empty' "$issue_file" 2>/dev/null) || issue_num=""
    title=$(jq -r '.title // empty' "$issue_file" 2>/dev/null) || title=""
    body=$(jq -r '.body // empty' "$issue_file" 2>/dev/null) || body=""

    if [ -z "$issue_num" ] || [ "$issue_num" = "null" ]; then
      backup_log "WARNING: skipping issue without number: $(basename "$issue_file")"
      continue
    fi

    # Check if issue already exists
    if backup_issue_exists "$slug" "$issue_num"; then
      backup_log "Issue #${issue_num} already exists, skipping"
      skipped=$((skipped + 1))
      continue
    fi

    # Extract label names. Entries may be plain strings or objects carrying a
    # "name" field; anything else is ignored.
    # NOTE(review): the exact shape written by `backup create` is not visible
    # here — confirm against the exporter.
    labels=()
    while IFS= read -r label; do
      if [ -n "$label" ]; then
        labels+=("$label")
      fi
    done < <(jq -r '.labels[]?
                    | if type == "object" then .name else . end
                    | select(type == "string")' "$issue_file" 2>/dev/null)

    # Create issue. The ${arr[@]+…} form keeps an empty array from tripping
    # `set -u` on older Bash versions.
    if new_num=$(backup_create_issue "$slug" "$issue_num" "$title" "$body" \
        ${labels[@]+"${labels[@]}"}); then
      created=$((created + 1))
    else
      failed=$((failed + 1))
    fi
  done

  BACKUP_CREATED_ISSUES=$(( ${BACKUP_CREATED_ISSUES:-0} + created ))
  BACKUP_SKIPPED_ISSUES=$(( ${BACKUP_SKIPPED_ISSUES:-0} + skipped ))

  backup_log "Created ${created} issues, skipped ${skipped}"
  if [ "$failed" -gt 0 ]; then
    backup_log "WARNING: ${failed} issues could not be created in ${slug}" >&2
  fi
}

# ── Private: remove the unpacked tarball tree, if any ────────────────────────
_backup_cleanup_temp() {
  if [ -n "${BACKUP_TEMP_DIR:-}" ] && [ -d "$BACKUP_TEMP_DIR" ]; then
    rm -rf -- "$BACKUP_TEMP_DIR"
  fi
}

# ── Main: import subcommand ──────────────────────────────────────────────────
# Usage:   backup_import <tarball>
# Globals: FORGE_URL, FORGE_TOKEN (required)
#          BACKUP_* counters, BACKUP_TEMP_DIR, BACKUP_MAPPING_FILE (written)
# Exits:   0 on success, 1 on any fatal error. This function calls `exit`,
#          so it terminates the calling shell — it is meant to be the last
#          thing a CLI subcommand runs.
backup_import() {
  local tarball="${1:-}"

  if [ -z "$tarball" ]; then
    echo "Usage: disinto backup import <tarball>" >&2
    exit 1
  fi

  # Validate required environment
  [ -n "${FORGE_URL:-}" ] || { echo "Error: FORGE_URL not set" >&2; exit 1; }
  [ -n "${FORGE_TOKEN:-}" ] || { echo "Error: FORGE_TOKEN not set" >&2; exit 1; }

  backup_log "=== Backup Import Started ==="
  backup_log "Target: ${FORGE_URL}"
  backup_log "Tarball: ${tarball}"

  # Initialize counters
  BACKUP_CREATED_REPOS=0
  BACKUP_PUSHED_REFS=0
  BACKUP_CREATED_ISSUES=0
  BACKUP_SKIPPED_ISSUES=0
  BACKUP_TEMP_DIR=""

  # Mapping file: plain text, one "<repo> <original> <new>" line per issue.
  # It is intentionally kept after the run so the operator can consult it.
  BACKUP_MAPPING_FILE=$(mktemp -t disinto-mapping.XXXXXX) || {
    echo "Error: failed to create mapping file" >&2
    exit 1
  }
  printf '# repo original_number new_number\n' > "$BACKUP_MAPPING_FILE"

  # Step 1: Unpack tarball (cleans up after itself on failure)
  if ! backup_unpack_tarball "$tarball"; then
    exit 1
  fi

  # Step 2: disinto repo
  if ! backup_import_disinto_repo; then
    _backup_cleanup_temp
    exit 1
  fi

  # Step 3: disinto-ops repo
  if ! backup_import_disinto_ops_repo; then
    _backup_cleanup_temp
    exit 1
  fi

  # Step 4: Import issues for each repo with issues/*.json
  # The directory name is only the repo name; the Forgejo API needs
  # <owner>/<repo>, so the owner used for the repos in steps 2–3 is prefixed.
  # NOTE(review): assumes the tarball layout is repos/<repo>/issues/*.json —
  # confirm against `backup create`.
  local owner="disinto-admin"
  local repo_dir slug issues_dir
  for repo_dir in "${BACKUP_TEMP_DIR}/repos"/*/; do
    [ -d "$repo_dir" ] || continue

    slug="${owner}/$(basename "$repo_dir")"

    backup_log "Processing repo: ${slug}"

    issues_dir="${repo_dir}issues"
    if [ -d "$issues_dir" ]; then
      if ! backup_import_issues "$slug" "$issues_dir"; then
        backup_log "WARNING: issue import failed for ${slug}" >&2
      fi
    fi
  done

  # Summary
  backup_log "=== Backup Import Complete ==="
  backup_log "Created ${BACKUP_CREATED_REPOS} repos"
  backup_log "Pushed ${BACKUP_PUSHED_REFS} refs"
  backup_log "Imported ${BACKUP_CREATED_ISSUES} issues"
  backup_log "Skipped ${BACKUP_SKIPPED_ISSUES} (already present)"
  backup_log "Issue mapping saved to: ${BACKUP_MAPPING_FILE}"

  # Cleanup
  _backup_cleanup_temp

  exit 0
}

# ── Entry point: if sourced, don't run; if executed directly, run import ────
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  if [ $# -lt 1 ]; then
    echo "Usage: $0 <tarball>" >&2
    exit 1
  fi

  backup_import "$1"
fi