Compare commits
6 commits
526b7fb73a
...
cb8c131bc4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb8c131bc4 | ||
|
|
c287ec0626 | ||
| 449611e6df | |||
| 9f365e40c0 | |||
|
|
e90ff4eb7b | ||
|
|
f878427866 |
7 changed files with 344 additions and 19 deletions
21
bin/disinto
21
bin/disinto
|
|
@ -12,6 +12,7 @@
|
||||||
# disinto secrets <subcommand> Manage encrypted secrets
|
# disinto secrets <subcommand> Manage encrypted secrets
|
||||||
# disinto run <action-id> Run action in ephemeral runner container
|
# disinto run <action-id> Run action in ephemeral runner container
|
||||||
# disinto ci-logs <pipeline> [--step <name>] Read CI logs from Woodpecker SQLite
|
# disinto ci-logs <pipeline> [--step <name>] Read CI logs from Woodpecker SQLite
|
||||||
|
# disinto backup create <outfile> Export factory state for migration
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# disinto init https://github.com/user/repo
|
# disinto init https://github.com/user/repo
|
||||||
|
|
@ -39,6 +40,7 @@ source "${FACTORY_ROOT}/lib/generators.sh"
|
||||||
source "${FACTORY_ROOT}/lib/forge-push.sh"
|
source "${FACTORY_ROOT}/lib/forge-push.sh"
|
||||||
source "${FACTORY_ROOT}/lib/ci-setup.sh"
|
source "${FACTORY_ROOT}/lib/ci-setup.sh"
|
||||||
source "${FACTORY_ROOT}/lib/release.sh"
|
source "${FACTORY_ROOT}/lib/release.sh"
|
||||||
|
source "${FACTORY_ROOT}/lib/backup.sh"
|
||||||
source "${FACTORY_ROOT}/lib/claude-config.sh"
|
source "${FACTORY_ROOT}/lib/claude-config.sh"
|
||||||
|
|
||||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -62,6 +64,7 @@ Usage:
|
||||||
disinto hire-an-agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>]
|
disinto hire-an-agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>]
|
||||||
Hire a new agent (create user + .profile repo; re-run to rotate credentials)
|
Hire a new agent (create user + .profile repo; re-run to rotate credentials)
|
||||||
disinto agent <subcommand> Manage agent state (enable/disable)
|
disinto agent <subcommand> Manage agent state (enable/disable)
|
||||||
|
disinto backup create <outfile> Export factory state (issues + ops bundle)
|
||||||
disinto edge <verb> [options] Manage edge tunnel registrations
|
disinto edge <verb> [options] Manage edge tunnel registrations
|
||||||
|
|
||||||
Edge subcommands:
|
Edge subcommands:
|
||||||
|
|
@ -2893,6 +2896,23 @@ EOF
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── backup command ────────────────────────────────────────────────────────────
|
||||||
|
# Usage: disinto backup create <outfile.tar.gz>
|
||||||
|
disinto_backup() {
|
||||||
|
local subcmd="${1:-}"
|
||||||
|
shift || true
|
||||||
|
|
||||||
|
case "$subcmd" in
|
||||||
|
create)
|
||||||
|
backup_create "$@"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Usage: disinto backup create <outfile.tar.gz>" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
# ── Main dispatch ────────────────────────────────────────────────────────────
|
# ── Main dispatch ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
case "${1:-}" in
|
case "${1:-}" in
|
||||||
|
|
@ -2909,6 +2929,7 @@ case "${1:-}" in
|
||||||
hire-an-agent) shift; disinto_hire_an_agent "$@" ;;
|
hire-an-agent) shift; disinto_hire_an_agent "$@" ;;
|
||||||
agent) shift; disinto_agent "$@" ;;
|
agent) shift; disinto_agent "$@" ;;
|
||||||
edge) shift; disinto_edge "$@" ;;
|
edge) shift; disinto_edge "$@" ;;
|
||||||
|
backup) shift; disinto_backup "$@" ;;
|
||||||
-h|--help) usage ;;
|
-h|--help) usage ;;
|
||||||
*) usage ;;
|
*) usage ;;
|
||||||
esac
|
esac
|
||||||
|
|
|
||||||
|
|
@ -52,8 +52,9 @@ claude_run_with_watchdog() {
|
||||||
out_file=$(mktemp) || return 1
|
out_file=$(mktemp) || return 1
|
||||||
trap 'rm -f "$out_file"' RETURN
|
trap 'rm -f "$out_file"' RETURN
|
||||||
|
|
||||||
# Start claude in background, capturing stdout to temp file
|
# Start claude in new process group (setsid creates new session, $pid is PGID leader)
|
||||||
"${cmd[@]}" > "$out_file" 2>>"$LOGFILE" &
|
# All children of claude will inherit this process group
|
||||||
|
setsid "${cmd[@]}" > "$out_file" 2>>"$LOGFILE" &
|
||||||
pid=$!
|
pid=$!
|
||||||
|
|
||||||
# Background watchdog: poll for final result marker
|
# Background watchdog: poll for final result marker
|
||||||
|
|
@ -84,12 +85,12 @@ claude_run_with_watchdog() {
|
||||||
sleep "$grace"
|
sleep "$grace"
|
||||||
if kill -0 "$pid" 2>/dev/null; then
|
if kill -0 "$pid" 2>/dev/null; then
|
||||||
log "watchdog: claude -p idle for ${grace}s after final result; SIGTERM"
|
log "watchdog: claude -p idle for ${grace}s after final result; SIGTERM"
|
||||||
kill -TERM "$pid" 2>/dev/null || true
|
kill -TERM -- "-$pid" 2>/dev/null || true
|
||||||
# Give it a moment to clean up
|
# Give it a moment to clean up
|
||||||
sleep 5
|
sleep 5
|
||||||
if kill -0 "$pid" 2>/dev/null; then
|
if kill -0 "$pid" 2>/dev/null; then
|
||||||
log "watchdog: force kill after SIGTERM timeout"
|
log "watchdog: force kill after SIGTERM timeout"
|
||||||
kill -KILL "$pid" 2>/dev/null || true
|
kill -KILL -- "-$pid" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
@ -100,16 +101,16 @@ claude_run_with_watchdog() {
|
||||||
timeout --foreground "${CLAUDE_TIMEOUT:-7200}" tail --pid="$pid" -f /dev/null 2>/dev/null
|
timeout --foreground "${CLAUDE_TIMEOUT:-7200}" tail --pid="$pid" -f /dev/null 2>/dev/null
|
||||||
rc=$?
|
rc=$?
|
||||||
|
|
||||||
# Clean up the watchdog
|
# Clean up the watchdog (target process group if it spawned children)
|
||||||
kill "$grace_pid" 2>/dev/null || true
|
kill -- "-$grace_pid" 2>/dev/null || true
|
||||||
wait "$grace_pid" 2>/dev/null || true
|
wait "$grace_pid" 2>/dev/null || true
|
||||||
|
|
||||||
# When timeout fires (rc=124), explicitly kill the orphaned claude process
|
# When timeout fires (rc=124), explicitly kill the orphaned claude process group
|
||||||
# tail --pid is a passive waiter, not a supervisor
|
# tail --pid is a passive waiter, not a supervisor
|
||||||
if [ "$rc" -eq 124 ]; then
|
if [ "$rc" -eq 124 ]; then
|
||||||
kill "$pid" 2>/dev/null || true
|
kill -TERM -- "-$pid" 2>/dev/null || true
|
||||||
sleep 1
|
sleep 1
|
||||||
kill -KILL "$pid" 2>/dev/null || true
|
kill -KILL -- "-$pid" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Output the captured stdout
|
# Output the captured stdout
|
||||||
|
|
|
||||||
136
lib/backup.sh
Normal file
136
lib/backup.sh
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# disinto backup — export factory state for migration
|
||||||
|
#
|
||||||
|
# Usage: source this file, then call backup_create <outfile.tar.gz>
|
||||||
|
# Requires: FORGE_URL, FORGE_TOKEN, FORGE_REPO, FORGE_OPS_REPO, OPS_REPO_ROOT
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Fetch all issues (open + closed) for a repo slug and emit the normalized JSON array.
|
||||||
|
# Usage: _backup_fetch_issues <org/repo>
|
||||||
|
_backup_fetch_issues() {
|
||||||
|
local repo_slug="$1"
|
||||||
|
local api_url="${FORGE_API_BASE}/repos/${repo_slug}"
|
||||||
|
|
||||||
|
local all_issues="[]"
|
||||||
|
for state in open closed; do
|
||||||
|
local page=1
|
||||||
|
while true; do
|
||||||
|
local page_items
|
||||||
|
page_items=$(curl -sf -X GET \
|
||||||
|
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
"${api_url}/issues?state=${state}&type=issues&limit=50&page=${page}") || {
|
||||||
|
echo "ERROR: failed to fetch ${state} issues from ${repo_slug} (page ${page})" >&2
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
local count
|
||||||
|
count=$(printf '%s' "$page_items" | jq 'length' 2>/dev/null) || count=0
|
||||||
|
[ -z "$count" ] && count=0
|
||||||
|
[ "$count" -eq 0 ] && break
|
||||||
|
all_issues=$(printf '%s\n%s' "$all_issues" "$page_items" | jq -s 'add')
|
||||||
|
[ "$count" -lt 50 ] && break
|
||||||
|
page=$((page + 1))
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# Normalize to the schema: number, title, body, labels, state
|
||||||
|
printf '%s' "$all_issues" | jq '[.[] | {
|
||||||
|
number: .number,
|
||||||
|
title: .title,
|
||||||
|
body: .body,
|
||||||
|
labels: [.labels[]?.name],
|
||||||
|
state: .state
|
||||||
|
}] | sort_by(.number)'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a backup tarball of factory state.
|
||||||
|
# Usage: backup_create <outfile.tar.gz>
|
||||||
|
backup_create() {
|
||||||
|
local outfile="${1:-}"
|
||||||
|
if [ -z "$outfile" ]; then
|
||||||
|
echo "Error: output file required" >&2
|
||||||
|
echo "Usage: disinto backup create <outfile.tar.gz>" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Resolve to absolute path before cd-ing into tmpdir
|
||||||
|
case "$outfile" in
|
||||||
|
/*) ;;
|
||||||
|
*) outfile="$(pwd)/${outfile}" ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Validate required env
|
||||||
|
: "${FORGE_URL:?FORGE_URL must be set}"
|
||||||
|
: "${FORGE_TOKEN:?FORGE_TOKEN must be set}"
|
||||||
|
: "${FORGE_REPO:?FORGE_REPO must be set}"
|
||||||
|
|
||||||
|
local forge_ops_repo="${FORGE_OPS_REPO:-${FORGE_REPO}-ops}"
|
||||||
|
local ops_repo_root="${OPS_REPO_ROOT:-}"
|
||||||
|
|
||||||
|
if [ -z "$ops_repo_root" ] || [ ! -d "$ops_repo_root/.git" ]; then
|
||||||
|
echo "Error: OPS_REPO_ROOT (${ops_repo_root:-<unset>}) is not a valid git repo" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
local tmpdir
|
||||||
|
tmpdir=$(mktemp -d)
|
||||||
|
trap 'rm -rf "$tmpdir"' EXIT
|
||||||
|
|
||||||
|
local project_name="${FORGE_REPO##*/}"
|
||||||
|
|
||||||
|
echo "=== disinto backup create ==="
|
||||||
|
echo "Forge: ${FORGE_URL}"
|
||||||
|
echo "Repos: ${FORGE_REPO}, ${forge_ops_repo}"
|
||||||
|
|
||||||
|
# ── 1. Export issues ──────────────────────────────────────────────────────
|
||||||
|
mkdir -p "${tmpdir}/issues"
|
||||||
|
|
||||||
|
echo "Fetching issues for ${FORGE_REPO}..."
|
||||||
|
_backup_fetch_issues "$FORGE_REPO" > "${tmpdir}/issues/${project_name}.json"
|
||||||
|
local main_count
|
||||||
|
main_count=$(jq 'length' "${tmpdir}/issues/${project_name}.json")
|
||||||
|
echo " ${main_count} issues exported"
|
||||||
|
|
||||||
|
echo "Fetching issues for ${forge_ops_repo}..."
|
||||||
|
_backup_fetch_issues "$forge_ops_repo" > "${tmpdir}/issues/${project_name}-ops.json"
|
||||||
|
local ops_count
|
||||||
|
ops_count=$(jq 'length' "${tmpdir}/issues/${project_name}-ops.json")
|
||||||
|
echo " ${ops_count} issues exported"
|
||||||
|
|
||||||
|
# ── 2. Git bundle of ops repo ────────────────────────────────────────────
|
||||||
|
mkdir -p "${tmpdir}/repos"
|
||||||
|
|
||||||
|
echo "Creating git bundle for ${forge_ops_repo}..."
|
||||||
|
git -C "$ops_repo_root" bundle create "${tmpdir}/repos/${project_name}-ops.bundle" --all 2>&1
|
||||||
|
echo " bundle created ($(du -h "${tmpdir}/repos/${project_name}-ops.bundle" | cut -f1))"
|
||||||
|
|
||||||
|
# ── 3. Metadata ──────────────────────────────────────────────────────────
|
||||||
|
local created_at
|
||||||
|
created_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
|
||||||
|
jq -n \
|
||||||
|
--arg created_at "$created_at" \
|
||||||
|
--arg source_host "$(hostname)" \
|
||||||
|
--argjson schema_version 1 \
|
||||||
|
--arg forgejo_url "$FORGE_URL" \
|
||||||
|
'{
|
||||||
|
created_at: $created_at,
|
||||||
|
source_host: $source_host,
|
||||||
|
schema_version: $schema_version,
|
||||||
|
forgejo_url: $forgejo_url
|
||||||
|
}' > "${tmpdir}/metadata.json"
|
||||||
|
|
||||||
|
# ── 4. Pack tarball ──────────────────────────────────────────────────────
|
||||||
|
echo "Creating tarball: ${outfile}"
|
||||||
|
tar -czf "$outfile" -C "$tmpdir" metadata.json issues repos
|
||||||
|
local size
|
||||||
|
size=$(du -h "$outfile" | cut -f1)
|
||||||
|
echo "=== Backup complete: ${outfile} (${size}) ==="
|
||||||
|
|
||||||
|
# Clean up before returning — the EXIT trap references the local $tmpdir
|
||||||
|
# which goes out of scope after return, causing 'unbound variable' under set -u.
|
||||||
|
trap - EXIT
|
||||||
|
rm -rf "$tmpdir"
|
||||||
|
}
|
||||||
|
|
@ -405,6 +405,9 @@ services:
|
||||||
WOODPECKER_SERVER: localhost:9000
|
WOODPECKER_SERVER: localhost:9000
|
||||||
WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
|
WOODPECKER_AGENT_SECRET: ${WOODPECKER_AGENT_SECRET:-}
|
||||||
WOODPECKER_GRPC_SECURE: "false"
|
WOODPECKER_GRPC_SECURE: "false"
|
||||||
|
WOODPECKER_GRPC_KEEPALIVE_TIME: "10s"
|
||||||
|
WOODPECKER_GRPC_KEEPALIVE_TIMEOUT: "20s"
|
||||||
|
WOODPECKER_GRPC_KEEPALIVE_PERMIT_WITHOUT_CALLS: "true"
|
||||||
WOODPECKER_HEALTHCHECK_ADDR: ":3333"
|
WOODPECKER_HEALTHCHECK_ADDR: ":3333"
|
||||||
WOODPECKER_BACKEND_DOCKER_NETWORK: ${WOODPECKER_CI_NETWORK:-disinto_disinto-net}
|
WOODPECKER_BACKEND_DOCKER_NETWORK: ${WOODPECKER_CI_NETWORK:-disinto_disinto-net}
|
||||||
WOODPECKER_MAX_WORKFLOWS: 1
|
WOODPECKER_MAX_WORKFLOWS: 1
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ job "woodpecker-agent" {
|
||||||
check {
|
check {
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/healthz"
|
path = "/healthz"
|
||||||
interval = "15s"
|
interval = "10s"
|
||||||
timeout = "3s"
|
timeout = "3s"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -89,10 +89,13 @@ job "woodpecker-agent" {
|
||||||
# Nomad's port stanza to the allocation's IP (not localhost), so the
|
# Nomad's port stanza to the allocation's IP (not localhost), so the
|
||||||
# agent must use the LXC's eth0 IP, not 127.0.0.1.
|
# agent must use the LXC's eth0 IP, not 127.0.0.1.
|
||||||
env {
|
env {
|
||||||
WOODPECKER_SERVER = "${attr.unique.network.ip-address}:9000"
|
WOODPECKER_SERVER = "${attr.unique.network.ip-address}:9000"
|
||||||
WOODPECKER_GRPC_SECURE = "false"
|
WOODPECKER_GRPC_SECURE = "false"
|
||||||
WOODPECKER_MAX_WORKFLOWS = "1"
|
WOODPECKER_GRPC_KEEPALIVE_TIME = "10s"
|
||||||
WOODPECKER_HEALTHCHECK_ADDR = ":3333"
|
WOODPECKER_GRPC_KEEPALIVE_TIMEOUT = "20s"
|
||||||
|
WOODPECKER_GRPC_KEEPALIVE_PERMIT_WITHOUT_CALLS = "true"
|
||||||
|
WOODPECKER_MAX_WORKFLOWS = "1"
|
||||||
|
WOODPECKER_HEALTHCHECK_ADDR = ":3333"
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Vault-templated agent secret ──────────────────────────────────
|
# ── Vault-templated agent secret ──────────────────────────────────
|
||||||
|
|
|
||||||
|
|
@ -52,8 +52,35 @@ REVIEW_TMPDIR=$(mktemp -d)
|
||||||
|
|
||||||
log() { printf '[%s] PR#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$PR_NUMBER" "$*" >> "$LOGFILE"; }
|
log() { printf '[%s] PR#%s %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$PR_NUMBER" "$*" >> "$LOGFILE"; }
|
||||||
status() { printf '[%s] PR #%s: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$PR_NUMBER" "$*" > "$STATUSFILE"; log "$*"; }
|
status() { printf '[%s] PR #%s: %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$PR_NUMBER" "$*" > "$STATUSFILE"; log "$*"; }
|
||||||
cleanup() { rm -rf "$REVIEW_TMPDIR" "$LOCKFILE" "$STATUSFILE" "/tmp/${PROJECT_NAME}-review-graph-${PR_NUMBER}.json"; }
|
|
||||||
trap cleanup EXIT
|
# cleanup — remove temp files (NOT lockfile — cleanup_on_exit handles that)
|
||||||
|
cleanup() {
|
||||||
|
rm -rf "$REVIEW_TMPDIR" "$STATUSFILE" "/tmp/${PROJECT_NAME}-review-graph-${PR_NUMBER}.json"
|
||||||
|
}
|
||||||
|
|
||||||
|
# cleanup_on_exit — defensive cleanup: remove lockfile if we own it, kill residual children
|
||||||
|
# This handles the case where review-pr.sh is terminated unexpectedly (e.g., watchdog SIGTERM)
|
||||||
|
cleanup_on_exit() {
|
||||||
|
local ec=$?
|
||||||
|
# Remove lockfile only if we own it (PID matches $$)
|
||||||
|
if [ -f "$LOCKFILE" ] && [ -n "$(cat "$LOCKFILE" 2>/dev/null)" ]; then
|
||||||
|
if [ "$(cat "$LOCKFILE" 2>/dev/null)" = "$$" ]; then
|
||||||
|
rm -f "$LOCKFILE"
|
||||||
|
log "cleanup_on_exit: removed lockfile (we owned it)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
# Kill any direct children that may have been spawned by this process
|
||||||
|
# (e.g., bash -c commands from Claude's Bash tool that didn't get reaped)
|
||||||
|
pkill -P $$ 2>/dev/null || true
|
||||||
|
# Call the main cleanup function to remove temp files
|
||||||
|
cleanup
|
||||||
|
exit "$ec"
|
||||||
|
}
|
||||||
|
trap cleanup_on_exit EXIT INT TERM
|
||||||
|
|
||||||
|
# Note: EXIT trap is already set above. The cleanup function is still available for
|
||||||
|
# non-error exits (e.g., normal completion via exit 0 after verdict posted).
|
||||||
|
# When review succeeds, we want to skip lockfile removal since the verdict was posted.
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# LOG ROTATION
|
# LOG ROTATION
|
||||||
|
|
@ -104,6 +131,7 @@ if [ "$PR_STATE" != "open" ]; then
|
||||||
log "SKIP: state=${PR_STATE}"
|
log "SKIP: state=${PR_STATE}"
|
||||||
worktree_cleanup "$WORKTREE"
|
worktree_cleanup "$WORKTREE"
|
||||||
rm -f "$OUTPUT_FILE" "$SID_FILE" 2>/dev/null || true
|
rm -f "$OUTPUT_FILE" "$SID_FILE" 2>/dev/null || true
|
||||||
|
rm -f "$LOCKFILE"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -113,7 +141,7 @@ fi
|
||||||
CI_STATE=$(ci_commit_status "$PR_SHA")
|
CI_STATE=$(ci_commit_status "$PR_SHA")
|
||||||
CI_NOTE=""
|
CI_NOTE=""
|
||||||
if ! ci_passed "$CI_STATE"; then
|
if ! ci_passed "$CI_STATE"; then
|
||||||
ci_required_for_pr "$PR_NUMBER" && { log "SKIP: CI=${CI_STATE}"; exit 0; }
|
ci_required_for_pr "$PR_NUMBER" && { log "SKIP: CI=${CI_STATE}"; rm -f "$LOCKFILE"; exit 0; }
|
||||||
CI_NOTE=" (not required — non-code PR)"
|
CI_NOTE=" (not required — non-code PR)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -123,10 +151,10 @@ fi
|
||||||
ALL_COMMENTS=$(forge_api_all "/issues/${PR_NUMBER}/comments")
|
ALL_COMMENTS=$(forge_api_all "/issues/${PR_NUMBER}/comments")
|
||||||
HAS_CMT=$(printf '%s' "$ALL_COMMENTS" | jq --arg s "$PR_SHA" \
|
HAS_CMT=$(printf '%s' "$ALL_COMMENTS" | jq --arg s "$PR_SHA" \
|
||||||
'[.[]|select(.body|contains("<!-- reviewed: "+$s+" -->"))]|length')
|
'[.[]|select(.body|contains("<!-- reviewed: "+$s+" -->"))]|length')
|
||||||
[ "${HAS_CMT:-0}" -gt 0 ] && [ "$FORCE" != "--force" ] && { log "SKIP: reviewed ${PR_SHA:0:7}"; exit 0; }
|
[ "${HAS_CMT:-0}" -gt 0 ] && [ "$FORCE" != "--force" ] && { log "SKIP: reviewed ${PR_SHA:0:7}"; rm -f "$LOCKFILE"; exit 0; }
|
||||||
HAS_FML=$(forge_api_all "/pulls/${PR_NUMBER}/reviews" | jq --arg s "$PR_SHA" \
|
HAS_FML=$(forge_api_all "/pulls/${PR_NUMBER}/reviews" | jq --arg s "$PR_SHA" \
|
||||||
'[.[]|select(.commit_id==$s)|select(.state!="COMMENT")]|length')
|
'[.[]|select(.commit_id==$s)|select(.state!="COMMENT")]|length')
|
||||||
[ "${HAS_FML:-0}" -gt 0 ] && [ "$FORCE" != "--force" ] && { log "SKIP: formal review"; exit 0; }
|
[ "${HAS_FML:-0}" -gt 0 ] && [ "$FORCE" != "--force" ] && { log "SKIP: formal review"; rm -f "$LOCKFILE"; exit 0; }
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# RE-REVIEW DETECTION
|
# RE-REVIEW DETECTION
|
||||||
|
|
@ -324,3 +352,7 @@ esac
|
||||||
profile_write_journal "review-${PR_NUMBER}" "Review PR #${PR_NUMBER} (${VERDICT})" "${VERDICT,,}" "" || true
|
profile_write_journal "review-${PR_NUMBER}" "Review PR #${PR_NUMBER} (${VERDICT})" "${VERDICT,,}" "" || true
|
||||||
|
|
||||||
log "DONE: ${VERDICT} (re-review: ${IS_RE_REVIEW})"
|
log "DONE: ${VERDICT} (re-review: ${IS_RE_REVIEW})"
|
||||||
|
|
||||||
|
# Remove lockfile on successful completion (cleanup_on_exit will also do this,
|
||||||
|
# but we do it here to avoid the trap running twice)
|
||||||
|
rm -f "$LOCKFILE"
|
||||||
|
|
|
||||||
129
tests/test-watchdog-process-group.sh
Executable file
129
tests/test-watchdog-process-group.sh
Executable file
|
|
@ -0,0 +1,129 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# test-watchdog-process-group.sh — Test that claude_run_with_watchdog kills orphan children
|
||||||
|
#
|
||||||
|
# This test verifies that when claude_run_with_watchdog terminates the Claude process,
|
||||||
|
# all child processes (including those spawned by Claude's Bash tool) are also killed.
|
||||||
|
#
|
||||||
|
# Reproducer scenario:
|
||||||
|
# 1. Create a fake "claude" stub that:
|
||||||
|
# a. Spawns a long-running child process (sleep 3600)
|
||||||
|
# b. Writes a result marker to stdout to trigger idle detection
|
||||||
|
# c. Stays running
|
||||||
|
# 2. Run claude_run_with_watchdog with the stub
|
||||||
|
# 3. Before the fix: sleep child survives (orphaned to PID 1)
|
||||||
|
# 4. After the fix: sleep child dies (killed as part of process group with -PID)
|
||||||
|
#
|
||||||
|
# Usage: ./tests/test-watchdog-process-group.sh
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
TEST_TMP="/tmp/test-watchdog-$$"
|
||||||
|
LOGFILE="${TEST_TMP}/log.txt"
|
||||||
|
PASS=true
|
||||||
|
|
||||||
|
# shellcheck disable=SC2317
|
||||||
|
cleanup_test() {
|
||||||
|
rm -rf "$TEST_TMP"
|
||||||
|
}
|
||||||
|
trap cleanup_test EXIT INT TERM
|
||||||
|
|
||||||
|
mkdir -p "$TEST_TMP"
|
||||||
|
|
||||||
|
log() {
|
||||||
|
printf '[TEST] %s\n' "$*" | tee -a "$LOGFILE"
|
||||||
|
}
|
||||||
|
|
||||||
|
fail() {
|
||||||
|
printf '[TEST] FAIL: %s\n' "$*" | tee -a "$LOGFILE"
|
||||||
|
PASS=false
|
||||||
|
}
|
||||||
|
|
||||||
|
pass() {
|
||||||
|
printf '[TEST] PASS: %s\n' "$*" | tee -a "$LOGFILE"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Export required environment variables
|
||||||
|
export CLAUDE_TIMEOUT=10 # Short timeout for testing
|
||||||
|
export CLAUDE_IDLE_GRACE=2 # Short grace period for testing
|
||||||
|
export LOGFILE="${LOGFILE}" # Required by agent-sdk.sh
|
||||||
|
|
||||||
|
# Create a fake claude stub that:
|
||||||
|
# 1. Spawns a long-running child process (sleep 3600) that will become an orphan if parent is killed
|
||||||
|
# 2. Writes a result marker to stdout (to trigger the watchdog's idle-after-result path)
|
||||||
|
# 3. Stays running so the watchdog can kill it
|
||||||
|
cat > "${TEST_TMP}/fake-claude" << 'FAKE_CLAUDE_EOF'
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Fake claude that spawns a child and stays running
|
||||||
|
# Simulates Claude's behavior when it spawns a Bash tool command
|
||||||
|
|
||||||
|
# Write result marker to stdout (triggers watchdog idle detection)
|
||||||
|
echo '{"type":"result","session_id":"test-session-123","verdict":"APPROVE"}'
|
||||||
|
|
||||||
|
# Spawn a child that simulates Claude's Bash tool hanging
|
||||||
|
# This is the process that should be killed when the parent is terminated
|
||||||
|
sleep 3600 &
|
||||||
|
CHILD_PID=$!
|
||||||
|
|
||||||
|
# Log the child PID for debugging
|
||||||
|
echo "FAKE_CLAUDE_CHILD_PID=$CHILD_PID" >&2
|
||||||
|
|
||||||
|
# Stay running - sleep in a loop so the watchdog can kill us
|
||||||
|
while true; do
|
||||||
|
sleep 3600 &
|
||||||
|
wait $! 2>/dev/null || true
|
||||||
|
done
|
||||||
|
FAKE_CLAUDE_EOF
|
||||||
|
chmod +x "${TEST_TMP}/fake-claude"
|
||||||
|
|
||||||
|
log "Testing claude_run_with_watchdog process group cleanup..."
|
||||||
|
|
||||||
|
# Source the library and run claude_run_with_watchdog
|
||||||
|
cd "$SCRIPT_DIR"
|
||||||
|
source lib/agent-sdk.sh
|
||||||
|
|
||||||
|
log "Starting claude_run_with_watchdog with fake claude..."
|
||||||
|
|
||||||
|
# Run the function directly (not as a script)
|
||||||
|
# We need to capture output and redirect stderr
|
||||||
|
OUTPUT_FILE="${TEST_TMP}/output.txt"
|
||||||
|
timeout 35 bash -c "
|
||||||
|
source '${SCRIPT_DIR}/lib/agent-sdk.sh'
|
||||||
|
CLAUDE_TIMEOUT=10 CLAUDE_IDLE_GRACE=2 LOGFILE='${LOGFILE}' claude_run_with_watchdog '${TEST_TMP}/fake-claude' > '${OUTPUT_FILE}' 2>&1
|
||||||
|
exit \$?
|
||||||
|
" || true
|
||||||
|
|
||||||
|
# Give the watchdog a moment to clean up
|
||||||
|
log "Waiting for cleanup..."
|
||||||
|
sleep 5
|
||||||
|
|
||||||
|
# More precise check: look for sleep 3600 processes
|
||||||
|
# These would be the orphans from our fake claude
|
||||||
|
ORPHAN_COUNT=$(pgrep -a sleep 2>/dev/null | grep -c "sleep 3600" 2>/dev/null || echo "0")
|
||||||
|
|
||||||
|
if [ "$ORPHAN_COUNT" -gt 0 ]; then
|
||||||
|
log "Found $ORPHAN_COUNT orphan sleep 3600 processes:"
|
||||||
|
pgrep -a sleep | grep "sleep 3600"
|
||||||
|
fail "Orphan children found - process group cleanup did not work"
|
||||||
|
else
|
||||||
|
pass "No orphan children found - process group cleanup worked"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Also verify that the fake claude itself is not running
|
||||||
|
FAKE_CLAUDE_COUNT=$(pgrep -c -f "fake-claude" 2>/dev/null || echo "0")
|
||||||
|
if [ "$FAKE_CLAUDE_COUNT" -gt 0 ]; then
|
||||||
|
log "Found $FAKE_CLAUDE_COUNT fake-claude processes still running"
|
||||||
|
fail "Fake claude process(es) still running"
|
||||||
|
else
|
||||||
|
pass "Fake claude process terminated"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
echo ""
|
||||||
|
if [ "$PASS" = true ]; then
|
||||||
|
log "All tests passed!"
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
log "Some tests failed. See log at $LOGFILE"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
Loading…
Add table
Add a link
Reference in a new issue