fix: fix: standardize logging across all agents — capture errors, log exit codes, consistent format (#367) #383

Closed
dev-qwen wants to merge 1 commit from fix/issue-367 into main
10 changed files with 116 additions and 53 deletions

View file

@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"
LOG_FILE="$SCRIPT_DIR/architect.log"
LOG_FILE="${DISINTO_LOG_DIR}/architect/architect.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
@ -44,7 +44,8 @@ SID_FILE="/tmp/architect-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/architect-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-architect-run"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# Override LOG_AGENT for consistent agent identification
LOG_AGENT="architect"
# ── Guards ────────────────────────────────────────────────────────────────
check_active architect

View file

@ -47,9 +47,14 @@ VAULT_ENV="${SCRIPT_ROOT}/../vault/vault-env.sh"
# Comma-separated list of Forgejo usernames with admin role
ADMIN_USERS="${FORGE_ADMIN_USERS:-vault-bot,admin}"
# Log function
# Persistent log file for dispatcher
DISPATCHER_LOG_FILE="${DISINTO_LOG_DIR:-/tmp}/dispatcher/dispatcher.log"
mkdir -p "$(dirname "$DISPATCHER_LOG_FILE")"
# Log function with standardized format
log() {
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
local agent="${LOG_AGENT:-dispatcher}"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$DISPATCHER_LOG_FILE"
}
# -----------------------------------------------------------------------------

View file

@ -55,7 +55,8 @@ RESULT_FILE="/tmp/gardener-result-${PROJECT_NAME}.txt"
GARDENER_PR_FILE="/tmp/gardener-pr-${PROJECT_NAME}.txt"
WORKTREE="/tmp/${PROJECT_NAME}-gardener-run"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# Override LOG_AGENT for consistent agent identification
LOG_AGENT="gardener"
# ── Guards ────────────────────────────────────────────────────────────────
check_active gardener
@ -156,19 +157,21 @@ _gardener_execute_manifest() {
case "$action" in
add_label)
local label label_id
local label label_id http_code resp
label=$(jq -r ".[$i].label" "$manifest_file")
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/labels" | jq -r --arg n "$label" \
'.[] | select(.name == $n) | .id') || true
if [ -n "$label_id" ]; then
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}/labels" \
-d "{\"labels\":[${label_id}]}" >/dev/null 2>&1; then
-d "{\"labels\":[${label_id}]}" 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
log "manifest: add_label '${label}' to #${issue}"
else
log "manifest: FAILED add_label '${label}' to #${issue}"
log "manifest: FAILED add_label '${label}' to #${issue}: HTTP ${http_code}"
fi
else
log "manifest: FAILED add_label — label '${label}' not found"
@ -176,17 +179,19 @@ _gardener_execute_manifest() {
;;
remove_label)
local label label_id
local label label_id http_code resp
label=$(jq -r ".[$i].label" "$manifest_file")
label_id=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/labels" | jq -r --arg n "$label" \
'.[] | select(.name == $n) | .id') || true
if [ -n "$label_id" ]; then
if curl -sf -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${issue}/labels/${label_id}" >/dev/null 2>&1; then
resp=$(curl -sf -w "\n%{http_code}" -X DELETE -H "Authorization: token ${FORGE_TOKEN}" \
"${FORGE_API}/issues/${issue}/labels/${label_id}" 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: remove_label '${label}' from #${issue}"
else
log "manifest: FAILED remove_label '${label}' from #${issue}"
log "manifest: FAILED remove_label '${label}' from #${issue}: HTTP ${http_code}"
fi
else
log "manifest: FAILED remove_label — label '${label}' not found"
@ -194,34 +199,38 @@ _gardener_execute_manifest() {
;;
close)
local reason
local reason http_code resp
reason=$(jq -r ".[$i].reason // empty" "$manifest_file")
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}" \
-d '{"state":"closed"}' >/dev/null 2>&1; then
-d '{"state":"closed"}' 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: closed #${issue} (${reason})"
else
log "manifest: FAILED close #${issue}"
log "manifest: FAILED close #${issue}: HTTP ${http_code}"
fi
;;
comment)
local body escaped_body
local body escaped_body http_code resp
body=$(jq -r ".[$i].body" "$manifest_file")
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}/comments" \
-d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
log "manifest: commented on #${issue}"
else
log "manifest: FAILED comment on #${issue}"
log "manifest: FAILED comment on #${issue}: HTTP ${http_code}"
fi
;;
create_issue)
local title body labels escaped_title escaped_body label_ids
local title body labels escaped_title escaped_body label_ids http_code resp
title=$(jq -r ".[$i].title" "$manifest_file")
body=$(jq -r ".[$i].body" "$manifest_file")
labels=$(jq -r ".[$i].labels // [] | .[]" "$manifest_file")
@ -241,40 +250,46 @@ _gardener_execute_manifest() {
done <<< "$labels"
[ -n "$ids_json" ] && label_ids="[${ids_json}]"
fi
if curl -sf -X POST -H "Authorization: token ${FORGE_TOKEN}" \
resp=$(curl -sf -w "\n%{http_code}" -X POST -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/issues" \
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" >/dev/null 2>&1; then
-d "{\"title\":${escaped_title},\"body\":${escaped_body},\"labels\":${label_ids}}" 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
log "manifest: created issue '${title}'"
else
log "manifest: FAILED create_issue '${title}'"
log "manifest: FAILED create_issue '${title}': HTTP ${http_code}"
fi
;;
edit_body)
local body escaped_body
local body escaped_body http_code resp
body=$(jq -r ".[$i].body" "$manifest_file")
escaped_body=$(printf '%s' "$body" | jq -Rs '.')
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/issues/${issue}" \
-d "{\"body\":${escaped_body}}" >/dev/null 2>&1; then
-d "{\"body\":${escaped_body}}" 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: edited body of #${issue}"
else
log "manifest: FAILED edit_body #${issue}"
log "manifest: FAILED edit_body #${issue}: HTTP ${http_code}"
fi
;;
close_pr)
local pr
local pr http_code resp
pr=$(jq -r ".[$i].pr" "$manifest_file")
if curl -sf -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
resp=$(curl -sf -w "\n%{http_code}" -X PATCH -H "Authorization: token ${FORGE_TOKEN}" \
-H 'Content-Type: application/json' \
"${FORGE_API}/pulls/${pr}" \
-d '{"state":"closed"}' >/dev/null 2>&1; then
-d '{"state":"closed"}' 2>/dev/null) || true
http_code=$(echo "$resp" | tail -1)
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
log "manifest: closed PR #${pr}"
else
log "manifest: FAILED close_pr #${pr}"
log "manifest: FAILED close_pr #${pr}: HTTP ${http_code}"
fi
;;

View file

@ -52,12 +52,16 @@ agent_run() {
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
if [ "$rc" -eq 124 ]; then
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s"
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
elif [ "$rc" -ne 0 ]; then
log "agent_run: claude exited with code $rc"
# Log last 3 lines of output for diagnostics
if [ -n "$output" ]; then
log "agent_run: last output lines: $(echo "$output" | tail -3)"
fi
fi
if [ -z "$output" ]; then
log "agent_run: empty output (claude may have crashed or failed)"
log "agent_run: empty output (claude may have crashed or failed, exit code: $rc)"
fi
# Extract and persist session_id
@ -89,9 +93,13 @@ agent_run() {
local nudge_rc
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
if [ "$nudge_rc" -eq 124 ]; then
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s"
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
elif [ "$nudge_rc" -ne 0 ]; then
log "agent_run: nudge claude exited with code $nudge_rc"
# Log last 3 lines of output for diagnostics
if [ -n "$output" ]; then
log "agent_run: nudge last output lines: $(echo "$output" | tail -3)"
fi
fi
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
if [ -n "$new_sid" ]; then

View file

@ -13,7 +13,7 @@ FACTORY_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
if [ "${DISINTO_CONTAINER:-}" = "1" ]; then
DISINTO_DATA_DIR="${HOME}/data"
DISINTO_LOG_DIR="${DISINTO_DATA_DIR}/logs"
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener}
mkdir -p "${DISINTO_DATA_DIR}" "${DISINTO_LOG_DIR}"/{dev,action,review,supervisor,vault,site,metrics,gardener,planner,predictor,architect,dispatcher}
else
DISINTO_LOG_DIR="${FACTORY_ROOT}"
fi
@ -138,8 +138,12 @@ unset CLAWHUB_TOKEN 2>/dev/null || true
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
# Shared log helper
# Usage: log "message"
# Output: [2026-04-03T14:00:00Z] agent: message
# Where agent is set via LOG_AGENT variable (defaults to caller's context)
log() {
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
local agent="${LOG_AGENT:-agent}"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*"
}
# =============================================================================

View file

@ -357,11 +357,18 @@ pr_close() {
local pr_num="$1"
_prl_log "closing PR #${pr_num}"
curl -sf -X PATCH \
local resp http_code
resp=$(curl -sf -w "\n%{http_code}" -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/pulls/${pr_num}" \
-d '{"state":"closed"}' >/dev/null 2>&1 || true
-d '{"state":"closed"}' 2>/dev/null) || true
http_code=$(printf '%s\n' "$resp" | tail -1)
if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then
_prl_log "pr_close FAILED: HTTP ${http_code} for PR #${pr_num}"
return 1
fi
_prl_log "PR #${pr_num} closed"
}
# ---------------------------------------------------------------------------
@ -398,11 +405,18 @@ pr_walk_to_merge() {
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
ci_retry_count=$((ci_retry_count + 1))
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
local rebase_output rebase_rc
( cd "$worktree" && \
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
git rebase "${remote}/${PRIMARY_BRANCH}" && \
git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true
git push --force-with-lease "$remote" HEAD ) > /tmp/rebase-output-$$ 2>&1
rebase_rc=$?
rebase_output=$(cat /tmp/rebase-output-$$)
rm -f /tmp/rebase-output-$$
if [ "$rebase_rc" -ne 0 ]; then
_prl_log "rebase/push failed (exit code $rebase_rc): $(echo "$rebase_output" | tail -5)"
fi
continue
fi

View file

@ -35,7 +35,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"
LOG_FILE="$SCRIPT_DIR/planner.log"
LOG_FILE="${DISINTO_LOG_DIR}/planner/planner.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
@ -43,7 +43,8 @@ SID_FILE="/tmp/planner-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/planner-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-planner-run"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# Override LOG_AGENT for consistent agent identification
LOG_AGENT="planner"
# ── Guards ────────────────────────────────────────────────────────────────
check_active planner

View file

@ -36,7 +36,7 @@ source "$FACTORY_ROOT/lib/guard.sh"
# shellcheck source=../lib/agent-sdk.sh
source "$FACTORY_ROOT/lib/agent-sdk.sh"
LOG_FILE="$SCRIPT_DIR/predictor.log"
LOG_FILE="${DISINTO_LOG_DIR}/predictor/predictor.log"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
LOGFILE="$LOG_FILE"
# shellcheck disable=SC2034 # consumed by agent-sdk.sh
@ -44,7 +44,8 @@ SID_FILE="/tmp/predictor-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/predictor-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-predictor-run"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# Override LOG_AGENT for consistent agent identification
LOG_AGENT="predictor"
# ── Guards ────────────────────────────────────────────────────────────────
check_active predictor

View file

@ -23,9 +23,15 @@ LOGFILE="${DISINTO_LOG_DIR}/review/review-poll.log"
MAX_REVIEWS=3
REVIEW_IDLE_TIMEOUT=14400 # 4h: kill review session if idle
log() {
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*" >> "$LOGFILE"
# Override LOG_AGENT for consistent agent identification
LOG_AGENT="review"
# Override log() to append to review-specific log file
_log() {
log "$@" >> "$LOGFILE"
}
# shellcheck disable=SC2034
log="$_log"
# Log rotation
if [ -f "$LOGFILE" ]; then
@ -126,10 +132,11 @@ if [ -n "$REVIEW_SIDS" ]; then
log " #${pr_num} re-review: new commits (${reviewed_sha:0:7}${current_sha:0:7})"
if "${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1; then
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$pr_num" 2>&1) && review_rc=0 || review_rc=$?
if [ "$review_rc" -eq 0 ]; then
REVIEWED=$((REVIEWED + 1))
else
log " #${pr_num} re-review failed"
log " #${pr_num} re-review failed (exit code $review_rc): $(echo "$review_output" | tail -3)"
fi
[ "$REVIEWED" -lt "$MAX_REVIEWS" ] || break
@ -180,10 +187,11 @@ while IFS= read -r line; do
log " #${PR_NUM} error check: ${ERROR_COMMENTS:-0} prior error(s) for ${PR_SHA:0:7}"
if "${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1; then
review_output=$("${SCRIPT_DIR}/review-pr.sh" "$PR_NUM" 2>&1) && review_rc=0 || review_rc=$?
if [ "$review_rc" -eq 0 ]; then
REVIEWED=$((REVIEWED + 1))
else
log " #${PR_NUM} review failed"
log " #${PR_NUM} review failed (exit code $review_rc): $(echo "$review_output" | tail -3)"
fi
if [ "$REVIEWED" -ge "$MAX_REVIEWS" ]; then

View file

@ -46,7 +46,8 @@ SID_FILE="/tmp/supervisor-session-${PROJECT_NAME}.sid"
SCRATCH_FILE="/tmp/supervisor-${PROJECT_NAME}-scratch.md"
WORKTREE="/tmp/${PROJECT_NAME}-supervisor-run"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%S)Z] $*" >> "$LOG_FILE"; }
# Override LOG_AGENT for consistent agent identification
LOG_AGENT="supervisor"
# ── Guards ────────────────────────────────────────────────────────────────
check_active supervisor
@ -67,10 +68,15 @@ resolve_agent_identity || true
# ── Collect pre-flight metrics ────────────────────────────────────────────
log "Running preflight.sh"
PREFLIGHT_OUTPUT=""
PREFLIGHT_RC=0
if PREFLIGHT_OUTPUT=$(bash "$SCRIPT_DIR/preflight.sh" "$PROJECT_TOML" 2>&1); then
log "Preflight collected ($(echo "$PREFLIGHT_OUTPUT" | wc -l) lines)"
else
log "WARNING: preflight.sh failed, continuing with partial data"
PREFLIGHT_RC=$?
log "WARNING: preflight.sh failed (exit code $PREFLIGHT_RC), continuing with partial data"
if [ -n "$PREFLIGHT_OUTPUT" ]; then
log "Preflight error: $(echo "$PREFLIGHT_OUTPUT" | tail -3)"
fi
fi
# ── Load formula + context ───────────────────────────────────────────────