fix: fix: standardize logging across all agents — capture errors, log exit codes, consistent format (#367)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/pr/ci Pipeline failed
ci/woodpecker/pr/smoke-init Pipeline was successful

This commit is contained in:
Agent 2026-04-07 18:51:34 +00:00
parent f686d47a98
commit d216e6294f
11 changed files with 190 additions and 136 deletions

View file

@ -52,9 +52,11 @@ agent_run() {
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
if [ "$rc" -eq 124 ]; then
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s"
log "agent_run: TIMEOUT after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
elif [ "$rc" -ne 0 ]; then
log "agent_run: claude exited with code $rc"
local tail_lines
tail_lines=$(echo "$output" | tail -3)
log "agent_run: FAILED with exit code $rc: ${tail_lines:-no output}"
fi
if [ -z "$output" ]; then
log "agent_run: empty output (claude may have crashed or failed)"
@ -89,9 +91,11 @@ agent_run() {
local nudge_rc
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
if [ "$nudge_rc" -eq 124 ]; then
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s"
log "agent_run: nudge TIMEOUT after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
elif [ "$nudge_rc" -ne 0 ]; then
log "agent_run: nudge claude exited with code $nudge_rc"
local nudge_tail
nudge_tail=$(echo "$output" | tail -3)
log "agent_run: nudge FAILED with exit code $nudge_rc: ${nudge_tail:-no output}"
fi
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
if [ -n "$new_sid" ]; then

View file

@ -138,8 +138,12 @@ unset CLAWHUB_TOKEN 2>/dev/null || true
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
# Shared log helper
# Usage: log "message"
# Sets LOG_AGENT to control the agent name prefix (default: derived from SCRIPT_DIR)
# Output format: [2026-04-03T14:00:00Z] agent: message
log() {
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
local agent="${LOG_AGENT:-$(basename "$(dirname "$(dirname "${BASH_SOURCE[0]}")")")}"
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*"
}
# =============================================================================

View file

@ -357,11 +357,19 @@ pr_close() {
local pr_num="$1"
_prl_log "closing PR #${pr_num}"
curl -sf -X PATCH \
local resp http_code
resp=$(curl -sf -w "\n%{http_code}" -X PATCH \
-H "Authorization: token ${FORGE_TOKEN}" \
-H "Content-Type: application/json" \
"${FORGE_API}/pulls/${pr_num}" \
-d '{"state":"closed"}' >/dev/null 2>&1 || true
-d '{"state":"closed"}' 2>/dev/null) || true
http_code=$(printf '%s\n' "$resp" | tail -1)
if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then
local body
body=$(printf '%s\n' "$resp" | sed '$d' | head -1)
_prl_log "pr_close FAILED for PR #${pr_num}: HTTP ${http_code} ${body:0:200}"
return 1
fi
}
# ---------------------------------------------------------------------------
@ -398,11 +406,17 @@ pr_walk_to_merge() {
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
ci_retry_count=$((ci_retry_count + 1))
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
( cd "$worktree" && \
local rebase_output rebase_rc
rebase_output=$( ( cd "$worktree" && \
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
git rebase "${remote}/${PRIMARY_BRANCH}" && \
git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true
git push --force-with-lease "$remote" HEAD ) 2>&1 ) || rebase_rc=$?
if [ -n "$rebase_rc" ] && [ "$rebase_rc" -ne 0 ]; then
_prl_log "infra retrigger FAILED (exit code $rebase_rc): $(echo "$rebase_output" | tail -3)"
else
_prl_log "infra retrigger succeeded"
fi
continue
fi