fix: standardize logging across all agents — capture errors, log exit codes, consistent format (#367)
This commit is contained in:
parent
f686d47a98
commit
d216e6294f
11 changed files with 190 additions and 136 deletions
|
|
@ -52,9 +52,11 @@ agent_run() {
|
|||
log "agent_run: starting (resume=${resume_id:-(new)}, dir=${run_dir})"
|
||||
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude "${args[@]}" 2>>"$LOGFILE") && rc=0 || rc=$?
|
||||
if [ "$rc" -eq 124 ]; then
|
||||
log "agent_run: timeout after ${CLAUDE_TIMEOUT:-7200}s"
|
||||
log "agent_run: TIMEOUT after ${CLAUDE_TIMEOUT:-7200}s (exit code $rc)"
|
||||
elif [ "$rc" -ne 0 ]; then
|
||||
log "agent_run: claude exited with code $rc"
|
||||
local tail_lines
|
||||
tail_lines=$(echo "$output" | tail -3)
|
||||
log "agent_run: FAILED with exit code $rc: ${tail_lines:-no output}"
|
||||
fi
|
||||
if [ -z "$output" ]; then
|
||||
log "agent_run: empty output (claude may have crashed or failed)"
|
||||
|
|
@ -89,9 +91,11 @@ agent_run() {
|
|||
local nudge_rc
|
||||
output=$(cd "$run_dir" && flock -w 600 "$lock_file" timeout "${CLAUDE_TIMEOUT:-7200}" claude -p "$nudge" --resume "$_AGENT_SESSION_ID" --output-format json --dangerously-skip-permissions --max-turns 50 ${CLAUDE_MODEL:+--model "$CLAUDE_MODEL"} 2>>"$LOGFILE") && nudge_rc=0 || nudge_rc=$?
|
||||
if [ "$nudge_rc" -eq 124 ]; then
|
||||
log "agent_run: nudge timeout after ${CLAUDE_TIMEOUT:-7200}s"
|
||||
log "agent_run: nudge TIMEOUT after ${CLAUDE_TIMEOUT:-7200}s (exit code $nudge_rc)"
|
||||
elif [ "$nudge_rc" -ne 0 ]; then
|
||||
log "agent_run: nudge claude exited with code $nudge_rc"
|
||||
local nudge_tail
|
||||
nudge_tail=$(echo "$output" | tail -3)
|
||||
log "agent_run: nudge FAILED with exit code $nudge_rc: ${nudge_tail:-no output}"
|
||||
fi
|
||||
new_sid=$(printf '%s' "$output" | jq -r '.session_id // empty' 2>/dev/null) || true
|
||||
if [ -n "$new_sid" ]; then
|
||||
|
|
|
|||
|
|
@ -138,8 +138,12 @@ unset CLAWHUB_TOKEN 2>/dev/null || true
|
|||
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
|
||||
|
||||
# Shared log helper
|
||||
# Usage: log "message"
|
||||
# Set LOG_AGENT to control the agent name prefix (default: name of the parent of the directory containing this file, derived from BASH_SOURCE[0])
|
||||
# Output format: [2026-04-03T14:00:00Z] agent: message
|
||||
log() {
|
||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
|
||||
local agent="${LOG_AGENT:-$(basename "$(dirname "$(dirname "${BASH_SOURCE[0]}")")")}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
|
|
|
|||
|
|
@ -357,11 +357,19 @@ pr_close() {
|
|||
local pr_num="$1"
|
||||
|
||||
_prl_log "closing PR #${pr_num}"
|
||||
curl -sf -X PATCH \
|
||||
local resp http_code
|
||||
resp=$(curl -sf -w "\n%{http_code}" -X PATCH \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}/pulls/${pr_num}" \
|
||||
-d '{"state":"closed"}' >/dev/null 2>&1 || true
|
||||
-d '{"state":"closed"}' 2>/dev/null) || true
|
||||
http_code=$(printf '%s\n' "$resp" | tail -1)
|
||||
if [ "$http_code" != "200" ] && [ "$http_code" != "204" ]; then
|
||||
local body
|
||||
body=$(printf '%s\n' "$resp" | sed '$d' | head -1)
|
||||
_prl_log "pr_close FAILED for PR #${pr_num}: HTTP ${http_code} ${body:0:200}"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -398,11 +406,17 @@ pr_walk_to_merge() {
|
|||
if [ "${_PR_CI_FAILURE_TYPE:-}" = "infra" ] && [ "$ci_retry_count" -lt 1 ]; then
|
||||
ci_retry_count=$((ci_retry_count + 1))
|
||||
_prl_log "infra failure — retriggering CI (retry ${ci_retry_count})"
|
||||
( cd "$worktree" && \
|
||||
local rebase_output rebase_rc
|
||||
rebase_output=$( ( cd "$worktree" && \
|
||||
git commit --allow-empty -m "ci: retrigger after infra failure" --no-verify && \
|
||||
git fetch "$remote" "${PRIMARY_BRANCH}" 2>/dev/null && \
|
||||
git rebase "${remote}/${PRIMARY_BRANCH}" && \
|
||||
git push --force-with-lease "$remote" HEAD ) 2>&1 | tail -5 || true
|
||||
git push --force-with-lease "$remote" HEAD ) 2>&1 ) || rebase_rc=$?
|
||||
if [ -n "$rebase_rc" ] && [ "$rebase_rc" -ne 0 ]; then
|
||||
_prl_log "infra retrigger FAILED (exit code $rebase_rc): $(echo "$rebase_output" | tail -3)"
|
||||
else
|
||||
_prl_log "infra retrigger succeeded"
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue