fix: fix: standardize logging across all agents — capture errors, log exit codes, consistent format (#367)
This commit is contained in:
parent
f686d47a98
commit
d216e6294f
11 changed files with 190 additions and 136 deletions
|
|
@ -47,9 +47,14 @@ VAULT_ENV="${SCRIPT_ROOT}/../vault/vault-env.sh"
|
|||
# Comma-separated list of Forgejo usernames with admin role
|
||||
ADMIN_USERS="${FORGE_ADMIN_USERS:-vault-bot,admin}"
|
||||
|
||||
# Log function
|
||||
# Persistent log file for dispatcher
|
||||
DISPATCHER_LOG_FILE="${DISINTO_LOG_DIR:-/tmp}/dispatcher/dispatcher.log"
|
||||
mkdir -p "$(dirname "$DISPATCHER_LOG_FILE")"
|
||||
|
||||
# Log function with standardized format
|
||||
log() {
|
||||
printf '[%s] %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')" "$*"
|
||||
local agent="${LOG_AGENT:-dispatcher}"
|
||||
printf '[%s] %s: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$agent" "$*" >> "$DISPATCHER_LOG_FILE"
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -185,7 +190,7 @@ verify_admin_approver() {
|
|||
# Fetch reviews for this PR
|
||||
local reviews_json
|
||||
reviews_json=$(get_pr_reviews "$pr_number") || {
|
||||
log "WARNING: Could not fetch reviews for PR #${pr_number} — skipping"
|
||||
log "WARNING: Could not fetch reviews for PR #${pr_number} — skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
}
|
||||
|
||||
|
|
@ -193,7 +198,7 @@ verify_admin_approver() {
|
|||
local review_count
|
||||
review_count=$(echo "$reviews_json" | jq 'length // 0')
|
||||
if [ "$review_count" -eq 0 ]; then
|
||||
log "WARNING: No reviews found for PR #${pr_number} — rejecting"
|
||||
log "WARNING: No reviews found for PR #${pr_number} — rejecting" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
fi
|
||||
|
||||
|
|
@ -216,12 +221,12 @@ verify_admin_approver() {
|
|||
|
||||
# Check if reviewer is admin
|
||||
if is_allowed_admin "$reviewer"; then
|
||||
log "Verified: PR #${pr_number} approved by admin '${reviewer}'"
|
||||
log "Verified: PR #${pr_number} approved by admin '${reviewer}'" >> "$DISPATCHER_LOG_FILE"
|
||||
return 0
|
||||
fi
|
||||
done < <(echo "$reviews_json" | jq -c '.[]')
|
||||
|
||||
log "WARNING: No admin approval found for PR #${pr_number} — rejecting"
|
||||
log "WARNING: No admin approval found for PR #${pr_number} — rejecting" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
}
|
||||
|
||||
|
|
@ -243,11 +248,11 @@ verify_admin_merged() {
|
|||
# Get the PR that introduced this file
|
||||
local pr_num
|
||||
pr_num=$(get_pr_for_file "$toml_file") || {
|
||||
log "WARNING: No PR found for action ${action_id} — skipping (possible direct push)"
|
||||
log "WARNING: No PR found for action ${action_id} — skipping (possible direct push)" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
}
|
||||
|
||||
log "Action ${action_id} arrived via PR #${pr_num}"
|
||||
log "Action ${action_id} arrived via PR #${pr_num}" >> "$DISPATCHER_LOG_FILE"
|
||||
|
||||
# First, try admin approver check (for auto-merge workflow)
|
||||
if verify_admin_approver "$pr_num" "$action_id"; then
|
||||
|
|
@ -257,7 +262,7 @@ verify_admin_merged() {
|
|||
# Fallback: Check merger (backwards compatibility for manual merges)
|
||||
local merger_json
|
||||
merger_json=$(get_pr_merger "$pr_num") || {
|
||||
log "WARNING: Could not fetch PR #${pr_num} details — skipping"
|
||||
log "WARNING: Could not fetch PR #${pr_num} details — skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
}
|
||||
|
||||
|
|
@ -267,22 +272,22 @@ verify_admin_merged() {
|
|||
|
||||
# Check if PR is merged
|
||||
if [[ "$merged" != "true" ]]; then
|
||||
log "WARNING: PR #${pr_num} is not merged — skipping"
|
||||
log "WARNING: PR #${pr_num} is not merged — skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Check if merger is admin
|
||||
if [ -z "$merger_username" ]; then
|
||||
log "WARNING: Could not determine PR #${pr_num} merger — skipping"
|
||||
log "WARNING: Could not determine PR #${pr_num} merger — skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! is_allowed_admin "$merger_username"; then
|
||||
log "WARNING: PR #${pr_num} merged by non-admin user '${merger_username}' — skipping"
|
||||
log "WARNING: PR #${pr_num} merged by non-admin user '${merger_username}' — skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 1
|
||||
fi
|
||||
|
||||
log "Verified: PR #${pr_num} merged by admin '${merger_username}' (fallback check)"
|
||||
log "Verified: PR #${pr_num} merged by admin '${merger_username}' (fallback check)" >> "$DISPATCHER_LOG_FILE"
|
||||
return 0
|
||||
}
|
||||
|
||||
|
|
@ -343,7 +348,7 @@ write_result() {
|
|||
'{id: $id, exit_code: $exit_code, timestamp: $timestamp, logs: $logs}' \
|
||||
> "$result_file"
|
||||
|
||||
log "Result written: ${result_file}"
|
||||
log "Result written: ${result_file}" >> "$DISPATCHER_LOG_FILE"
|
||||
}
|
||||
|
||||
# Launch runner for the given action
|
||||
|
|
@ -353,18 +358,18 @@ launch_runner() {
|
|||
local action_id
|
||||
action_id=$(basename "$toml_file" .toml)
|
||||
|
||||
log "Launching runner for action: ${action_id}"
|
||||
log "Launching runner for action: ${action_id}" >> "$DISPATCHER_LOG_FILE"
|
||||
|
||||
# Validate TOML
|
||||
if ! validate_action "$toml_file"; then
|
||||
log "ERROR: Action validation failed for ${action_id}"
|
||||
log "ERROR: Action validation failed for ${action_id}" >> "$DISPATCHER_LOG_FILE"
|
||||
write_result "$action_id" 1 "Validation failed: see logs above"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Verify admin merge
|
||||
if ! verify_admin_merged "$toml_file"; then
|
||||
log "ERROR: Admin merge verification failed for ${action_id}"
|
||||
log "ERROR: Admin merge verification failed for ${action_id}" >> "$DISPATCHER_LOG_FILE"
|
||||
write_result "$action_id" 1 "Admin merge verification failed: see logs above"
|
||||
return 1
|
||||
fi
|
||||
|
|
@ -400,7 +405,7 @@ launch_runner() {
|
|||
fi
|
||||
done
|
||||
else
|
||||
log "Action ${action_id} has no secrets declared — runner will execute without extra env vars"
|
||||
log "Action ${action_id} has no secrets declared — runner will execute without extra env vars" >> "$DISPATCHER_LOG_FILE"
|
||||
fi
|
||||
|
||||
# Add formula and action id as arguments (safe from shell injection)
|
||||
|
|
@ -423,7 +428,7 @@ launch_runner() {
|
|||
log_cmd+=("$arg")
|
||||
fi
|
||||
done
|
||||
log "Running: ${log_cmd[*]}"
|
||||
log "Running: ${log_cmd[*]}" >> "$DISPATCHER_LOG_FILE"
|
||||
|
||||
# Create temp file for logs
|
||||
local log_file
|
||||
|
|
@ -443,9 +448,9 @@ launch_runner() {
|
|||
write_result "$action_id" "$exit_code" "$logs"
|
||||
|
||||
if [ $exit_code -eq 0 ]; then
|
||||
log "Runner completed successfully for action: ${action_id}"
|
||||
log "Runner completed successfully for action: ${action_id}" >> "$DISPATCHER_LOG_FILE"
|
||||
else
|
||||
log "Runner failed for action: ${action_id} (exit code: ${exit_code})"
|
||||
log "Runner failed for action: ${action_id} (exit code: ${exit_code})" >> "$DISPATCHER_LOG_FILE"
|
||||
fi
|
||||
|
||||
return $exit_code
|
||||
|
|
@ -512,7 +517,7 @@ dispatch_reproduce() {
|
|||
local issue_number="$1"
|
||||
|
||||
if is_reproduce_running "$issue_number"; then
|
||||
log "Reproduce already running for issue #${issue_number}, skipping"
|
||||
log "Reproduce already running for issue #${issue_number}, skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 0
|
||||
fi
|
||||
|
||||
|
|
@ -523,11 +528,11 @@ dispatch_reproduce() {
|
|||
done
|
||||
|
||||
if [ -z "$project_toml" ]; then
|
||||
log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping reproduce for #${issue_number}"
|
||||
log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping reproduce for #${issue_number}" >> "$DISPATCHER_LOG_FILE"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "Dispatching reproduce-agent for issue #${issue_number} (project: ${project_toml})"
|
||||
log "Dispatching reproduce-agent for issue #${issue_number} (project: ${project_toml})" >> "$DISPATCHER_LOG_FILE"
|
||||
|
||||
# Build docker run command using array (safe from injection)
|
||||
local -a cmd=(docker run --rm
|
||||
|
|
@ -575,7 +580,7 @@ dispatch_reproduce() {
|
|||
"${cmd[@]}" &
|
||||
local bg_pid=$!
|
||||
echo "$bg_pid" > "$(_reproduce_lockfile "$issue_number")"
|
||||
log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}"
|
||||
log "Reproduce container launched (pid ${bg_pid}) for issue #${issue_number}" >> "$DISPATCHER_LOG_FILE"
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -636,7 +641,7 @@ dispatch_triage() {
|
|||
local issue_number="$1"
|
||||
|
||||
if is_triage_running "$issue_number"; then
|
||||
log "Triage already running for issue #${issue_number}, skipping"
|
||||
log "Triage already running for issue #${issue_number}, skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
return 0
|
||||
fi
|
||||
|
||||
|
|
@ -647,7 +652,7 @@ dispatch_triage() {
|
|||
done
|
||||
|
||||
if [ -z "$project_toml" ]; then
|
||||
log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping triage for #${issue_number}"
|
||||
log "WARNING: no project TOML found under ${FACTORY_ROOT}/projects/ — skipping triage for #${issue_number}" >> "$DISPATCHER_LOG_FILE"
|
||||
return 0
|
||||
fi
|
||||
|
||||
|
|
@ -700,7 +705,7 @@ dispatch_triage() {
|
|||
"${cmd[@]}" &
|
||||
local bg_pid=$!
|
||||
echo "$bg_pid" > "$(_triage_lockfile "$issue_number")"
|
||||
log "Triage container launched (pid ${bg_pid}) for issue #${issue_number}"
|
||||
log "Triage container launched (pid ${bg_pid}) for issue #${issue_number}" >> "$DISPATCHER_LOG_FILE"
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -710,19 +715,19 @@ dispatch_triage() {
|
|||
# Clone or pull the ops repo
|
||||
ensure_ops_repo() {
|
||||
if [ ! -d "${OPS_REPO_ROOT}/.git" ]; then
|
||||
log "Cloning ops repo from ${FORGE_URL}/${FORGE_OPS_REPO}..."
|
||||
log "Cloning ops repo from ${FORGE_URL}/${FORGE_OPS_REPO}..." >> "$DISPATCHER_LOG_FILE"
|
||||
git clone "${FORGE_URL}/${FORGE_OPS_REPO}" "${OPS_REPO_ROOT}"
|
||||
else
|
||||
log "Pulling latest ops repo changes..."
|
||||
log "Pulling latest ops repo changes..." >> "$DISPATCHER_LOG_FILE"
|
||||
(cd "${OPS_REPO_ROOT}" && git pull --rebase)
|
||||
fi
|
||||
}
|
||||
|
||||
# Main dispatcher loop
|
||||
main() {
|
||||
log "Starting dispatcher..."
|
||||
log "Polling ops repo: ${VAULT_ACTIONS_DIR}"
|
||||
log "Admin users: ${ADMIN_USERS}"
|
||||
log "Starting dispatcher..." >> "$DISPATCHER_LOG_FILE"
|
||||
log "Polling ops repo: ${VAULT_ACTIONS_DIR}" >> "$DISPATCHER_LOG_FILE"
|
||||
log "Admin users: ${ADMIN_USERS}" >> "$DISPATCHER_LOG_FILE"
|
||||
|
||||
while true; do
|
||||
# Refresh ops repo at the start of each poll cycle
|
||||
|
|
@ -730,7 +735,7 @@ main() {
|
|||
|
||||
# Check if actions directory exists
|
||||
if [ ! -d "${VAULT_ACTIONS_DIR}" ]; then
|
||||
log "Actions directory not found: ${VAULT_ACTIONS_DIR}"
|
||||
log "Actions directory not found: ${VAULT_ACTIONS_DIR}" >> "$DISPATCHER_LOG_FILE"
|
||||
sleep 60
|
||||
continue
|
||||
fi
|
||||
|
|
@ -745,7 +750,7 @@ main() {
|
|||
|
||||
# Skip if already completed
|
||||
if is_action_completed "$action_id"; then
|
||||
log "Action ${action_id} already completed, skipping"
|
||||
log "Action ${action_id} already completed, skipping" >> "$DISPATCHER_LOG_FILE"
|
||||
continue
|
||||
fi
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue