fix: address review findings for escalation race condition, SQL injection, and sc_codes scope

- Race condition: mv escalations.jsonl to a PID-stamped snapshot before
  processing so concurrent dev-poll appends go to a fresh file; rm snapshot
  after loop — no entries are ever silently dropped
- SQL injection: validate ESC_PR_SHA is a 40-char hex string before
  interpolating into the wpdb query
- sc_codes scope: compute per-file from file_errors (already filtered to
  that file) instead of the entire step log; also switch grep to -F so
  dots in filenames are not treated as regex wildcards
- step_pid validation: reject non-integer values from Woodpecker API before
  passing as CLI argument
- Fallback body now distinguishes "CI logs unavailable" from "logs found
  but issue creation API calls failed"
- ESC_GENERIC_FAIL: avoid leading blank line by using conditional separator
  and fix code-block opening newline
- is_escalated(): remove dead esc_file/done_file locals; add Python-level
  int() guard so empty/non-numeric issue or pr values fail cleanly instead
  of producing a syntax error suppressed by 2>/dev/null

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-17 15:11:53 +00:00
parent d9520f48a6
commit 13bc948b1d
2 changed files with 48 additions and 15 deletions

View file

@ -221,7 +221,13 @@ ESCALATION_FILE="${FACTORY_ROOT}/supervisor/escalations.jsonl"
ESCALATION_DONE="${FACTORY_ROOT}/supervisor/escalations.done.jsonl"
if [ -s "$ESCALATION_FILE" ]; then
ESCALATION_COUNT=$(wc -l < "$ESCALATION_FILE")
# Atomically snapshot the file before processing to prevent race with
# concurrent dev-poll appends: new entries go to a fresh ESCALATION_FILE
# while we process the snapshot, so nothing is ever silently dropped.
ESCALATION_SNAP="${ESCALATION_FILE}.processing.$$"
mv "$ESCALATION_FILE" "$ESCALATION_SNAP"
ESCALATION_COUNT=$(wc -l < "$ESCALATION_SNAP")
flog "Processing ${ESCALATION_COUNT} escalation(s) from dev-agent"
while IFS= read -r esc_entry; do
@ -245,9 +251,15 @@ if [ -s "$ESCALATION_FILE" ]; then
ESC_PIPELINE=""
ESC_SUB_ISSUES_CREATED=0
ESC_GENERIC_FAIL=""
ESC_LOGS_AVAILABLE=0
if [ -n "$ESC_PR_SHA" ]; then
ESC_PIPELINE=$(wpdb -c "SELECT number FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND commit='${ESC_PR_SHA}' ORDER BY created DESC LIMIT 1;" 2>/dev/null | xargs || true)
# Validate SHA is a 40-char hex string before interpolating into SQL
if [[ "$ESC_PR_SHA" =~ ^[0-9a-fA-F]{40}$ ]]; then
ESC_PIPELINE=$(wpdb -c "SELECT number FROM pipelines WHERE repo_id=${WOODPECKER_REPO_ID} AND commit='${ESC_PR_SHA}' ORDER BY created DESC LIMIT 1;" 2>/dev/null | xargs || true)
else
flog "WARNING: ESC_PR_SHA '${ESC_PR_SHA}' is not a valid hex SHA — skipping pipeline lookup"
fi
fi
if [ -n "$ESC_PIPELINE" ]; then
@ -258,8 +270,10 @@ if [ -s "$ESCALATION_FILE" ]; then
while IFS=$'\t' read -r step_pid step_name; do
[ -z "$step_pid" ] && continue
[[ "$step_pid" =~ ^[0-9]+$ ]] || { flog "WARNING: invalid step_pid '${step_pid}' — skipping"; continue; }
step_logs=$(woodpecker-cli pipeline log show "${CODEBERG_REPO}" "${ESC_PIPELINE}" "${step_pid}" 2>/dev/null | tail -150 || true)
[ -z "$step_logs" ] && continue
ESC_LOGS_AVAILABLE=1
if echo "$step_name" | grep -qi "shellcheck"; then
# Create one sub-issue per file with ShellCheck errors
@ -267,8 +281,10 @@ if [ -s "$ESCALATION_FILE" ]; then
while IFS= read -r sc_file; do
[ -z "$sc_file" ] && continue
file_errors=$(echo "$step_logs" | grep -A3 "In ${sc_file} line" | head -30)
sc_codes=$(echo "$step_logs" | grep -oP 'SC\d+' | sort -u | tr '\n' ' ' | sed 's/ $//' || true)
# grep -F for literal filename match (dots in filenames are regex wildcards)
file_errors=$(echo "$step_logs" | grep -F -A3 "In ${sc_file} line" | head -30)
# SC codes only from this file's errors, not the whole step log
sc_codes=$(echo "$file_errors" | grep -oP 'SC\d+' | sort -u | tr '\n' ' ' | sed 's/ $//' || true)
sub_title="fix: ShellCheck errors in ${sc_file} (from PR #${ESC_PR})"
sub_body="## ShellCheck CI failure — \`${sc_file}\`
@ -304,9 +320,14 @@ Fix all ShellCheck errors${sc_codes:+ (${sc_codes})} in \`${sc_file}\` so PR #${
else
# Accumulate non-ShellCheck failures for one combined issue
ESC_GENERIC_FAIL="${ESC_GENERIC_FAIL}
=== ${step_name} ===
esc_section="=== ${step_name} ===
$(echo "$step_logs" | tail -50)"
if [ -z "$ESC_GENERIC_FAIL" ]; then
ESC_GENERIC_FAIL="$esc_section"
else
ESC_GENERIC_FAIL="${ESC_GENERIC_FAIL}
${esc_section}"
fi
fi
done <<< "$FAILED_STEPS"
fi
@ -319,7 +340,8 @@ $(echo "$step_logs" | tail -50)"
Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)).
### Failed step output
\`\`\`${ESC_GENERIC_FAIL}
\`\`\`
${ESC_GENERIC_FAIL}
\`\`\`
### Context
@ -342,14 +364,24 @@ Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed C
fi
fi
# Fallback: no CI logs available — create a generic investigation issue
# Fallback: no sub-issues created — differentiate logs-unavailable from creation failure
if [ "$ESC_SUB_ISSUES_CREATED" -eq 0 ]; then
sub_title="fix: investigate CI failure for PR #${ESC_PR} (from issue #${ESC_ISSUE})"
sub_body="## CI failure — investigation required
if [ "$ESC_LOGS_AVAILABLE" -eq 1 ]; then
# Logs were fetched but all issue creation API calls failed
sub_body="## CI failure — investigation required
Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)). CI logs were retrieved but sub-issue creation failed (API error).
Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the PR can merge."
else
# Could not retrieve CI logs at all
sub_body="## CI failure — investigation required
Spawned by supervisor from escalated issue #${ESC_ISSUE} (PR #${ESC_PR} failed CI after ${ESC_ATTEMPTS} attempt(s)). CI logs were unavailable at escalation time.
Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the PR can merge."
fi
new_issue=$(curl -sf -X POST \
-H "Authorization: token ${CODEBERG_TOKEN}" \
@ -367,10 +399,9 @@ Check PR #${ESC_PR} CI output, identify the failing checks, and fix them so the
# Mark as processed
echo "$esc_entry" >> "$ESCALATION_DONE"
done < "$ESCALATION_FILE"
done < "$ESCALATION_SNAP"
# Clear processed escalations
> "$ESCALATION_FILE"
rm -f "$ESCALATION_SNAP"
flog "Escalations processed — moved to $(basename "$ESCALATION_DONE")"
fi