fix: address review feedback on escalation triage (#185)
- supervisor-poll.sh: check PR state before retrigger; discard stale escalations for closed/merged PRs instead of pushing to their branches
- supervisor-poll.sh: bump escalation ts to now on failed retrigger push, so the 30-min cooldown resets and alert flooding is avoided on persistent failures
- ci-helpers.sh: require at least one confirmed infra step before returning "infra"; prevents false-positive when all step names are empty strings
- ci-helpers.sh: clarify header comment to distinguish per-function requirements
- AGENTS.md: document classify_pipeline_failure() in ci-helpers.sh table row

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
47eccdb8ae
commit
97ffdca95c
3 changed files with 19 additions and 6 deletions
|
|
@ -227,7 +227,7 @@ sourced as needed.
|
||||||
| File | What it provides | Sourced by |
|
| File | What it provides | Sourced by |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`CODEBERG_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `codeberg_api()`, `codeberg_api_all()` (accepts optional second TOKEN parameter, defaults to `$CODEBERG_TOKEN`), `woodpecker_api()`, `wpdb()`, `matrix_send()`, `matrix_send_ctx()`. Auto-loads project TOML if `PROJECT_TOML` is set. | Every agent |
|
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`CODEBERG_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `codeberg_api()`, `codeberg_api_all()` (accepts optional second TOKEN parameter, defaults to `$CODEBERG_TOKEN`), `woodpecker_api()`, `wpdb()`, `matrix_send()`, `matrix_send_ctx()`. Auto-loads project TOML if `PROJECT_TOML` is set. | Every agent |
|
||||||
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). | dev-poll, review-poll, review-pr, supervisor-poll |
|
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `classify_pipeline_failure()` — returns "infra" if all failed Woodpecker steps are git-step exit 128/137, else "code". | dev-poll, review-poll, review-pr, supervisor-poll |
|
||||||
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
|
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
|
||||||
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `CODEBERG_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, Matrix config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
|
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `CODEBERG_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, Matrix config, etc.). | env.sh (when `PROJECT_TOML` is set), supervisor-poll (per-project iteration) |
|
||||||
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
|
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,8 @@
|
||||||
# ci-helpers.sh — Shared CI helper functions
|
# ci-helpers.sh — Shared CI helper functions
|
||||||
#
|
#
|
||||||
# Source from any script: source "$(dirname "$0")/../lib/ci-helpers.sh"
|
# Source from any script: source "$(dirname "$0")/../lib/ci-helpers.sh"
|
||||||
# Requires: WOODPECKER_REPO_ID (from env.sh / project config)
|
# ci_passed() requires: WOODPECKER_REPO_ID (from env.sh / project config)
|
||||||
|
# classify_pipeline_failure() requires: woodpecker_api() (defined in env.sh)
|
||||||
|
|
||||||
# ci_passed <state> — check if CI is passing (or no CI configured)
|
# ci_passed <state> — check if CI is passing (or no CI configured)
|
||||||
# Returns 0 if state is "success", or if no CI is configured and
|
# Returns 0 if state is "success", or if no CI is configured and
|
||||||
|
|
@ -39,18 +40,20 @@ classify_pipeline_failure() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
all_infra=true
|
all_infra=true
|
||||||
|
_infra_count=0
|
||||||
while IFS=$'\t' read -r _sname _ecode; do
|
while IFS=$'\t' read -r _sname _ecode; do
|
||||||
[ -z "$_sname" ] && continue
|
[ -z "$_sname" ] && continue
|
||||||
# git step with exit 128 (connection/rate-limit) or 137 (OOM) → infra
|
# git step with exit 128 (connection/rate-limit) or 137 (OOM) → infra
|
||||||
if [[ "$_sname" == git* ]] && { [ "$_ecode" = "128" ] || [ "$_ecode" = "137" ]; }; then
|
if [[ "$_sname" == git* ]] && { [ "$_ecode" = "128" ] || [ "$_ecode" = "137" ]; }; then
|
||||||
: # infra step — continue checking remaining steps
|
_infra_count=$(( _infra_count + 1 ))
|
||||||
else
|
else
|
||||||
all_infra=false
|
all_infra=false
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
done <<< "$failed_steps"
|
done <<< "$failed_steps"
|
||||||
|
|
||||||
if [ "$all_infra" = true ]; then
|
# Require at least one confirmed infra step (guards against all-empty-name steps)
|
||||||
|
if [ "$all_infra" = true ] && [ "$_infra_count" -gt 0 ]; then
|
||||||
echo "infra"
|
echo "infra"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -410,11 +410,16 @@ check_project() {
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Get the PR's branch from Codeberg
|
# Get the PR's branch and state from Codeberg
|
||||||
_esc_pr_json=$(codeberg_api GET "/pulls/${_esc_pr}" 2>/dev/null) || {
|
_esc_pr_json=$(codeberg_api GET "/pulls/${_esc_pr}" 2>/dev/null) || {
|
||||||
flog "${proj_name}: PR #${_esc_pr}: failed to fetch PR info, keeping escalation"
|
flog "${proj_name}: PR #${_esc_pr}: failed to fetch PR info, keeping escalation"
|
||||||
printf '%s\n' "$_esc_line" >> "$_esc_tmp"; continue
|
printf '%s\n' "$_esc_line" >> "$_esc_tmp"; continue
|
||||||
}
|
}
|
||||||
|
_esc_pr_state=$(printf '%s' "$_esc_pr_json" | jq -r '.state // ""' 2>/dev/null)
|
||||||
|
if [ "$_esc_pr_state" != "open" ]; then
|
||||||
|
flog "${proj_name}: PR #${_esc_pr} is ${_esc_pr_state:-unknown} — discarding stale escalation"
|
||||||
|
continue # PR merged/closed externally; escalation no longer actionable
|
||||||
|
fi
|
||||||
_esc_branch=$(printf '%s' "$_esc_pr_json" | jq -r '.head.ref // ""' 2>/dev/null)
|
_esc_branch=$(printf '%s' "$_esc_pr_json" | jq -r '.head.ref // ""' 2>/dev/null)
|
||||||
if [ -z "$_esc_branch" ]; then
|
if [ -z "$_esc_branch" ]; then
|
||||||
printf '%s\n' "$_esc_line" >> "$_esc_tmp"; continue
|
printf '%s\n' "$_esc_line" >> "$_esc_tmp"; continue
|
||||||
|
|
@ -493,7 +498,12 @@ json.dump(d, open(f, 'w'))
|
||||||
# Escalation removed — do NOT write to _esc_tmp
|
# Escalation removed — do NOT write to _esc_tmp
|
||||||
else
|
else
|
||||||
p2 "${proj_name}: PR #${_esc_pr}: infra-only CI exhaustion but retrigger push failed"
|
p2 "${proj_name}: PR #${_esc_pr}: infra-only CI exhaustion but retrigger push failed"
|
||||||
printf '%s\n' "$_esc_line" >> "$_esc_tmp"
|
# Bump timestamp to now so the 30-min cooldown resets; prevents alert flood
|
||||||
|
# on persistent push failures (SSH key issue, Codeberg outage, etc.)
|
||||||
|
_esc_now=$(date -u +%Y-%m-%dT%H:%M:%SZ)
|
||||||
|
_esc_bumped=$(printf '%s' "$_esc_line" | jq -c --arg ts "$_esc_now" '.ts = $ts' 2>/dev/null \
|
||||||
|
|| printf '%s' "$_esc_line")
|
||||||
|
printf '%s\n' "$_esc_bumped" >> "$_esc_tmp"
|
||||||
fi
|
fi
|
||||||
done < "$_esc_file"
|
done < "$_esc_file"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue