Merge pull request 'fix: feat: disinto validate rejects CI steps with no timeout declared (#1137)' (#1149) from fix/issue-1137 into main
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/lint-ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline failed

This commit is contained in:
dev-qwen 2026-04-21 18:29:56 +00:00
commit 5b69f1ec70
13 changed files with 386 additions and 1 deletions

View file

@ -4,10 +4,16 @@
# Steps:
# 1. shellcheck — lint all .sh files (warnings+errors)
# 2. duplicate-detection — report copy-pasted code blocks (fails only on new duplicates for PRs)
#
# Timeouts:
# Workflow-level default (10m) applies to all steps unless overridden.
when:
event: [push, pull_request]
# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 10m
# Override default clone to authenticate against Forgejo using FORGE_TOKEN.
# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous
# git clones fail with exit code 128. FORGE_TOKEN is injected globally via

View file

@ -13,6 +13,9 @@
# Triggers:
# - Pull requests that modify edge-related files
#
# Timeouts:
# Workflow-level default (10m) applies to all steps unless overridden.
#
# Environment variables (inherited from WOODPECKER_ENVIRONMENT):
# EDGE_BASE_URL — Edge proxy URL for reference (default: http://localhost)
# EDGE_TIMEOUT — Request timeout in seconds (default: 30)
@ -22,6 +25,9 @@
when:
event: pull_request
# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 10m
steps:
# ── 1. ShellCheck on smoke script ────────────────────────────────────────
# `shellcheck` validates bash syntax, style, and common pitfalls.

34
.woodpecker/lint-ci.yml Normal file
View file

@ -0,0 +1,34 @@
# .woodpecker/lint-ci.yml — CI pipeline config validator
#
# Runs `disinto validate lint-ci` to check all .woodpecker/*.yml files for:
#   - Steps missing a timeout declaration
#   - Network-fetch commands without per-command timeouts
#
# Triggers on PRs/pushes that touch any CI config or the validator itself.

when:
  - event: [push, pull_request]
    path:
      - ".woodpecker/**"
      - "bin/disinto"

# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 5m

# Authenticated clone — same pattern as .woodpecker/ci.yml.
clone:
  git:
    image: alpine/git
    commands:
      # Inject the token into the clone URL's authority part.
      - AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
      - git clone --depth 1 "$AUTH_URL" .
      - git fetch --depth 1 origin "$CI_COMMIT_REF"
      - git checkout FETCH_HEAD

steps:
  - name: lint-ci
    image: alpine:3
    commands:
      # The validator is a bash script that shells out to python3 + PyYAML.
      - apk add --no-cache bash python3 py3-yaml
      - bash bin/disinto validate lint-ci .
    # No step-level timeout: the workflow-level default (5m) declared above applies.

View file

@ -44,6 +44,10 @@
# Pinned image versions match lib/init/nomad/install.sh (nomad 1.9.5 /
# vault 1.18.5). Bump there AND here together — drift = CI passing on
# syntax the runtime would reject.
#
# Timeouts:
# Workflow-level default (15m) applies to all steps unless overridden
# (vault-policy-validate needs longer for dev server startup).
# =============================================================================
when:
@ -57,6 +61,9 @@ when:
- "vault/roles.yaml"
- ".woodpecker/nomad-validate.yml"
# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 15m
# Authenticated clone — same pattern as .woodpecker/ci.yml. Forgejo is
# configured with REQUIRE_SIGN_IN, so anonymous git clones fail (exit 128).
# FORGE_TOKEN is injected globally via WOODPECKER_ENVIRONMENT.
@ -265,7 +272,7 @@ steps:
- name: vault-roles-validate
image: python:3.12-alpine
commands:
- pip install --quiet --disable-pip-version-check pyyaml yamllint
- pip install --quiet --disable-pip-version-check --default-timeout 30 pyyaml yamllint
- |
set -e
if [ ! -f vault/roles.yaml ]; then

View file

@ -4,6 +4,10 @@
# - ghcr.io/disinto/reproduce:<tag>
# - ghcr.io/disinto/edge:<tag>
#
# Timeouts:
# Workflow-level default (20m) applies to all steps unless overridden.
# Image builds can be slow for large images.
#
# Requires GHCR_TOKEN secret configured in Woodpecker with push access
# to ghcr.io/disinto.
@ -11,6 +15,9 @@ when:
event: tag
ref: refs/tags/v*
# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 20m
clone:
git:
image: alpine/git

View file

@ -3,6 +3,9 @@
# Triggers on pull requests touching secret-adjacent paths.
# Sources lib/secret-scan.sh and scans each changed file's content.
# Exits non-zero if any potential secret is detected.
#
# Timeouts:
# Workflow-level default (5m) applies to all steps unless overridden.
when:
- event: pull_request
@ -15,6 +18,9 @@ when:
- "lib/hvault.sh"
- "lib/action-vault.sh"
# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 5m
clone:
git:
image: alpine/git

View file

@ -8,6 +8,9 @@ when:
- "tests/**"
- ".woodpecker/smoke-init.yml"
# Workflow-level timeout default — propagates to all steps without their own timeout.
timeout: 5m
steps:
- name: smoke-init
image: python:3-alpine

View file

@ -13,6 +13,7 @@
# disinto run <action-id> Run action in ephemeral runner container
# disinto ci-logs <pipeline> [--step <name>] Read CI logs from Woodpecker SQLite
# disinto backup create <outfile> Export factory state for migration
# disinto validate [subcommand] Validate factory artifacts (lint-ci)
#
# Usage:
# disinto init https://github.com/user/repo
@ -68,6 +69,10 @@ Usage:
disinto backup create <outfile> Export factory state (issues + ops bundle)
disinto edge <verb> [options] Manage edge tunnel registrations
disinto backup <subcommand> Backup and restore factory state
disinto validate <subcommand> Validate factory artifacts
Validate subcommands:
lint-ci Lint .woodpecker/*.yml for missing timeouts
Edge subcommands:
register [project] Register a new tunnel (generates keypair if needed)
@ -3009,6 +3014,214 @@ disinto_backup() {
esac
}
# ── validate command ─────────────────────────────────────────────────────────
# Entry point for `disinto validate`: hands off to the requested validator.
# The subcommand is optional and falls back to lint-ci; anything unrecognised
# prints the usage text on stderr and exits 1.
#   lint-ci   Lint .woodpecker/*.yml for missing timeouts and unsafe commands
disinto_validate() {
  local requested="${1:-lint-ci}"
  # Drop the subcommand from "$@"; tolerate being called with no arguments.
  shift || true

  if [ "$requested" = "lint-ci" ]; then
    _validate_lint_ci "$@"
    return
  fi

  # Unknown subcommand: usage goes to stderr, then fail.
  cat >&2 <<EOF
Usage: disinto validate <subcommand>
Validate factory artifacts:
lint-ci Lint .woodpecker/*.yml for missing timeouts and unsafe commands
Subcommands:
lint-ci Check CI pipeline files for:
- Steps missing a timeout declaration
- Network-fetch commands without per-command timeouts
EOF
  exit 1
}
# Lint .woodpecker/*.yml files for missing timeouts and unsafe commands.
#
# Usage: _validate_lint_ci [root-dir]
#   root-dir  Directory containing .woodpecker/ (default: current directory)
#
# Checks:
#   1. Step-level timeout: every step must declare a `timeout:` value or
#      inherit from a workflow-level default.
#   2. Command-level timeout: network-fetch commands (curl, wget, pip, etc.)
#      must include an explicit timeout flag (e.g. --max-time, --timeout).
#      Multi-line command blocks are checked line by line (backslash
#      continuations are joined first), so one `apk add` line no longer hides
#      the rest of the block.
#
# Output: errors on stderr; warnings and the summary line on stdout.
# Exits (does not return): 0 when there is nothing to lint or no errors were
# found, 1 when a step lacks a timeout or a file cannot be parsed.
# Requires python3 with PyYAML.
_validate_lint_ci() {
  local lint_dir="${1:-.}"
  local woodpecker_dir="${lint_dir}/.woodpecker"
  local errors=0
  local warnings=0
  local f yml rel_path result line py_lint

  if [ ! -d "$woodpecker_dir" ]; then
    echo "No .woodpecker/ directory found at ${woodpecker_dir}"
    exit 0
  fi

  local -a yml_files=()
  while IFS= read -r f; do
    yml_files+=("$f")
  done < <(find "$woodpecker_dir" -maxdepth 1 \( -name '*.yml' -o -name '*.yaml' \) 2>/dev/null | sort)

  if [ ${#yml_files[@]} -eq 0 ]; then
    echo "No .woodpecker/*.yml files found"
    exit 0
  fi

  # The Python linter lives in a quoted here-doc so the shell never expands
  # anything inside it; the file to lint is passed as argv[1] instead of being
  # spliced into the source (paths containing quotes used to break the script).
  # `read -d ''` returns non-zero at end of input, hence the `|| true`.
  IFS= read -r -d '' py_lint <<'PY' || true
import re
import sys

import yaml

# Package-manager installs only *mention* tool names (e.g. `apk add curl`).
PKG_INSTALL = re.compile(r'\b(apk|apt(?:-get)?|yum|dnf|brew)\s+(add|install)\b')
# Script invocations are opaque to this linter.
INTERPRETER = re.compile(r'\s*(bash|sh|zsh|python3?|node)\s')

# (tool pattern, verb pattern or None, accepted timeout flag, message)
FLAG_CHECKS = [
    (r'\bcurl\b', None, r'(--max-time|-m\s+\d|--connect-timeout)',
     'curl without --max-time; consider: curl --max-time 30 ...'),
    (r'\bwget\b', None, r'--timeout=',
     'wget without --timeout; consider: wget --timeout=30 ...'),
    (r'\b(pip3?|pipenv)\b', r'\b(install|i)\b', r'(--default-timeout|--timeout)',
     'pip install without --default-timeout; consider: --default-timeout 30'),
    (r'\bnpm\b', r'\b(install|add)\b', r'--timeout',
     'npm install without --timeout; consider: --timeout 30000'),
    (r'\byarn\b', r'\b(add|install)\b', r'--timeout',
     'yarn add without --timeout; consider: --timeout 30000'),
]

# Tools with no timeout flag of their own: always warn.
# (`brew install` is intentionally absent: it is skipped as a package install.)
NO_FLAG_CHECKS = [
    (r'\bgo\s+get\b', 'go get has no timeout flag; wrap in a timeout(1) command'),
    (r'\bcargo\s+install\b', 'cargo install has no timeout flag; wrap in a timeout(1) command'),
    (r'\bgem\s+install\b', 'gem install has no timeout flag; wrap in a timeout(1) command'),
]


def logical_lines(command):
    """Yield the non-empty, non-comment shell lines of one `commands:` entry."""
    joined = re.sub(r'\\\r?\n', ' ', str(command))
    for raw in joined.splitlines():
        text = raw.strip()
        if text and not text.startswith('#'):
            yield text


def lint_line(step_name, text):
    """Return the warnings triggered by a single shell line."""
    if PKG_INSTALL.search(text) or INTERPRETER.match(text):
        return []
    found = []
    for tool, verb, flag, message in FLAG_CHECKS:
        if not re.search(tool, text):
            continue
        if verb is not None and not re.search(verb, text):
            continue
        if not re.search(flag, text):
            found.append(f'warning: {step_name}/command — {message}')
    for tool, message in NO_FLAG_CHECKS:
        if re.search(tool, text):
            found.append(f'warning: {step_name}/command — {message}')
    return found


def main(path):
    try:
        with open(path) as fh:
            doc = yaml.safe_load(fh)
    except yaml.YAMLError as exc:
        print(f'FATAL:YAML parse error: {exc}', file=sys.stderr)
        return 1

    if not isinstance(doc, dict):
        return 0

    has_workflow_default = doc.get('timeout') is not None

    steps = doc.get('steps', [])
    if isinstance(steps, dict):
        # Mapping form (`steps: {name: {...}}`): the key is the step name.
        steps = [dict(body, name=body.get('name', key))
                 for key, body in steps.items() if isinstance(body, dict)]
    if not isinstance(steps, list):
        return 0

    errors = []
    warnings = []
    for step in steps:
        if not isinstance(step, dict):
            continue
        name = step.get('name', '<unnamed>')

        # The timeout check comes first so steps without a command list
        # (e.g. plugin steps) are still covered.
        if 'timeout' not in step and not has_workflow_default:
            errors.append(f'error: {name} — step has no timeout; add `timeout: 5m` or inherit from workflow default')

        commands = step.get('commands', [])
        if not isinstance(commands, list):
            continue
        for command in commands:
            for text in logical_lines(command):
                warnings.extend(lint_line(name, text))

    for item in errors:
        print(f'E:{item}')
    for item in warnings:
        print(f'W:{item}')
    return 0


sys.exit(main(sys.argv[1]))
PY

  echo "Linting CI pipeline files in ${woodpecker_dir}..."
  echo ""

  for yml in "${yml_files[@]}"; do
    # realpath --relative-to is not available everywhere; fall back to the raw path.
    rel_path=$(realpath --relative-to="$(pwd)" "$yml" 2>/dev/null || echo "$yml")

    # stderr is folded into the capture so parse errors can be echoed below.
    result=$(python3 -c "$py_lint" "$yml" 2>&1) || {
      echo "ERROR: failed to parse $rel_path" >&2
      echo "$result" >&2
      exit 1
    }

    # Tally findings: `E:` lines are errors, `W:` lines are warnings; any
    # other output is ignored.
    while IFS= read -r line; do
      case "$line" in
        E:*)
          echo "${line#E:}" >&2
          errors=$((errors + 1))
          ;;
        W:*)
          echo "${line#W:}"
          warnings=$((warnings + 1))
          ;;
      esac
    done <<< "$result"
  done

  echo ""
  echo "lint-ci: ${errors} error(s), ${warnings} warning(s)"

  if [ "$errors" -gt 0 ]; then
    echo ""
    echo "Fix: add \`timeout:\` to each step, or set a workflow-level default at the top of the pipeline file." >&2
    exit 1
  fi

  if [ "$warnings" -gt 0 ]; then
    echo "(warnings are non-blocking — add per-command timeouts for network calls)" >&2
  fi
  exit 0
}
# ── Main dispatch ────────────────────────────────────────────────────────────
case "${1:-}" in
@ -3027,6 +3240,7 @@ case "${1:-}" in
agent) shift; disinto_agent "$@" ;;
edge) shift; disinto_edge "$@" ;;
backup) shift; disinto_backup "$@" ;;
validate) shift; disinto_validate "$@" ;;
-h|--help) usage ;;
*) usage ;;
esac

View file

@ -0,0 +1,13 @@
# Fixture for tests/test-lint-ci.bats (command-level timeout check).
# The single step runs curl with no --max-time, so the linter should warn.
# A workflow-level timeout is declared so the step-level check stays quiet.
when:
  - event: 'pull_request'

timeout: '5m'

steps:
  - name: 'bad-curl'
    image: 'alpine:3'
    commands:
      - 'curl https://example.com'

View file

@ -0,0 +1,13 @@
# Fixture for tests/test-lint-ci.bats (command-level timeout check).
# The single step runs curl with --max-time, so the linter should report
# nothing. A workflow-level timeout is declared so the step-level check
# stays quiet.
when:
  - event: 'pull_request'

timeout: '5m'

steps:
  - name: 'good-curl'
    image: 'alpine:3'
    commands:
      - 'curl --max-time 30 https://example.com'

View file

@ -0,0 +1,11 @@
# Fixture for tests/test-lint-ci.bats (step-level timeout check).
# Neither the workflow nor the step declares a timeout, so the linter should
# report an error for this step.
when:
  - event: 'pull_request'

steps:
  - name: 'no-timeout-step'
    image: 'alpine:3'
    commands:
      - 'echo "this step has no timeout"'

View file

@ -0,0 +1,13 @@
# Fixture for tests/test-lint-ci.bats (workflow-level timeout propagation).
# The step has no timeout of its own; the workflow-level value below should
# be enough for the linter to accept it.
when:
  - event: 'pull_request'

timeout: '10m'

steps:
  - name: 'inherits-timeout'
    image: 'alpine:3'
    commands:
      - 'echo "inherits workflow timeout"'

52
tests/test-lint-ci.bats Normal file
View file

@ -0,0 +1,52 @@
# tests/test-lint-ci.bats — Tests for `disinto validate lint-ci`
#
# Verifies the CI timeout validator:
#   1. Step-level timeout errors fire when missing
#   2. Workflow-level timeout satisfies all steps
#   3. curl without --max-time triggers a warning
#   4. curl with --max-time passes cleanly
#
# Each case invokes the validator through bats' `run` helper, which records
# the exit code in $status and the combined stdout/stderr in $output. A bare
# `output=$(...)` would abort the test on a non-zero exit before the exit
# code could be asserted, which made the expected-failure case unpassable.
#
# The validator lints <dir>/.woodpecker/, so every fixture directory passed
# below has to contain a .woodpecker/ subdirectory holding the pipeline file.

# NOTE(review): `load bats` expects a bats.bash helper next to this file;
# nothing in these tests appears to depend on it — confirm it exists or drop it.
load bats

# Paths are resolved from the test file's own directory. Under bats, $0 is
# the bats executable rather than this file, so it cannot be used for this.
DISINTO="${FACTORY_ROOT:-${BATS_TEST_DIRNAME}/..}/bin/disinto"
FIXTURES="${BATS_TEST_DIRNAME}/fixtures/lint-ci"

# ── Step-level timeout errors ────────────────────────────────────────────────

@test "missing step timeout triggers error" {
  run bash "$DISINTO" validate lint-ci "$FIXTURES/missing-timeout"
  echo "$output"
  [ "$status" -eq 1 ]
  echo "$output" | grep -q "error:.*no-timeout-step.*step has no timeout"
}

@test "workflow-level timeout satisfies all steps" {
  run bash "$DISINTO" validate lint-ci "$FIXTURES/workflow-timeout"
  echo "$output"
  [ "$status" -eq 0 ]
  echo "$output" | grep -q "lint-ci: 0 error(s), 0 warning(s)"
}

# ── Command-level timeout warnings ───────────────────────────────────────────

@test "curl without --max-time triggers warning" {
  run bash "$DISINTO" validate lint-ci "$FIXTURES/bad-curl"
  echo "$output"
  # Warnings are non-blocking, so the exit code stays 0.
  [ "$status" -eq 0 ]
  echo "$output" | grep -q "warning:.*curl without --max-time"
}

@test "curl with --max-time passes cleanly" {
  run bash "$DISINTO" validate lint-ci "$FIXTURES/good-curl"
  echo "$output"
  [ "$status" -eq 0 ]
  echo "$output" | grep -q "lint-ci: 0 error(s), 0 warning(s)"
}