fix: [nomad-step-2] S2.6 — CI: vault policy fmt + validate + roles.yaml check (#884)
This commit is contained in:
parent
88e49b9e9d
commit
108b928cfc
5 changed files with 725 additions and 9 deletions
|
|
@ -6,11 +6,20 @@
|
|||
# lib/init/nomad/, plus the `disinto init` dispatcher, gets checked
|
||||
# before it can land.
|
||||
#
|
||||
# Also includes Vault policy validation (S2.6, issue #884):
|
||||
# - vault policy fmt -check on vault/policies/*.hcl
|
||||
# - vault policy validate on each policy file
|
||||
# - roles.yaml validator (yamllint + policy reference check)
|
||||
# - secret-scan gate on policy files
|
||||
#
|
||||
# Triggers on PRs (and pushes) that touch any of:
|
||||
# nomad/** — HCL configs (server, client, vault)
|
||||
# lib/init/nomad/** — cluster-up / install / systemd / vault-init
|
||||
# bin/disinto — `disinto init --backend=nomad` dispatcher
|
||||
# tests/disinto-init-nomad.bats — the bats suite itself
|
||||
# vault/policies/*.hcl — Vault ACL policies (S2.6)
|
||||
# vault/roles.yaml — JWT auth role definitions (S2.6)
|
||||
# lib/init/nomad/vault-*.sh — Vault init scripts (S2.6)
|
||||
# .woodpecker/nomad-validate.yml — the pipeline definition
|
||||
#
|
||||
# Steps (all fail-closed — any error blocks merge):
|
||||
|
|
@ -19,8 +28,12 @@
|
|||
# nomad/jobs/*.hcl (new jobspecs get
|
||||
# CI coverage automatically)
|
||||
# 3. vault-operator-diagnose — `vault operator diagnose` syntax check on vault.hcl
|
||||
# 4. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto
|
||||
# 5. bats-init-nomad — `disinto init --backend=nomad --dry-run` smoke tests
|
||||
# 4. vault-policy-fmt — `vault policy fmt -check` on vault/policies/*.hcl
|
||||
# 5. vault-policy-validate — `vault policy validate` on each policy file
|
||||
# 6. vault-roles-validate — yamllint + policy reference check
|
||||
# 7. vault-secret-scan — scan policy files for embedded secrets
|
||||
# 8. shellcheck-nomad — shellcheck the cluster-up + install scripts + disinto
|
||||
# 9. bats-init-nomad — `disinto init --backend=nomad --dry-run` smoke tests
|
||||
#
|
||||
# Pinned image versions match lib/init/nomad/install.sh (nomad 1.9.5 /
|
||||
# vault 1.18.5). Bump there AND here together — drift = CI passing on
|
||||
|
|
@ -34,6 +47,9 @@ when:
|
|||
- "lib/init/nomad/**"
|
||||
- "bin/disinto"
|
||||
- "tests/disinto-init-nomad.bats"
|
||||
- "vault/policies/*.hcl"
|
||||
- "vault/roles.yaml"
|
||||
- "lib/init/nomad/vault-*.sh"
|
||||
- ".woodpecker/nomad-validate.yml"
|
||||
|
||||
# Authenticated clone — same pattern as .woodpecker/ci.yml. Forgejo is
|
||||
|
|
@ -123,7 +139,231 @@ steps:
|
|||
*) echo "vault config: hard failure (rc=$rc)" >&2; exit "$rc" ;;
|
||||
esac
|
||||
|
||||
# ── 4. Shellcheck ────────────────────────────────────────────────────────
|
||||
# ── 4. Vault policy fmt -check ───────────────────────────────────────────
|
||||
# `vault policy fmt -check` is non-destructive; it reads each policy file
|
||||
# and compares against the formatted version. Any difference means the file
|
||||
# needs formatting. This enforces consistent indentation (2-space), no
|
||||
# trailing whitespace, and proper HCL formatting conventions.
|
||||
#
|
||||
# CI runs this BEFORE vault policy validate because unformatted HCL can
|
||||
# sometimes cause confusing validation errors.
|
||||
- name: vault-policy-fmt
|
||||
image: hashicorp/vault:1.18.5
|
||||
commands:
|
||||
- |
|
||||
set -e

# `vault policy fmt` has no -check flag: it rewrites the given file in
# place. To keep this step non-destructive, format a scratch copy of each
# policy and compare the result with the committed file.
scratch_dir=$(mktemp -d)
trap 'rm -rf "$scratch_dir"' EXIT

failed=0
for f in vault/policies/*.hcl; do
  # An unmatched glob stays literal; skip it instead of failing.
  [ -f "$f" ] || continue
  echo "fmt-check: $f"

  formatted="$scratch_dir/$(basename "$f")"
  cp "$f" "$formatted"

  # stderr is left attached so HCL parse errors show up in the CI log.
  if ! vault policy fmt "$formatted" > /dev/null; then
    echo " ERROR: $f could not be formatted (parse error?)" >&2
    failed=1
    continue
  fi

  if ! cmp -s "$f" "$formatted"; then
    echo " ERROR: $f is not formatted correctly" >&2
    # Show what `vault policy fmt` would change; diff's non-zero status
    # is expected here and must not trip `set -e`.
    diff -u "$f" "$formatted" >&2 || true
    failed=1
  fi
done

if [ "$failed" -gt 0 ]; then
  echo "vault-policy-fmt: formatting errors found" >&2
  exit 1
fi
echo "vault-policy-fmt: all policies formatted correctly"
|
||||
|
||||
# ── 5. Vault policy validate ─────────────────────────────────────────────
|
||||
# `vault policy validate` performs syntax + semantic validation:
|
||||
# - Checks for unknown stanzas/blocks
|
||||
# - Validates path patterns are valid
|
||||
# - Validates capabilities are known (read, list, create, update, delete, sudo)
|
||||
# - Checks for missing required fields
|
||||
#
|
||||
# Requires a running Vault instance (dev mode is sufficient for CI).
|
||||
# Uses the default dev server at http://127.0.0.1:8200 with "root" token.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 — policy is valid
|
||||
# 1 — policy has errors (syntax or semantic)
|
||||
#
|
||||
# CI starts a Vault dev server inline for validation.
|
||||
- name: vault-policy-validate
|
||||
image: hashicorp/vault:1.18.5
|
||||
commands:
|
||||
- |
|
||||
set -e

# Every vault CLI call below — including the readiness probe — must talk
# to the plaintext dev listener. Without VAULT_ADDR the CLI defaults to
# https://127.0.0.1:8200 and can never reach the dev server.
export VAULT_ADDR=http://127.0.0.1:8200
export VAULT_TOKEN=root

# Start Vault dev server in background
vault server -dev -dev-root-token-id=root -dev-listen-address=0.0.0.0:8200 &
VAULT_PID=$!
# Single quotes: expand $VAULT_PID when the trap fires, not when it is set.
trap 'kill "$VAULT_PID" 2>/dev/null || true' EXIT

# Wait for Vault to be ready (30 probes, 0.5s apart); fail closed if it
# never comes up instead of falling through to confusing per-file errors.
attempts=0
until vault status > /dev/null 2>&1; do
  attempts=$((attempts + 1))
  if [ "$attempts" -ge 30 ]; then
    echo "vault-policy-validate: Vault did not become ready" >&2
    exit 1
  fi
  sleep 0.5
done
echo "vault-policy-validate: Vault is ready"

# Validate each policy. The Vault CLI has no `policy validate` subcommand,
# so each file is written to the throwaway dev server instead: the server
# parses the HCL on write and rejects malformed policies. The "ci-validate-"
# prefix avoids colliding with built-in policy names such as "default".
failed=0
for f in vault/policies/*.hcl; do
  [ -f "$f" ] || continue
  echo "validate: $f"
  policy_name="ci-validate-$(basename "$f" .hcl)"
  # stderr is left attached so the server's error message reaches the log.
  if ! vault policy write "$policy_name" "$f" > /dev/null; then
    echo " ERROR: $f validation failed" >&2
    failed=1
  fi
done

if [ "$failed" -gt 0 ]; then
  echo "vault-policy-validate: validation errors found" >&2
  exit 1
fi
echo "vault-policy-validate: all policies valid"
|
||||
|
||||
# ── 6. Vault roles.yaml validate ─────────────────────────────────────────
|
||||
# Validates vault/roles.yaml:
|
||||
# 1. yamllint check — ensures YAML syntax is valid
|
||||
# 2. Policy reference check — each role references a policy that exists
|
||||
# 3. Required fields check — each role has name, policies, and auth fields (TODO: not yet implemented by this step)
|
||||
#
|
||||
# If roles.yaml doesn't exist yet, this step is skipped (it will be added
|
||||
# in S2.3 alongside JWT auth configuration).
|
||||
- name: vault-roles-validate
|
||||
image: python:3.12-alpine
|
||||
commands:
|
||||
- |
|
||||
set -e
apk add --no-cache yamllint jq

if [ ! -f vault/roles.yaml ]; then
  echo "vault-roles-validate: roles.yaml not found, skipping"
  exit 0
fi

echo "yamllint: vault/roles.yaml"
yamllint -q vault/roles.yaml || {
  echo " ERROR: yamllint found issues" >&2
  exit 1
}
echo " OK"

# Extract and validate policy references
echo "policy-reference-check: validating policy references"
policy_dir="vault/policies"
failed=0

# jq only understands JSON, so convert roles.yaml to JSON first. PyYAML is
# installed into the image's own interpreter (the apk yamllint package
# brings its own, separate Python).
pip install --quiet --no-cache-dir pyyaml
roles_json=$(python3 -c 'import json, sys, yaml; json.dump(yaml.safe_load(sys.stdin), sys.stdout, default=str)' < vault/roles.yaml) || {
  echo "vault-roles-validate: ERROR: could not parse vault/roles.yaml" >&2
  exit 1
}

# Get referenced policies from roles.yaml. jq runs on its own (not at the
# head of a pipeline) so a failure is not masked by a later stage.
referenced=$(printf '%s\n' "$roles_json" | jq -r '.roles[].policies[]?') || {
  echo "vault-roles-validate: ERROR: could not read .roles[].policies from vault/roles.yaml" >&2
  exit 1
}
referenced=$(printf '%s\n' "$referenced" | sort -u)

if [ -z "$referenced" ]; then
  echo "vault-roles-validate: no policies referenced in roles.yaml" >&2
  exit 1
fi

# Get existing policy names (file basename without the .hcl suffix)
existing=$(find "$policy_dir" -maxdepth 1 -name '*.hcl' -type f -exec basename {} .hcl \; | sort)

# Policy names are assumed to contain no whitespace, so word-splitting the
# list is intentional here.
for policy in $referenced; do
  # -F -x: compare the whole line literally; the name is not a regex.
  if ! printf '%s\n' "$existing" | grep -Fxq -- "$policy"; then
    echo "vault-roles-validate: ERROR: policy '$policy' referenced but not found" >&2
    failed=1
  fi
done

if [ "$failed" -gt 0 ]; then
  echo "vault-roles-validate: policy reference errors found" >&2
  exit 1
fi

echo "vault-roles-validate: all policy references valid"
|
||||
|
||||
# ── 7. Vault secret-scan ─────────────────────────────────────────────────
|
||||
# Scans policy HCL files for embedded secrets (rare but dangerous copy-paste
|
||||
# mistake). Uses the same patterns as lib/secret-scan.sh:
|
||||
# - Long hex strings (32+ chars)
|
||||
# - API key patterns
|
||||
# - URLs with embedded credentials
|
||||
# - Bearer tokens
|
||||
#
|
||||
# Environment variables like $TOKEN or ${TOKEN} are excluded as safe.
|
||||
- name: vault-secret-scan
|
||||
image: alpine:3.19
|
||||
commands:
|
||||
- |
|
||||
set -e
|
||||
apk add --no-cache bash
|
||||
|
||||
# Copy the secret-scan.sh script into the container
|
||||
cat > /tmp/secret-scan.sh << 'EOF'
#!/usr/bin/env bash
# Inline version of lib/secret-scan.sh for CI secret detection.
#
# Usage:   secret-scan.sh < file      (text on stdin)
#          secret-scan.sh "some text" (text as first argument)
# Outputs: one diagnostic line per finding on stderr (match is truncated).
# Returns: 0 when nothing suspicious is found, 1 otherwise.

# Extended regexes that indicate a likely secret.
_SECRET_PATTERNS=(
  '[0-9a-fA-F]{32,}'
  'Bearer [A-Za-z0-9_/+=-]{20,}'
  '0x[0-9a-fA-F]{64}'
  'https?://[^[:space:]]*[0-9a-fA-F]{20,}'
  'AKIA[0-9A-Z]{16}'
  '(API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=[^[:space:]"]{16,}'
)

# Extended regexes for text that is masked out before scanning.
_SAFE_PATTERNS=(
  '\$\{?[A-Z_]+\}?'
  'commit [0-9a-f]{40}'
  'Merge [0-9a-f]{40}'
  'last-reviewed: [0-9a-f]{40}'
  'codeberg\.org/[^[:space:]]+'
  'localhost:3000/[^[:space:]]+'
  'SC[0-9]{4}'
)

#######################################
# Scan text for secret-looking strings.
# Globals:   _SECRET_PATTERNS, _SAFE_PATTERNS (read)
# Arguments: $1 - text to scan; when empty or absent, stdin is read
# Outputs:   findings on stderr
# Returns:   0 if clean, 1 if at least one potential secret was found
#######################################
scan_for_secrets() {
  local text
  text="${1:-$(cat)}"
  local found=0

  local cleaned="$text"
  local safe escaped
  for safe in "${_SAFE_PATTERNS[@]}"; do
    # Escape '/' so patterns containing slashes (e.g. the codeberg.org one)
    # do not terminate the s/// expression early and get silently skipped.
    escaped=${safe//\//\\/}
    # If sed still rejects a pattern, keep the text unmasked: that can only
    # produce extra findings, never hide one.
    cleaned=$(printf '%s' "$cleaned" | sed -E "s/${escaped}/__SAFE__/g" 2>/dev/null || printf '%s' "$cleaned")
  done

  local pattern matches match
  for pattern in "${_SECRET_PATTERNS[@]}"; do
    # grep exits 1 on "no match"; that is not an error here.
    matches=$(printf '%s' "$cleaned" | grep -oE -e "$pattern" 2>/dev/null || true)
    if [[ -n "$matches" ]]; then
      while IFS= read -r match; do
        [[ "$match" == "__SAFE__" ]] && continue
        [[ -z "$match" ]] && continue
        # Print only the first 8 and last 4 characters of the match.
        printf 'secret-scan: detected potential secret matching pattern [%s]: %s\n' \
          "$pattern" "${match:0:8}...${match: -4}" >&2
        found=1
      done <<< "$matches"
    fi
  done

  return "$found"
}

# Entry point: without this call the script only defined the function and
# always exited 0, so the CI gate could never fail.
scan_for_secrets "$@"
EOF
chmod +x /tmp/secret-scan.sh
|
||||
|
||||
# Scan policy files
|
||||
# Feed every policy file to the scanner on stdin and remember whether any
# of them was flagged, so all offending files are reported in one run.
echo "secret-scan: vault/policies/*.hcl"
failed=0
for policy_file in vault/policies/*.hcl; do
  # An unmatched glob stays literal; nothing to scan in that case.
  if [ ! -f "$policy_file" ]; then
    continue
  fi
  echo " scanning: $policy_file"
  /tmp/secret-scan.sh < "$policy_file" || {
    echo " ERROR: potential secrets detected in $policy_file" >&2
    failed=1
  }
done

# Fail the step only after every file has been scanned.
[ "$failed" -le 0 ] || {
  echo "vault-secret-scan: secrets detected" >&2
  exit 1
}
echo "vault-secret-scan: no secrets detected"
|
||||
|
||||
# ── 8. Shellcheck ────────────────────────────────────────────────────────
|
||||
# Covers the new lib/init/nomad/*.sh scripts plus bin/disinto (which owns
|
||||
# the backend dispatcher). bin/disinto has no .sh extension so the
|
||||
# repo-wide shellcheck in .woodpecker/ci.yml skips it — this step is the
|
||||
|
|
@ -133,7 +373,7 @@ steps:
|
|||
commands:
|
||||
- shellcheck --severity=warning lib/init/nomad/*.sh bin/disinto
|
||||
|
||||
# ── 5. bats: `disinto init --backend=nomad --dry-run` ────────────────────
|
||||
# ── 9. bats: `disinto init --backend=nomad --dry-run` ────────────────────
|
||||
# Smoke-tests the CLI dispatcher: both --backend=nomad variants exit 0
|
||||
# with the expected step list, and --backend=docker stays on the docker
|
||||
# path (regression guard). Pure dry-run — no sudo, no network.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue