fix: fix: action formulas must not contain secrets in issue body (#291)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-21 09:54:21 +00:00
parent 3793339da5
commit 0b453fdab0
6 changed files with 131 additions and 3 deletions

View file

@ -2,16 +2,26 @@
# file-action-issue.sh — File an action issue for a formula run
#
# Usage: source this file, then call file_action_issue.
# Requires: codeberg_api() from lib/env.sh, jq
# Requires: codeberg_api() from lib/env.sh, jq, lib/secret-scan.sh
#
# file_action_issue <formula_name> <title> <body>
# Sets FILED_ISSUE_NUM on success.
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
# Load secret scanner
# shellcheck source=secret-scan.sh
source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh"
file_action_issue() {
local formula_name="$1" title="$2" body="$3"
FILED_ISSUE_NUM=""
# Secret scan: reject issue bodies containing embedded secrets
if ! scan_for_secrets "$body" 2>/dev/null; then
echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2
return 4
fi
# Dedup: skip if an open action issue for this formula already exists
local open_actions
open_actions=$(codeberg_api GET "/issues?state=open&type=issues&labels=action&limit=50" 2>/dev/null || true)

95
lib/secret-scan.sh Normal file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env bash
# secret-scan.sh — Detect secrets in text before it reaches issue bodies or comments
#
# Usage: source this file, then call scan_for_secrets.
#
# scan_for_secrets <text>
# Returns: 0 = clean, 1 = secrets detected
# Outputs: matched patterns to stderr (for logging)
#
# redact_secrets <text>
# Outputs: text with detected secrets replaced by [REDACTED]
# Extended-regex signatures of embedded secrets.
# scan_for_secrets feeds each entry to `grep -oE`; entries are unanchored,
# so they match anywhere inside the scanned text.
_SECRET_PATTERNS=()
# Run of 32 or more hex digits (API keys, tokens)
_SECRET_PATTERNS+=('[0-9a-fA-F]{32,}')
# "Bearer " followed by 20+ token characters (auth header carrying a real value)
_SECRET_PATTERNS+=('Bearer [A-Za-z0-9_/+=-]{20,}')
# 0x-prefixed run of 64 hex digits (private keys)
_SECRET_PATTERNS+=('0x[0-9a-fA-F]{64}')
# http(s) URL containing a run of 20+ hex digits before the next whitespace
_SECRET_PATTERNS+=('https?://[^[:space:]]*[0-9a-fA-F]{20,}')
# AWS-style keys: "AKIA" plus 16 uppercase letters or digits
_SECRET_PATTERNS+=('AKIA[0-9A-Z]{16}')
# Secret-looking name assigned a value of 16+ characters (KEY=<long value>)
_SECRET_PATTERNS+=('(API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=[^[:space:]"]{16,}')
# Extended-regex patterns for text that is known to be harmless.
# scan_for_secrets replaces every match with the marker __SAFE__ (via sed -E)
# before the secret patterns are applied.
_SAFE_PATTERNS=()
# Shell variable references such as $VAR or ${VAR}: a "$", an optional "{",
# uppercase letters/underscores, and an optional "}"
_SAFE_PATTERNS+=('\$\{?[A-Z_]+\}?')
# 40-digit lowercase hex SHAs introduced by the word "commit" or "Merge"
_SAFE_PATTERNS+=('commit [0-9a-f]{40}')
_SAFE_PATTERNS+=('Merge [0-9a-f]{40}')
# Anything from "codeberg.org/" up to the next whitespace (PR refs, commit links)
_SAFE_PATTERNS+=('codeberg\.org/[^[:space:]]+')
# ShellCheck directive codes (SC followed by four digits)
_SAFE_PATTERNS+=('SC[0-9]{4}')
# scan_for_secrets — check text for potential secrets
#
# Args:    $1 - text to scan. With no argument at all the text is read from
#               stdin. An explicitly passed empty string is scanned as empty
#               text; stdin is not touched in that case.
# Globals: _SAFE_PATTERNS (read), _SECRET_PATTERNS (read)
# Returns: 0 = clean, 1 = secrets found
# Outputs: one line per match on stderr, naming the pattern and showing the
#          first 8 and last 4 characters of the match
scan_for_secrets() {
  local text cleaned safe pattern matches match
  local found=0

  # Decide on argument count rather than emptiness: "${1:-$(cat)}" falls back
  # to stdin for an empty-but-present argument, which blocks the caller or
  # consumes input that belongs to it (e.g. an enclosing `while read` loop).
  if [ "$#" -gt 0 ]; then
    text=$1
  else
    text=$(cat)
  fi

  # Strip known safe patterns before scanning
  cleaned=$text
  for safe in "${_SAFE_PATTERNS[@]}"; do
    # "/" delimits the s command, so a literal "/" inside a pattern has to be
    # escaped; otherwise sed rejects the command and the pattern is silently
    # never applied. If sed still fails, keep the text as it was (best effort).
    cleaned=$(printf '%s' "$cleaned" \
      | sed -E "s/${safe//\//\\/}/__SAFE__/g" 2>/dev/null \
      || printf '%s' "$cleaned")
  done

  for pattern in "${_SECRET_PATTERNS[@]}"; do
    # grep exits non-zero when nothing matches; that is the normal clean case.
    matches=$(printf '%s' "$cleaned" | grep -oE -e "$pattern" 2>/dev/null || true)
    [ -n "$matches" ] || continue

    while IFS= read -r match; do
      # Skip if match is entirely the word __SAFE__ (already excluded)
      [ "$match" = "__SAFE__" ] && continue
      # Skip empty
      [ -z "$match" ] && continue
      # Only an abbreviated form of the match is written to the log.
      printf 'secret-scan: detected potential secret matching pattern [%s]: %s\n' \
        "$pattern" "${match:0:8}...${match: -4}" >&2
      found=1
    done <<< "$matches"
  done

  return "$found"
}
# redact_secrets — replace detected secrets with [REDACTED]
#
# Args:    $1 - text to sanitize. With no argument at all the text is read
#               from stdin. An explicitly passed empty string yields empty
#               output; stdin is not touched in that case.
# Outputs: sanitized text on stdout (trailing newlines are dropped)
# Returns: 0 on success; sed's non-zero status, with no output, if the
#          substitution itself fails
redact_secrets() {
  local text
  # Decide on argument count rather than emptiness so an empty argument does
  # not fall through to reading the caller's stdin.
  if [ "$#" -gt 0 ]; then
    text=$1
  else
    text=$(cat)
  fi

  local -a rules=(
    # Long hex strings (32+ chars) not preceded by $ (env var refs)
    -e 's/([^$]|^)([0-9a-fA-F]{32,})/\1[REDACTED]/g'
    # URLs with embedded long hex (20+ digits). The kept prefix must end on a
    # non-hex character so the whole hex run is replaced, and the rule loops
    # until no URL on the line has such a run left.
    -e ':url'
    -e 's|(https?://([^[:space:]]*[^0-9a-fA-F[:space:]])?)[0-9a-fA-F]{20,}|\1[REDACTED]|g'
    -e 't url'
    # Secret assignments (KEY=value)
    -e 's/((API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=)[^[:space:]"]{16,}/\1[REDACTED]/g'
    # Bearer tokens. "#" is the delimiter because the bracket expression
    # contains "/", which would otherwise end the pattern early.
    -e 's#(Bearer )[A-Za-z0-9_/+=-]{20,}#\1[REDACTED]#g'
    # AWS-style keys
    -e 's/AKIA[0-9A-Z]{16}/[REDACTED]/g'
  )

  # Fail closed: if sed errors out, emit nothing instead of the raw text.
  text=$(printf '%s' "$text" | sed -E "${rules[@]}") || return
  printf '%s' "$text"
}