fix: fix: action formulas must not contain secrets in issue body (#291)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-21 09:54:21 +00:00
parent 3793339da5
commit 0b453fdab0
6 changed files with 131 additions and 3 deletions

View file

@ -39,5 +39,10 @@ MATRIX_BOT_USER=@factory:your.server # bot's Matrix user ID
MATRIX_TOKEN= # bot's access token MATRIX_TOKEN= # bot's access token
MATRIX_ROOM_ID= # coordination room ID (!xxx:your.server) MATRIX_ROOM_ID= # coordination room ID (!xxx:your.server)
# ── Project-specific secrets ──────────────────────────────────────────────
# Store all project secrets here so formulas reference env vars, never hardcode.
# Example: BASE_RPC_URL for on-chain evolution scripts.
BASE_RPC_URL=
# ── Tuning ──────────────────────────────────────────────────────────────── # ── Tuning ────────────────────────────────────────────────────────────────
CLAUDE_TIMEOUT=7200 # max seconds per Claude invocation CLAUDE_TIMEOUT=7200 # max seconds per Claude invocation

View file

@ -53,6 +53,7 @@ disinto/
- Source shared environment: `source "$(dirname "$0")/../lib/env.sh"` - Source shared environment: `source "$(dirname "$0")/../lib/env.sh"`
- Log to `$LOGFILE` using the `log()` function from env.sh or defined locally - Log to `$LOGFILE` using the `log()` function from env.sh or defined locally
- Never hardcode secrets — all come from `.env` or TOML project files - Never hardcode secrets — all come from `.env` or TOML project files
- Never embed secrets in issue bodies, PR descriptions, or comments — use env var references (e.g. `$BASE_RPC_URL`)
- ShellCheck must pass (CI runs `shellcheck` on all `.sh` files) - ShellCheck must pass (CI runs `shellcheck` on all `.sh` files)
- Avoid duplicate code — shared helpers go in `lib/` - Avoid duplicate code — shared helpers go in `lib/`
@ -344,7 +345,8 @@ sourced as needed.
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll | | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
| `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon | | `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon |
| `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `run_formula_and_monitor()` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). | planner-run.sh, predictor-run.sh | | `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `run_formula_and_monitor()` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). | planner-run.sh, predictor-run.sh |
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. | gardener-run.sh | | `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | gardener-run.sh |
| `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `MATRIX_THREAD_ID` is exported, also installs a Stop hook (`on-stop-matrix.sh`) that streams each Claude response to the Matrix thread. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or a `PHASE:*` string. **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` § idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh, gardener-agent.sh, action-agent.sh | | `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `MATRIX_THREAD_ID` is exported, also installs a Stop hook (`on-stop-matrix.sh`) that streams each Claude response to the Matrix thread. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or a `PHASE:*` string. **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` § idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh, gardener-agent.sh, action-agent.sh |
--- ---

View file

@ -288,6 +288,13 @@ ${PRIOR_SECTION}## Instructions
CODEBERG_TOKEN, CODEBERG_API, CODEBERG_REPO, CODEBERG_WEB, PROJECT_NAME CODEBERG_TOKEN, CODEBERG_API, CODEBERG_REPO, CODEBERG_WEB, PROJECT_NAME
(all sourced from ${FACTORY_ROOT}/.env) (all sourced from ${FACTORY_ROOT}/.env)
### CRITICAL: Never embed secrets in issue bodies, comments, or PR descriptions
- NEVER put API keys, tokens, passwords, or private keys in issue text or comments.
- Always reference secrets via env var names (e.g. \\\$BASE_RPC_URL, \\\$CODEBERG_TOKEN).
- If a formula step needs a secret, read it from .env or the environment at runtime.
- Before posting any comment, verify it contains no credentials, hex keys > 32 chars,
or URLs with embedded API keys.
If the prior comments above show work already completed, resume from where it If the prior comments above show work already completed, resume from where it
left off. left off.

View file

@ -22,6 +22,10 @@
# shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling # shellcheck disable=SC2154 # globals are set in dev-agent.sh before calling
# shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh # shellcheck disable=SC2034 # CLAIMED is read by cleanup() in dev-agent.sh
# Load secret scanner for redacting tmux output before posting to issues
# shellcheck source=../lib/secret-scan.sh
source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh"
# --- Default globals (agents can override after sourcing) --- # --- Default globals (agents can override after sourcing) ---
: "${CI_POLL_TIMEOUT:=1800}" : "${CI_POLL_TIMEOUT:=1800}"
: "${REVIEW_POLL_TIMEOUT:=10800}" : "${REVIEW_POLL_TIMEOUT:=10800}"
@ -51,6 +55,11 @@ post_blocked_diagnostic() {
tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true) tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true)
fi fi
# Redact any secrets from tmux output before posting to issue
if [ -n "$tmux_output" ]; then
tmux_output=$(redact_secrets "$tmux_output")
fi
# Build diagnostic comment body # Build diagnostic comment body
local comment local comment
comment="### Session failure diagnostic comment="### Session failure diagnostic

View file

@ -2,16 +2,26 @@
# file-action-issue.sh — File an action issue for a formula run # file-action-issue.sh — File an action issue for a formula run
# #
# Usage: source this file, then call file_action_issue. # Usage: source this file, then call file_action_issue.
# Requires: codeberg_api() from lib/env.sh, jq # Requires: codeberg_api() from lib/env.sh, jq, lib/secret-scan.sh
# #
# file_action_issue <formula_name> <title> <body> # file_action_issue <formula_name> <title> <body>
# Sets FILED_ISSUE_NUM on success. # Sets FILED_ISSUE_NUM on success.
# Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error # Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
# Load secret scanner
# shellcheck source=secret-scan.sh
source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh"
file_action_issue() { file_action_issue() {
local formula_name="$1" title="$2" body="$3" local formula_name="$1" title="$2" body="$3"
FILED_ISSUE_NUM="" FILED_ISSUE_NUM=""
# Secret scan: reject issue bodies containing embedded secrets
if ! scan_for_secrets "$body" 2>/dev/null; then
echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2
return 4
fi
# Dedup: skip if an open action issue for this formula already exists # Dedup: skip if an open action issue for this formula already exists
local open_actions local open_actions
open_actions=$(codeberg_api GET "/issues?state=open&type=issues&labels=action&limit=50" 2>/dev/null || true) open_actions=$(codeberg_api GET "/issues?state=open&type=issues&labels=action&limit=50" 2>/dev/null || true)

95
lib/secret-scan.sh Normal file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env bash
# secret-scan.sh — Detect secrets in text before it reaches issue bodies or comments
#
# Usage: source this file, then call scan_for_secrets.
#
# scan_for_secrets <text>
# Returns: 0 = clean, 1 = secrets detected
# Outputs: matched patterns to stderr (for logging)
#
# redact_secrets <text>
# Outputs: text with detected secrets replaced by [REDACTED]
# Patterns that indicate embedded secrets (extended regex)
_SECRET_PATTERNS=(
# Long hex strings (API keys, tokens): 32+ hex chars as a standalone token
'[0-9a-fA-F]{32,}'
# Bearer/token auth headers with actual values
'Bearer [A-Za-z0-9_/+=-]{20,}'
# Private keys (0x-prefixed 64+ hex)
'0x[0-9a-fA-F]{64}'
# URLs with embedded credentials (user:pass@host or api-key in path)
'https?://[^[:space:]]*[0-9a-fA-F]{20,}'
# AWS-style keys
'AKIA[0-9A-Z]{16}'
# Generic secret assignment patterns (KEY=<long value>)
'(API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=[^[:space:]"]{16,}'
)
# Known safe patterns to exclude (env var references, not actual values)
_SAFE_PATTERNS=(
# Shell variable references: $VAR, ${VAR}, ${VAR:-default}
'\$\{?[A-Z_]+\}?'
# Git SHAs in typical git contexts (commit refs, not standalone secrets)
'commit [0-9a-f]{40}'
'Merge [0-9a-f]{40}'
# Codeberg/GitHub URLs with short hex (PR refs, commit links)
'codeberg\.org/[^[:space:]]+'
# ShellCheck directive codes
'SC[0-9]{4}'
)
# scan_for_secrets — check text for potential secrets
# Args: text (via stdin or $1)
# Returns: 0 = clean, 1 = secrets found
# Outputs: matched patterns to stderr
scan_for_secrets() {
local text="${1:-$(cat)}"
local found=0
# Strip known safe patterns before scanning
local cleaned="$text"
for safe in "${_SAFE_PATTERNS[@]}"; do
cleaned=$(printf '%s' "$cleaned" | sed -E "s/${safe}/__SAFE__/g" 2>/dev/null || printf '%s' "$cleaned")
done
for pattern in "${_SECRET_PATTERNS[@]}"; do
local matches
matches=$(printf '%s' "$cleaned" | grep -oE "$pattern" 2>/dev/null || true)
if [ -n "$matches" ]; then
# Filter out short matches that are likely false positives (git SHAs in safe context)
while IFS= read -r match; do
# Skip if match is entirely the word __SAFE__ (already excluded)
[ "$match" = "__SAFE__" ] && continue
# Skip empty
[ -z "$match" ] && continue
printf 'secret-scan: detected potential secret matching pattern [%s]: %s\n' \
"$pattern" "${match:0:8}...${match: -4}" >&2
found=1
done <<< "$matches"
fi
done
return $found
}
# redact_secrets — replace detected secrets with [REDACTED]
# Args: text (via stdin or $1)
# Outputs: sanitized text
redact_secrets() {
local text="${1:-$(cat)}"
# Replace long hex strings (32+ chars) not preceded by $ (env var refs)
text=$(printf '%s' "$text" | sed -E 's/([^$]|^)([0-9a-fA-F]{32,})/\1[REDACTED]/g')
# Replace URLs with embedded long hex
text=$(printf '%s' "$text" | sed -E 's|(https?://[^[:space:]]*)[0-9a-fA-F]{20,}|\1[REDACTED]|g')
# Replace secret assignments (KEY=value)
text=$(printf '%s' "$text" | sed -E 's/((API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=)[^[:space:]"]{16,}/\1[REDACTED]/g')
# Replace Bearer tokens
text=$(printf '%s' "$text" | sed -E 's/(Bearer )[A-Za-z0-9_/+=-]{20,}/\1[REDACTED]/g')
printf '%s' "$text"
}