Merge pull request 'fix: fix: action formulas must not contain secrets in issue body (#291)' (#473) from fix/issue-291 into main

2026-03-21 11:08:54 +01:00 · 2026-03-21 11:08:54 +01:00 · 0dd607c1bb
commit 0dd607c1bb
parent 3793339da5 1c8c6a8caf
7 changed files with 142 additions and 6 deletions
--- a/.env.example
+++ b/.env.example
@ -39,5 +39,10 @@ MATRIX_BOT_USER=@factory:your.server       # bot's Matrix user ID
 MATRIX_TOKEN=                              # bot's access token
 MATRIX_ROOM_ID=                            # coordination room ID (!xxx:your.server)

+# ── Project-specific secrets ──────────────────────────────────────────────
+# Store all project secrets here so formulas reference env vars, never hardcode.
+# Example: BASE_RPC_URL for on-chain evolution scripts.
+BASE_RPC_URL=
+
 # ── Tuning ────────────────────────────────────────────────────────────────
 CLAUDE_TIMEOUT=7200                        # max seconds per Claude invocation
--- a/.woodpecker/agent-smoke.sh
+++ b/.woodpecker/agent-smoke.sh
@ -97,6 +97,7 @@ echo "=== 2/2  Function resolution ==="
 #   lib/ci-helpers.sh       — sourced by pollers and review (ci_passed, classify_pipeline_failure, etc.)
 #   lib/load-project.sh     — sourced by env.sh when PROJECT_TOML is set
 #   lib/file-action-issue.sh — sourced by gardener-run.sh (file_action_issue)
+#   lib/secret-scan.sh      — sourced by file-action-issue.sh, phase-handler.sh (scan_for_secrets, redact_secrets)
 #   lib/formula-session.sh  — sourced by formula-driven agents (acquire_cron_lock, run_formula_and_monitor, etc.)
 #
 # Excluded — not sourced inline by agents:
@ -108,7 +109,7 @@ echo "=== 2/2  Function resolution ==="
 # If a new lib file is added and sourced by agents, add it to LIB_FUNS below
 # and add a check_script call for it in the lib files section further down.
 LIB_FUNS=$(
-  for f in lib/agent-session.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/file-action-issue.sh lib/formula-session.sh; do
+  for f in lib/agent-session.sh lib/env.sh lib/ci-helpers.sh lib/load-project.sh lib/secret-scan.sh lib/file-action-issue.sh lib/formula-session.sh; do
    if [ -f "$f" ]; then get_fns "$f"; fi
  done | sort -u
 )
@ -174,7 +175,8 @@ check_script() {
 check_script lib/env.sh
 check_script lib/agent-session.sh
 check_script lib/ci-helpers.sh
-check_script lib/file-action-issue.sh
+check_script lib/secret-scan.sh
+check_script lib/file-action-issue.sh   lib/secret-scan.sh
 check_script lib/formula-session.sh     lib/agent-session.sh
 check_script lib/load-project.sh

@ -187,7 +189,7 @@ check_script lib/parse-deps.sh
 # Agent scripts — list cross-sourced files where function scope flows across files.
 # dev-agent.sh sources phase-handler.sh; phase-handler.sh calls helpers defined in dev-agent.sh.
 check_script dev/dev-agent.sh          dev/phase-handler.sh
-check_script dev/phase-handler.sh      dev/dev-agent.sh
+check_script dev/phase-handler.sh      dev/dev-agent.sh lib/secret-scan.sh
 check_script dev/dev-poll.sh
 check_script dev/phase-test.sh
 check_script gardener/gardener-agent.sh  lib/agent-session.sh
--- a/AGENTS.md
+++ b/AGENTS.md
@ -53,6 +53,7 @@ disinto/
 - Source shared environment: `source "$(dirname "$0")/../lib/env.sh"`
 - Log to `$LOGFILE` using the `log()` function from env.sh or defined locally
 - Never hardcode secrets — all come from `.env` or TOML project files
+- Never embed secrets in issue bodies, PR descriptions, or comments — use env var references (e.g. `$BASE_RPC_URL`)
 - ShellCheck must pass (CI runs `shellcheck` on all `.sh` files)
 - Avoid duplicate code — shared helpers go in `lib/`

@ -344,7 +345,8 @@ sourced as needed.
 | `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` patterns. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll, supervisor-poll |
 | `lib/matrix_listener.sh` | Long-poll Matrix sync daemon. Dispatches thread replies to the correct agent via well-known files (`/tmp/{agent}-escalation-reply`). Handles supervisor, gardener, dev, review, vault, and action reply routing. Run as systemd service. | Standalone daemon |
 | `lib/formula-session.sh` | `acquire_cron_lock()`, `check_memory()`, `load_formula()`, `build_context_block()`, `start_formula_session()`, `formula_phase_callback()`, `build_prompt_footer()`, `run_formula_and_monitor()` — shared helpers for formula-driven cron agents (lock, memory guard, formula loading, prompt assembly, tmux session, monitor loop, crash recovery). | planner-run.sh, predictor-run.sh |
-| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. | gardener-run.sh |
+| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | file-action-issue.sh, phase-handler.sh |
+| `lib/file-action-issue.sh` | `file_action_issue()` — dedup check, secret scan, label lookup, and issue creation for formula-driven cron wrappers. Sets `FILED_ISSUE_NUM` on success. Returns 4 if secrets detected in body. | gardener-run.sh |
 | `lib/agent-session.sh` | Shared tmux + Claude session helpers: `create_agent_session()`, `inject_formula()`, `agent_wait_for_claude_ready()`, `agent_inject_into_session()`, `agent_kill_session()`, `monitor_phase_loop()`, `read_phase()`, `write_compact_context()`. `create_agent_session(session, workdir, [phase_file])` optionally installs a PostToolUse hook (matcher `Bash\|Write`) that detects phase file writes in real-time — when Claude writes to the phase file, the hook writes a marker so `monitor_phase_loop` reacts on the next poll instead of waiting for mtime changes. Also installs a StopFailure hook (matcher `rate_limit\|server_error\|authentication_failed\|billing_error`) that writes `PHASE:failed` with an `api_error` reason to the phase file and touches the phase-changed marker, so the orchestrator discovers API errors within one poll cycle instead of waiting for idle timeout. Also installs a SessionStart hook (matcher `compact`) that re-injects phase protocol instructions after context compaction — callers write the context file via `write_compact_context(phase_file, content)`, and the hook (`on-compact-reinject.sh`) outputs the file content to stdout so Claude retains critical instructions. When `MATRIX_THREAD_ID` is exported, also installs a Stop hook (`on-stop-matrix.sh`) that streams each Claude response to the Matrix thread. `monitor_phase_loop` sets `_MONITOR_LOOP_EXIT` to one of: `done`, `idle_timeout`, `idle_prompt` (Claude returned to `❯` for 3 consecutive polls without writing any phase — callback invoked with `PHASE:failed`, session already dead), `crashed`, or a `PHASE:*` string. **Callers must handle `idle_prompt`** in both their callback and their post-loop exit handler — see [`docs/PHASE-PROTOCOL.md` § idle_prompt](docs/PHASE-PROTOCOL.md#idle_prompt-exit-reason) for the full contract. | dev-agent.sh, gardener-agent.sh, action-agent.sh |

 ---
--- a/action/action-agent.sh
+++ b/action/action-agent.sh
@ -288,6 +288,13 @@ ${PRIOR_SECTION}## Instructions
   CODEBERG_TOKEN, CODEBERG_API, CODEBERG_REPO, CODEBERG_WEB, PROJECT_NAME
   (all sourced from ${FACTORY_ROOT}/.env)

+### CRITICAL: Never embed secrets in issue bodies, comments, or PR descriptions
+   - NEVER put API keys, tokens, passwords, or private keys in issue text or comments.
+   - Always reference secrets via env var names (e.g. \\\$BASE_RPC_URL, \\\$CODEBERG_TOKEN).
+   - If a formula step needs a secret, read it from .env or the environment at runtime.
+   - Before posting any comment, verify it contains no credentials, hex keys > 32 chars,
+     or URLs with embedded API keys.
+
 If the prior comments above show work already completed, resume from where it
 left off.

--- a/dev/phase-handler.sh
+++ b/dev/phase-handler.sh
@ -22,6 +22,10 @@
 # shellcheck disable=SC2154  # globals are set in dev-agent.sh before calling
 # shellcheck disable=SC2034  # CLAIMED is read by cleanup() in dev-agent.sh

+# Load secret scanner for redacting tmux output before posting to issues
+# shellcheck source=../lib/secret-scan.sh
+source "$(dirname "${BASH_SOURCE[0]}")/../lib/secret-scan.sh"
+
 # --- Default globals (agents can override after sourcing) ---
 : "${CI_POLL_TIMEOUT:=1800}"
 : "${REVIEW_POLL_TIMEOUT:=10800}"
@ -51,6 +55,11 @@ post_blocked_diagnostic() {
    tmux_output=$(tmux capture-pane -p -t "$session" -S -50 2>/dev/null || true)
  fi

+  # Redact any secrets from tmux output before posting to issue
+  if [ -n "$tmux_output" ]; then
+    tmux_output=$(redact_secrets "$tmux_output")
+  fi
+
  # Build diagnostic comment body
  local comment
  comment="### Session failure diagnostic
--- a/lib/file-action-issue.sh
+++ b/lib/file-action-issue.sh
@ -2,16 +2,26 @@
 # file-action-issue.sh — File an action issue for a formula run
 #
 # Usage: source this file, then call file_action_issue.
-# Requires: codeberg_api() from lib/env.sh, jq
+# Requires: codeberg_api() from lib/env.sh, jq, lib/secret-scan.sh
 #
 # file_action_issue <formula_name> <title> <body>
 #   Sets FILED_ISSUE_NUM on success.
-#   Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error
+#   Returns: 0=created, 1=duplicate exists, 2=label not found, 3=API error, 4=secrets detected
+
+# Load secret scanner
+# shellcheck source=secret-scan.sh
+source "$(dirname "${BASH_SOURCE[0]}")/secret-scan.sh"

 file_action_issue() {
  local formula_name="$1" title="$2" body="$3"
  FILED_ISSUE_NUM=""

+  # Secret scan: reject issue bodies containing embedded secrets
+  if ! scan_for_secrets "$body"; then
+    echo "file-action-issue: BLOCKED — issue body for '${formula_name}' contains potential secrets. Use env var references instead." >&2
+    return 4
+  fi
+
  # Dedup: skip if an open action issue for this formula already exists
  local open_actions
  open_actions=$(codeberg_api GET "/issues?state=open&type=issues&labels=action&limit=50" 2>/dev/null || true)
--- a/lib/secret-scan.sh
+++ b/lib/secret-scan.sh
@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+# secret-scan.sh — Detect secrets in text before it reaches issue bodies or comments
+#
+# Usage: source this file, then call scan_for_secrets.
+#
+# scan_for_secrets <text>
+#   Returns: 0 = clean, 1 = secrets detected
+#   Outputs: matched patterns to stderr (for logging)
+#
+# redact_secrets <text>
+#   Outputs: text with detected secrets replaced by [REDACTED]
+
+# Patterns that indicate embedded secrets (extended regex)
+_SECRET_PATTERNS=(
+  # Long hex strings (API keys, tokens): 32+ hex chars as a standalone token
+  '[0-9a-fA-F]{32,}'
+  # Bearer/token auth headers with actual values
+  'Bearer [A-Za-z0-9_/+=-]{20,}'
+  # Private keys (0x-prefixed 64+ hex)
+  '0x[0-9a-fA-F]{64}'
+  # URLs with embedded credentials (user:pass@host or api-key in path)
+  'https?://[^[:space:]]*[0-9a-fA-F]{20,}'
+  # AWS-style keys
+  'AKIA[0-9A-Z]{16}'
+  # Generic secret assignment patterns (KEY=<long value>)
+  '(API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=[^[:space:]"]{16,}'
+)
+
+# Known safe patterns to exclude (env var references, not actual values)
+_SAFE_PATTERNS=(
+  # Shell variable references: $VAR, ${VAR}, ${VAR:-default}
+  '\$\{?[A-Z_]+\}?'
+  # Git SHAs in typical git contexts (commit refs, not standalone secrets)
+  'commit [0-9a-f]{40}'
+  'Merge [0-9a-f]{40}'
+  # Codeberg/GitHub URLs with short hex (PR refs, commit links)
+  'codeberg\.org/[^[:space:]]+'
+  # ShellCheck directive codes
+  'SC[0-9]{4}'
+)
+
+# scan_for_secrets — check text for potential secrets
+# Args: text (via stdin or $1)
+# Returns: 0 = clean, 1 = secrets found
+# Outputs: matched patterns to stderr
+scan_for_secrets() {
+  local text="${1:-$(cat)}"
+  local found=0
+
+  # Strip known safe patterns before scanning
+  local cleaned="$text"
+  for safe in "${_SAFE_PATTERNS[@]}"; do
+    cleaned=$(printf '%s' "$cleaned" | sed -E "s/${safe}/__SAFE__/g" 2>/dev/null || printf '%s' "$cleaned")
+  done
+
+  for pattern in "${_SECRET_PATTERNS[@]}"; do
+    local matches
+    matches=$(printf '%s' "$cleaned" | grep -oE "$pattern" 2>/dev/null || true)
+    if [ -n "$matches" ]; then
+      # Filter out short matches that are likely false positives (git SHAs in safe context)
+      while IFS= read -r match; do
+        # Skip if match is entirely the word __SAFE__ (already excluded)
+        [ "$match" = "__SAFE__" ] && continue
+        # Skip empty
+        [ -z "$match" ] && continue
+        printf 'secret-scan: detected potential secret matching pattern [%s]: %s\n' \
+          "$pattern" "${match:0:8}...${match: -4}" >&2
+        found=1
+      done <<< "$matches"
+    fi
+  done
+
+  return $found
+}
+
+# redact_secrets — replace detected secrets with [REDACTED]
+# Args: text (via stdin or $1)
+# Outputs: sanitized text
+redact_secrets() {
+  local text="${1:-$(cat)}"
+
+  # Replace AWS AKIA keys
+  text=$(printf '%s' "$text" | sed -E 's/AKIA[0-9A-Z]{16}/[REDACTED]/g')
+
+  # Replace Ethereum private keys (0x + 64 hex chars)
+  text=$(printf '%s' "$text" | sed -E 's/0x[0-9a-fA-F]{64}/[REDACTED]/g')
+
+  # Replace long hex strings (32+ chars) not preceded by $ (env var refs)
+  text=$(printf '%s' "$text" | sed -E 's/([^$]|^)([0-9a-fA-F]{32,})/\1[REDACTED]/g')
+
+  # Replace URLs with embedded long hex
+  text=$(printf '%s' "$text" | sed -E 's|(https?://[^[:space:]]*)[0-9a-fA-F]{20,}|\1[REDACTED]|g')
+
+  # Replace secret assignments (KEY=value)
+  text=$(printf '%s' "$text" | sed -E 's/((API_KEY|SECRET|TOKEN|PRIVATE_KEY|PASSWORD|INFURA|ALCHEMY)=)[^[:space:]"]{16,}/\1[REDACTED]/g')
+
+  # Replace Bearer tokens
+  text=$(printf '%s' "$text" | sed -E 's/(Bearer )[A-Za-z0-9_/+=-]{20,}/\1[REDACTED]/g')
+
+  printf '%s' "$text"
+}