disinto/formulas/collect-engagement.toml
Claude 88676e65ae
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful
fix: feat: consolidate secret stores — single granular secrets/*.enc, deprecate .env.vault.enc (#777)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 18:35:03 +00:00

172 lines
6.3 KiB
TOML

# formulas/collect-engagement.toml — Collect website engagement data
#
# Daily formula: SSH into Caddy host, fetch access log, parse locally,
# commit evidence JSON to ops repo via Forgejo API.
#
# Triggered by cron in the edge container entrypoint (daily at 23:50 UTC).
# Design choices from #426: Q1=A (fetch raw log, process locally),
# Q2=A (direct cron in edge container), Q3=B (dedicated purpose-limited SSH key).
#
# Steps: fetch-log → parse-engagement → commit-evidence
# Formula identity (root-table keys).
name = "collect-engagement"
version = 1
description = "SSH-fetch Caddy access log, parse engagement metrics, commit evidence"

# Context files attached to the formula.
# NOTE(review): presumably resolved relative to the repository root by the
# formula runner — confirm.
[context]
files = ["AGENTS.md"]
# Template variables. Step descriptions reference them as {{name}}.
# Every variable is optional and carries a default.
#
# TOML does no interpolation: the ${VAR:-fallback} defaults are stored as the
# literal text shown here. NOTE(review): presumably they are expanded by the
# shell that eventually runs the step snippets — confirm against the runner.

[vars.caddy_host]
description = "SSH host for the Caddy server"
default = "${CADDY_SSH_HOST:-disinto.ai}"
required = false

[vars.caddy_user]
description = "SSH user on the Caddy host"
default = "${CADDY_SSH_USER:-debian}"
required = false

[vars.caddy_log_path]
description = "Path to Caddy access log on the remote host"
default = "${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}"
required = false

# Scratch copy of the remote log; written by fetch-log, read and then removed
# by parse-engagement.
[vars.local_log_path]
description = "Local path to store fetched access log"
default = "/tmp/caddy-access-log-fetch.log"
required = false

# Joined onto ${OPS_REPO_ROOT} for the local file and used as the path prefix
# in the Forgejo contents API URL.
[vars.evidence_dir]
description = "Evidence output directory in the ops repo"
default = "evidence/engagement"
required = false
# ── Step 1: SSH fetch ────────────────────────────────────────────────
[[steps]]
id = "fetch-log"
title = "Fetch Caddy access log from remote host via SSH"
# Literal ('''...''') string on purpose: the snippets contain backslashes
# (printf's \n and trailing line continuations). In a basic """...""" string
# TOML decodes \n into a real newline and folds every backslash-newline, so
# the commands would not reach the reader exactly as written here.
description = '''
Fetch today's Caddy access log segment from the remote host using SCP.

The SSH key is read from the environment (CADDY_SSH_KEY), which is
decrypted from secrets/CADDY_SSH_KEY.enc by the edge entrypoint. It is NEVER hardcoded.

1. Write the SSH key to a temporary file with restricted permissions:
     _ssh_key_file=$(mktemp)
     trap 'rm -f "$_ssh_key_file"' EXIT
     printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
     chmod 0600 "$_ssh_key_file"

2. Verify connectivity:
     ssh -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
       -o ConnectTimeout=10 -o BatchMode=yes \
       {{caddy_user}}@{{caddy_host}} 'echo ok'

3. Fetch the access log via scp. Remove any copy left behind by an earlier
   run first and stop if the transfer fails, so a stale file can never be
   mistaken for a fresh fetch:
     rm -f "{{local_log_path}}"
     if ! scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
       -o ConnectTimeout=10 -o BatchMode=yes \
       "{{caddy_user}}@{{caddy_host}}:{{caddy_log_path}}" \
       "{{local_log_path}}"; then
       echo "ERROR: scp of {{caddy_log_path}} from {{caddy_host}} failed"
       exit 1
     fi

4. Verify the fetched file is non-empty:
     if [ ! -s "{{local_log_path}}" ]; then
       echo "WARNING: fetched access log is empty; site may have no traffic"
     else
       echo "Fetched $(wc -l < "{{local_log_path}}") lines from {{caddy_host}}"
     fi

5. Clean up the temporary key file:
     rm -f "$_ssh_key_file"
'''
# ── Step 2: Parse engagement ─────────────────────────────────────────
# Runs the engagement parser on the log copy produced by fetch-log, checks
# that the dated evidence JSON exists, then deletes the log copy.
[[steps]]
id = "parse-engagement"
title = "Run collect-engagement.sh against the local log copy"
# Ordering: must run after the log has been fetched.
needs = ["fetch-log"]
# NOTE(review): REPORT_DATE is derived from `date -u` here and again in
# commit-evidence. With the 23:50 UTC trigger, a run that crosses 00:00 UTC
# would look for the next day's file — confirm whether the date should be
# computed once and shared.
description = """
Run the engagement parser against the locally fetched access log.
1. Set CADDY_ACCESS_LOG to point at the local copy so collect-engagement.sh
reads from it instead of the default path:
export CADDY_ACCESS_LOG="{{local_log_path}}"
2. Run the parser:
bash "$FACTORY_ROOT/site/collect-engagement.sh"
3. Verify the evidence JSON was written:
REPORT_DATE=$(date -u +%Y-%m-%d)
EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
if [ -f "$EVIDENCE_FILE" ]; then
echo "Evidence written: $EVIDENCE_FILE"
jq . "$EVIDENCE_FILE"
else
echo "ERROR: evidence file not found at $EVIDENCE_FILE"
exit 1
fi
4. Clean up the fetched log:
rm -f "{{local_log_path}}"
"""
# ── Step 3: Commit evidence ──────────────────────────────────────────
# Uploads the dated evidence JSON to the ops repo through the Forgejo
# contents API (PUT when the path already exists, POST otherwise) and then
# confirms the path is present.
[[steps]]
id = "commit-evidence"
title = "Commit evidence JSON to ops repo via Forgejo API"
# Literal ('''...''') string on purpose: the snippets contain backslashes
# (tr's \n and trailing line continuations) that a basic """...""" string
# would decode or fold away.
description = '''
Commit the dated evidence JSON to the ops repo so the planner can
consume it during gap analysis.

1. Read the evidence file (fail early if it is missing) and encode it as a
   single base64 line:
     REPORT_DATE=$(date -u +%Y-%m-%d)
     EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
     if [ ! -f "$EVIDENCE_FILE" ]; then
       echo "ERROR: evidence file not found at $EVIDENCE_FILE"
       exit 1
     fi
     CONTENT=$(base64 < "$EVIDENCE_FILE" | tr -d '\n')

2. Check if the file already exists in the ops repo (update vs create):
     OPS_OWNER="${OPS_FORGE_OWNER:-${FORGE_REPO%%/*}}"
     OPS_REPO="${OPS_FORGE_REPO:-${PROJECT_NAME:-disinto}-ops}"
     FILE_PATH="{{evidence_dir}}/${REPORT_DATE}.json"
     EXISTING=$(curl -sf \
       -H "Authorization: token ${FORGE_TOKEN}" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       2>/dev/null || echo "")

3. Create or update the file via Forgejo API. Stop on a failed request: the
   check in 4 only proves that the path exists, so on its own it cannot
   detect a failed update of a file that was already there:
     if [ -n "$EXISTING" ] && printf '%s' "$EXISTING" | jq -e '.sha' >/dev/null 2>&1; then
       # Update existing file
       SHA=$(printf '%s' "$EXISTING" | jq -r '.sha')
       if ! curl -sf -X PUT \
         -H "Authorization: token ${FORGE_TOKEN}" \
         -H "Content-Type: application/json" \
         "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
         -d "$(jq -nc --arg content "$CONTENT" --arg sha "$SHA" --arg msg "evidence: engagement ${REPORT_DATE}" \
           '{message: $msg, content: $content, sha: $sha}')"; then
         echo "ERROR: failed to update evidence file in ops repo"
         exit 1
       fi
       echo "Updated existing evidence file in ops repo"
     else
       # Create new file
       if ! curl -sf -X POST \
         -H "Authorization: token ${FORGE_TOKEN}" \
         -H "Content-Type: application/json" \
         "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
         -d "$(jq -nc --arg content "$CONTENT" --arg msg "evidence: engagement ${REPORT_DATE}" \
           '{message: $msg, content: $content}')"; then
         echo "ERROR: failed to create evidence file in ops repo"
         exit 1
       fi
       echo "Created evidence file in ops repo"
     fi

4. Verify the commit landed:
     VERIFY=$(curl -sf \
       -H "Authorization: token ${FORGE_TOKEN}" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       | jq -r '.name // empty')
     if [ "$VERIFY" = "${REPORT_DATE}.json" ]; then
       echo "Evidence committed: ${FILE_PATH}"
     else
       echo "ERROR: could not verify evidence commit"
       exit 1
     fi
'''
needs = ["parse-engagement"]