# formulas/collect-engagement.toml — Collect website engagement data
#
# Daily formula: SSH into Caddy host, fetch access log, parse locally,
# commit evidence JSON to ops repo via Forgejo API.
#
# Triggered by cron in the edge container entrypoint (daily at 23:50 UTC).
# Design choices from #426: Q1=A (fetch raw log, process locally),
# Q2=A (direct cron in edge container), Q3=B (dedicated purpose-limited SSH key).
#
# Steps: fetch-log → parse-engagement → commit-evidence
#
# Notes for maintainers:
# - TOML performs no interpolation: "${VAR:-default}" defaults and {{var}}
#   placeholders are plain text here. Presumably the formula runner expands
#   them — confirm against the runner before relying on it.
# - Step descriptions are multi-line *literal* strings (''' ... ''') so that
#   shell backslashes (the "\n" in printf, trailing "\" line continuations)
#   are passed through verbatim instead of being processed as TOML escapes.

name = "collect-engagement"
description = "SSH-fetch Caddy access log, parse engagement metrics, commit evidence"
version = 1

[context]
files = ["AGENTS.md"]

[vars.caddy_host]
description = "SSH host for the Caddy server"
required = false
default = "${CADDY_SSH_HOST:-disinto.ai}"

[vars.caddy_user]
description = "SSH user on the Caddy host"
required = false
default = "${CADDY_SSH_USER:-debian}"

[vars.caddy_log_path]
description = "Path to Caddy access log on the remote host"
required = false
default = "${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}"

[vars.local_log_path]
description = "Local path to store fetched access log"
required = false
default = "/tmp/caddy-access-log-fetch.log"

[vars.evidence_dir]
description = "Evidence output directory in the ops repo"
required = false
default = "evidence/engagement"

# ── Step 1: SSH fetch ────────────────────────────────────────────────

[[steps]]
id = "fetch-log"
title = "Fetch Caddy access log from remote host via SSH"
description = '''
Fetch today's Caddy access log segment from the remote host using SCP.

The SSH key is read from the environment (CADDY_SSH_KEY), which is
decrypted from .env.vault.enc by the dispatcher. It is NEVER hardcoded.

1. Write the SSH key to a temporary file with restricted permissions:

   _ssh_key_file=$(mktemp)
   trap 'rm -f "$_ssh_key_file"' EXIT
   printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
   chmod 0600 "$_ssh_key_file"

2. Verify connectivity:

   ssh -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
     -o ConnectTimeout=10 -o BatchMode=yes \
     {{caddy_user}}@{{caddy_host}} 'echo ok'

3. Fetch the access log via scp:

   scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
     -o ConnectTimeout=10 -o BatchMode=yes \
     "{{caddy_user}}@{{caddy_host}}:{{caddy_log_path}}" \
     "{{local_log_path}}"

4. Verify the fetched file is non-empty:

   if [ ! -s "{{local_log_path}}" ]; then
     echo "WARNING: fetched access log is empty — site may have no traffic"
   else
     echo "Fetched $(wc -l < "{{local_log_path}}") lines from {{caddy_host}}"
   fi

5. Clean up the temporary key file:

   rm -f "$_ssh_key_file"
'''

# ── Step 2: Parse engagement ─────────────────────────────────────────

[[steps]]
id = "parse-engagement"
title = "Run collect-engagement.sh against the local log copy"
description = '''
Run the engagement parser against the locally fetched access log.

1. Set CADDY_ACCESS_LOG to point at the local copy so collect-engagement.sh
   reads from it instead of the default path:

   export CADDY_ACCESS_LOG="{{local_log_path}}"

2. Run the parser:

   bash "$FACTORY_ROOT/site/collect-engagement.sh"

3. Verify the evidence JSON was written:

   REPORT_DATE=$(date -u +%Y-%m-%d)
   EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
   if [ -f "$EVIDENCE_FILE" ]; then
     echo "Evidence written: $EVIDENCE_FILE"
     jq . "$EVIDENCE_FILE"
   else
     echo "ERROR: evidence file not found at $EVIDENCE_FILE"
     exit 1
   fi

4. Clean up the fetched log:

   rm -f "{{local_log_path}}"
'''
needs = ["fetch-log"]

# ── Step 3: Commit evidence ──────────────────────────────────────────

[[steps]]
id = "commit-evidence"
title = "Commit evidence JSON to ops repo via Forgejo API"
description = '''
Commit the dated evidence JSON to the ops repo so the planner can
consume it during gap analysis.

1. Read the evidence file:

   REPORT_DATE=$(date -u +%Y-%m-%d)
   EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
   CONTENT=$(base64 < "$EVIDENCE_FILE")

2. Check if the file already exists in the ops repo (update vs create):

   OPS_OWNER="${OPS_FORGE_OWNER:-${FORGE_REPO%%/*}}"
   OPS_REPO="${OPS_FORGE_REPO:-${PROJECT_NAME:-disinto}-ops}"
   FILE_PATH="{{evidence_dir}}/${REPORT_DATE}.json"

   EXISTING=$(curl -sf \
     -H "Authorization: token ${FORGE_TOKEN}" \
     "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
     2>/dev/null || echo "")

3. Create or update the file via Forgejo API. Abort if the write request
   fails: the check in step 4 only proves that the path exists, so on its
   own it cannot detect a failed update of an already-existing file.

   if [ -n "$EXISTING" ] && printf '%s' "$EXISTING" | jq -e '.sha' >/dev/null 2>&1; then
     # Update existing file
     SHA=$(printf '%s' "$EXISTING" | jq -r '.sha')
     if ! curl -sf -X PUT \
       -H "Authorization: token ${FORGE_TOKEN}" \
       -H "Content-Type: application/json" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       -d "$(jq -nc --arg content "$CONTENT" --arg sha "$SHA" --arg msg "evidence: engagement ${REPORT_DATE}" \
         '{message: $msg, content: $content, sha: $sha}')"; then
       echo "ERROR: failed to update evidence file in ops repo"
       exit 1
     fi
     echo "Updated existing evidence file in ops repo"
   else
     # Create new file
     if ! curl -sf -X POST \
       -H "Authorization: token ${FORGE_TOKEN}" \
       -H "Content-Type: application/json" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       -d "$(jq -nc --arg content "$CONTENT" --arg msg "evidence: engagement ${REPORT_DATE}" \
         '{message: $msg, content: $content}')"; then
       echo "ERROR: failed to create evidence file in ops repo"
       exit 1
     fi
     echo "Created evidence file in ops repo"
   fi

4. Verify the commit landed:

   VERIFY=$(curl -sf \
     -H "Authorization: token ${FORGE_TOKEN}" \
     "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
     | jq -r '.name // empty')
   if [ "$VERIFY" = "${REPORT_DATE}.json" ]; then
     echo "Evidence committed: ${FILE_PATH}"
   else
     echo "ERROR: could not verify evidence commit"
     exit 1
   fi
'''
needs = ["parse-engagement"]