# disinto/formulas/collect-engagement.toml

# formulas/collect-engagement.toml — Collect website engagement data
#
# Daily formula: SSH into Caddy host, fetch access log, parse locally,
# commit evidence JSON to ops repo via Forgejo API.
#
# Triggered by cron in the edge container entrypoint (daily at 23:50 UTC).
# Design choices from #426: Q1=A (fetch raw log, process locally),
# Q2=A (direct cron in edge container), Q3=B (dedicated purpose-limited SSH key).
#
# Steps: fetch-log → parse-engagement → commit-evidence

# Formula identity: the machine name and schema/revision number first,
# followed by the one-line human summary.
name = "collect-engagement"
version = 1
description = "SSH-fetch Caddy access log, parse engagement metrics, commit evidence"

# NOTE(review): presumably the files handed to the agent as background context
# when this formula runs — confirm against the formula runner.
[context]
files = ["AGENTS.md"]

# Host the fetch-log step connects to over SSH. The default is shell
# parameter-expansion text: TOML stores it verbatim, and it only resolves to
# CADDY_SSH_HOST (or disinto.ai when that is unset/empty) once a shell
# evaluates the step snippet it is substituted into.
# NOTE(review): assumes the formula runner does not expand it first — confirm.
[vars.caddy_host]
description = "SSH host for the Caddy server"
default = '${CADDY_SSH_HOST:-disinto.ai}'
required = false

# Login name used in the user@host target of the ssh/scp commands in
# fetch-log. The default is shell parameter-expansion text (CADDY_SSH_USER,
# else debian) that TOML itself does not expand.
[vars.caddy_user]
description = "SSH user on the Caddy host"
default = '${CADDY_SSH_USER:-debian}'
required = false

# Remote source path for the scp in fetch-log. The default is shell
# parameter-expansion text (CADDY_ACCESS_LOG, else /var/log/caddy/access.log).
# NOTE(review): parse-engagement exports CADDY_ACCESS_LOG pointing at the
# local copy; if that export is still in effect when fetch-log is evaluated,
# this default would resolve to the local path — confirm steps do not share
# an environment across runs.
[vars.caddy_log_path]
description = "Path to Caddy access log on the remote host"
default = '${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}'
required = false

# Where the fetched log lands on the local side: written by the scp in
# fetch-log, handed to the parser via CADDY_ACCESS_LOG in parse-engagement,
# and removed at the end of that step.
[vars.local_log_path]
description = "Local path to store fetched access log"
default = '/tmp/caddy-access-log-fetch.log'
required = false

# Directory holding the dated evidence JSON. The steps use it twice: joined
# onto OPS_REPO_ROOT to locate the file on disk, and as the path prefix of the
# file inside the ops repo when calling the Forgejo contents API.
[vars.evidence_dir]
description = "Evidence output directory in the ops repo"
default = 'evidence/engagement'
required = false

# ── Step 1: SSH fetch ────────────────────────────────────────────────

# Step 1 of 3: copy the Caddy access log from the remote host to
# {{local_log_path}} using a key supplied via CADDY_SSH_KEY.
#
# The description is a TOML *literal* multi-line string ('''), so the shell
# snippets reach the reader exactly as written. In a basic (""") string the
# TOML parser would turn the `\n` in the printf format into a real line break
# and swallow the trailing `\` line continuations.
#
# NOTE(review): StrictHostKeyChecking=accept-new trusts whatever host key is
# presented on first contact. If known_hosts does not persist between runs,
# that is effectively no host verification — consider pinning the host key.
[[steps]]
id = "fetch-log"
title = "Fetch Caddy access log from remote host via SSH"
description = '''
Fetch today's Caddy access log segment from the remote host using SCP.

The SSH key is read from the environment (CADDY_SSH_KEY), which is
decrypted from .env.vault.enc by the dispatcher. It is NEVER hardcoded.

1. Write the SSH key to a temporary file with restricted permissions
   (abort early if the key was not provided):
     : "${CADDY_SSH_KEY:?CADDY_SSH_KEY is not set}"
     _ssh_key_file=$(mktemp)
     trap 'rm -f "$_ssh_key_file"' EXIT
     printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
     chmod 0600 "$_ssh_key_file"

2. Verify connectivity:
     ssh -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
       -o ConnectTimeout=10 -o BatchMode=yes \
       {{caddy_user}}@{{caddy_host}} 'echo ok'

3. Fetch the access log via scp:
     scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
       -o ConnectTimeout=10 -o BatchMode=yes \
       "{{caddy_user}}@{{caddy_host}}:{{caddy_log_path}}" \
       "{{local_log_path}}"

4. Verify the fetched file is non-empty:
     if [ ! -s "{{local_log_path}}" ]; then
       echo "WARNING: fetched access log is empty — site may have no traffic"
     else
       echo "Fetched $(wc -l < "{{local_log_path}}") lines from {{caddy_host}}"
     fi

5. Clean up the temporary key file:
     rm -f "$_ssh_key_file"
'''

# ── Step 2: Parse engagement ─────────────────────────────────────────

# Step 2 of 3: run the engagement parser on the log copied by fetch-log and
# confirm that a dated evidence JSON appeared under OPS_REPO_ROOT.
# `needs` is listed ahead of the long description so the dependency is not
# buried below it; key order carries no meaning in TOML.
# NOTE(review): REPORT_DATE is taken from the clock after the parser has run;
# with the documented 23:50 UTC trigger, a run that crosses midnight would
# look for the next day's file — confirm how collect-engagement.sh names it.
[[steps]]
id = "parse-engagement"
title = "Run collect-engagement.sh against the local log copy"
needs = ["fetch-log"]
description = '''
Run the engagement parser against the locally fetched access log.

1. Set CADDY_ACCESS_LOG to point at the local copy so collect-engagement.sh
   reads from it instead of the default path:
     export CADDY_ACCESS_LOG="{{local_log_path}}"

2. Run the parser:
     bash "$FACTORY_ROOT/site/collect-engagement.sh"

3. Verify the evidence JSON was written:
     REPORT_DATE=$(date -u +%Y-%m-%d)
     EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
     if [ -f "$EVIDENCE_FILE" ]; then
       echo "Evidence written: $EVIDENCE_FILE"
       jq . "$EVIDENCE_FILE"
     else
       echo "ERROR: evidence file not found at $EVIDENCE_FILE"
       exit 1
     fi

4. Clean up the fetched log:
     rm -f "{{local_log_path}}"
'''

# ── Step 3: Commit evidence ──────────────────────────────────────────

# Step 3 of 3: push the dated evidence JSON into the ops repo through the
# Forgejo contents API (PUT with the current sha when the file already exists,
# POST otherwise), then read it back to confirm it is present.
#
# The create/update calls fail the step explicitly when curl reports an error.
# Without that, a rejected update would still print "Updated…" and pass the
# read-back check, because the file from the earlier commit already exists.
#
# The description is a TOML literal multi-line string (''') so the shell `\`
# line continuations are delivered verbatim rather than folded by the parser.
#
# NOTE(review): REPORT_DATE is recomputed from the clock here, independently
# of parse-engagement; with the documented 23:50 UTC trigger, a run that
# crosses midnight would look for a file dated the following day.
[[steps]]
id = "commit-evidence"
title = "Commit evidence JSON to ops repo via Forgejo API"
needs = ["parse-engagement"]
description = '''
Commit the dated evidence JSON to the ops repo so the planner can
consume it during gap analysis.

1. Read the evidence file:
     REPORT_DATE=$(date -u +%Y-%m-%d)
     EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
     CONTENT=$(base64 < "$EVIDENCE_FILE")

2. Check if the file already exists in the ops repo (update vs create):
     OPS_OWNER="${OPS_FORGE_OWNER:-${FORGE_REPO%%/*}}"
     OPS_REPO="${OPS_FORGE_REPO:-${PROJECT_NAME:-disinto}-ops}"
     FILE_PATH="{{evidence_dir}}/${REPORT_DATE}.json"

     EXISTING=$(curl -sf \
       -H "Authorization: token ${FORGE_TOKEN}" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       2>/dev/null || echo "")

3. Create or update the file via Forgejo API (stop on any API failure):
     if [ -n "$EXISTING" ] && printf '%s' "$EXISTING" | jq -e '.sha' >/dev/null 2>&1; then
       # Update existing file
       SHA=$(printf '%s' "$EXISTING" | jq -r '.sha')
       curl -sf -X PUT \
         -H "Authorization: token ${FORGE_TOKEN}" \
         -H "Content-Type: application/json" \
         "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
         -d "$(jq -nc --arg content "$CONTENT" --arg sha "$SHA" --arg msg "evidence: engagement ${REPORT_DATE}" \
           '{message: $msg, content: $content, sha: $sha}')" \
         || { echo "ERROR: failed to update evidence file in ops repo"; exit 1; }
       echo "Updated existing evidence file in ops repo"
     else
       # Create new file
       curl -sf -X POST \
         -H "Authorization: token ${FORGE_TOKEN}" \
         -H "Content-Type: application/json" \
         "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
         -d "$(jq -nc --arg content "$CONTENT" --arg msg "evidence: engagement ${REPORT_DATE}" \
           '{message: $msg, content: $content}')" \
         || { echo "ERROR: failed to create evidence file in ops repo"; exit 1; }
       echo "Created evidence file in ops repo"
     fi

4. Verify the commit landed:
     VERIFY=$(curl -sf \
       -H "Authorization: token ${FORGE_TOKEN}" \
       "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
       | jq -r '.name // empty')
     if [ "$VERIFY" = "${REPORT_DATE}.json" ]; then
       echo "Evidence committed: ${FILE_PATH}"
     else
       echo "ERROR: could not verify evidence commit"
       exit 1
     fi
'''
# Last recorded change: "fix: feat: collect-engagement formula + container script —
# SSH fetch + local parse + evidence commit (#745)", 2026-04-15 07:01:37 +00:00.
# Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>