diff --git a/docker/edge/entrypoint-edge.sh b/docker/edge/entrypoint-edge.sh
index d3b08b7..7fc4f4f 100755
--- a/docker/edge/entrypoint-edge.sh
+++ b/docker/edge/entrypoint-edge.sh
@@ -173,6 +173,50 @@ PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
   sleep 1200 # 20 minutes
 done) &
 
+# Start daily engagement collection cron loop in background (#745)
+# Runs collect-engagement.sh daily at ~23:50 UTC via a sleep loop that
+# calculates seconds until the next 23:50 window. SSH key from .env.vault.enc.
+(while true; do
+  # Seconds until the next 23:50 UTC, computed arithmetically from the epoch
+  # clock: POSIX epoch days are exactly 86400s long, so today's 00:00 UTC is
+  # _now - (_now % 86400) and 23:50 UTC is 85800s (23*3600 + 50*60) later.
+  # This replaces the `date -d ... || echo 0` lookup: when date parsing
+  # failed, the 0 fallback produced a negative interval and `sleep` errored
+  # out instead of pausing until the next window.
+  _now=$(date -u +%s)
+  _target=$(( _now - _now % 86400 + 85800 ))
+  if [ "$_target" -le "$_now" ]; then
+    # Today's window has already passed (or is right now): aim for tomorrow.
+    _target=$(( _target + 86400 ))
+  fi
+  _sleep_secs=$(( _target - _now ))
+  echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2
+  sleep "$_sleep_secs"
+  # Local path the remote log is copied to; handed to the parser through
+  # CADDY_ACCESS_LOG so it reads the fetched copy.
+  _fetch_log="/tmp/caddy-access-log-fetch.log"
+  if [ -n "${CADDY_SSH_KEY:-}" ]; then
+    # Write the key to a private temp file that exists only around the scp.
+    _ssh_key_file=$(mktemp)
+    printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
+    chmod 0600 "$_ssh_key_file"
+    # Best effort: a failed fetch is logged and must not end the loop.
+    scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \
+      "${CADDY_SSH_USER:-debian}@${CADDY_SSH_HOST:-disinto.ai}:${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}" \
+      "$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true
+    rm -f "$_ssh_key_file"
+    if [ -s "$_fetch_log" ]; then
+      CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \
+        | tee -a /opt/disinto-logs/collect-engagement.log || true
+    else
+      echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2
+    fi
+    rm -f "$_fetch_log"
+  else
+    echo "edge: collect-engagement: CADDY_SSH_KEY not set, skipping" >&2
+  fi
+done) &
+
 # Caddy as main process — run in foreground via wait so background jobs survive
 # (exec replaces the shell, which can orphan
backgrounded subshells) caddy run --config /etc/caddy/Caddyfile --adapter caddyfile & diff --git a/formulas/collect-engagement.toml b/formulas/collect-engagement.toml new file mode 100644 index 0000000..fdfa65e --- /dev/null +++ b/formulas/collect-engagement.toml @@ -0,0 +1,172 @@ +# formulas/collect-engagement.toml — Collect website engagement data +# +# Daily formula: SSH into Caddy host, fetch access log, parse locally, +# commit evidence JSON to ops repo via Forgejo API. +# +# Triggered by cron in the edge container entrypoint (daily at 23:50 UTC). +# Design choices from #426: Q1=A (fetch raw log, process locally), +# Q2=A (direct cron in edge container), Q3=B (dedicated purpose-limited SSH key). +# +# Steps: fetch-log → parse-engagement → commit-evidence + +name = "collect-engagement" +description = "SSH-fetch Caddy access log, parse engagement metrics, commit evidence" +version = 1 + +[context] +files = ["AGENTS.md"] + +[vars.caddy_host] +description = "SSH host for the Caddy server" +required = false +default = "${CADDY_SSH_HOST:-disinto.ai}" + +[vars.caddy_user] +description = "SSH user on the Caddy host" +required = false +default = "${CADDY_SSH_USER:-debian}" + +[vars.caddy_log_path] +description = "Path to Caddy access log on the remote host" +required = false +default = "${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}" + +[vars.local_log_path] +description = "Local path to store fetched access log" +required = false +default = "/tmp/caddy-access-log-fetch.log" + +[vars.evidence_dir] +description = "Evidence output directory in the ops repo" +required = false +default = "evidence/engagement" + +# ── Step 1: SSH fetch ──────────────────────────────────────────────── + +[[steps]] +id = "fetch-log" +title = "Fetch Caddy access log from remote host via SSH" +description = """ +Fetch today's Caddy access log segment from the remote host using SCP. 
+ +The SSH key is read from the environment (CADDY_SSH_KEY), which is +decrypted from .env.vault.enc by the dispatcher. It is NEVER hardcoded. + +1. Write the SSH key to a temporary file with restricted permissions: + _ssh_key_file=$(mktemp) + trap 'rm -f "$_ssh_key_file"' EXIT + printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file" + chmod 0600 "$_ssh_key_file" + +2. Verify connectivity: + ssh -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \ + -o ConnectTimeout=10 -o BatchMode=yes \ + {{caddy_user}}@{{caddy_host}} 'echo ok' + +3. Fetch the access log via scp: + scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \ + -o ConnectTimeout=10 -o BatchMode=yes \ + "{{caddy_user}}@{{caddy_host}}:{{caddy_log_path}}" \ + "{{local_log_path}}" + +4. Verify the fetched file is non-empty: + if [ ! -s "{{local_log_path}}" ]; then + echo "WARNING: fetched access log is empty — site may have no traffic" + else + echo "Fetched $(wc -l < "{{local_log_path}}") lines from {{caddy_host}}" + fi + +5. Clean up the temporary key file: + rm -f "$_ssh_key_file" +""" + +# ── Step 2: Parse engagement ───────────────────────────────────────── + +[[steps]] +id = "parse-engagement" +title = "Run collect-engagement.sh against the local log copy" +description = """ +Run the engagement parser against the locally fetched access log. + +1. Set CADDY_ACCESS_LOG to point at the local copy so collect-engagement.sh + reads from it instead of the default path: + export CADDY_ACCESS_LOG="{{local_log_path}}" + +2. Run the parser: + bash "$FACTORY_ROOT/site/collect-engagement.sh" + +3. Verify the evidence JSON was written: + REPORT_DATE=$(date -u +%Y-%m-%d) + EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json" + if [ -f "$EVIDENCE_FILE" ]; then + echo "Evidence written: $EVIDENCE_FILE" + jq . "$EVIDENCE_FILE" + else + echo "ERROR: evidence file not found at $EVIDENCE_FILE" + exit 1 + fi + +4. 
Clean up the fetched log: + rm -f "{{local_log_path}}" +""" +needs = ["fetch-log"] + +# ── Step 3: Commit evidence ────────────────────────────────────────── + +[[steps]] +id = "commit-evidence" +title = "Commit evidence JSON to ops repo via Forgejo API" +description = """ +Commit the dated evidence JSON to the ops repo so the planner can +consume it during gap analysis. + +1. Read the evidence file: + REPORT_DATE=$(date -u +%Y-%m-%d) + EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json" + CONTENT=$(base64 < "$EVIDENCE_FILE") + +2. Check if the file already exists in the ops repo (update vs create): + OPS_OWNER="${OPS_FORGE_OWNER:-${FORGE_REPO%%/*}}" + OPS_REPO="${OPS_FORGE_REPO:-${PROJECT_NAME:-disinto}-ops}" + FILE_PATH="{{evidence_dir}}/${REPORT_DATE}.json" + + EXISTING=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \ + 2>/dev/null || echo "") + +3. Create or update the file via Forgejo API: + if [ -n "$EXISTING" ] && printf '%s' "$EXISTING" | jq -e '.sha' >/dev/null 2>&1; then + # Update existing file + SHA=$(printf '%s' "$EXISTING" | jq -r '.sha') + curl -sf -X PUT \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \ + -d "$(jq -nc --arg content "$CONTENT" --arg sha "$SHA" --arg msg "evidence: engagement ${REPORT_DATE}" \ + '{message: $msg, content: $content, sha: $sha}')" + echo "Updated existing evidence file in ops repo" + else + # Create new file + curl -sf -X POST \ + -H "Authorization: token ${FORGE_TOKEN}" \ + -H "Content-Type: application/json" \ + "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \ + -d "$(jq -nc --arg content "$CONTENT" --arg msg "evidence: engagement ${REPORT_DATE}" \ + '{message: $msg, content: $content}')" + echo "Created evidence file in ops repo" + fi + +4. 
Verify the commit landed: + VERIFY=$(curl -sf \ + -H "Authorization: token ${FORGE_TOKEN}" \ + "${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \ + | jq -r '.name // empty') + if [ "$VERIFY" = "${REPORT_DATE}.json" ]; then + echo "Evidence committed: ${FILE_PATH}" + else + echo "ERROR: could not verify evidence commit" + exit 1 + fi +""" +needs = ["parse-engagement"]