Merge pull request 'fix: feat: collect-engagement formula + container script — SSH fetch + local parse + evidence commit (#745)' (#761) from fix/issue-745 into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
This commit is contained in:
commit
c7a1c444e9
2 changed files with 207 additions and 0 deletions
|
|
@ -173,6 +173,41 @@ PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
|
|||
sleep 1200 # 20 minutes
|
||||
done) &
|
||||
|
||||
# Start daily engagement collection cron loop in background (#745)
# Runs collect-engagement.sh daily at ~23:50 UTC via a sleep loop that
# calculates seconds until the next 23:50 window. The SSH key is taken from
# the CADDY_SSH_KEY environment variable (from .env.vault.enc); when it is
# unset the run is skipped and the loop waits for the next window.
(while true; do
  # Calculate seconds until next 23:50 UTC.
  # Epoch seconds roll over exactly at UTC midnight, so today's 23:50 can be
  # derived arithmetically. This avoids GNU-only `date -d`, whose failure
  # previously fell back to a target of 0 and produced a negative sleep.
  _now=$(date -u +%s)
  _target=$(( _now - _now % 86400 + 23 * 3600 + 50 * 60 ))
  if [ "$_target" -le "$_now" ]; then
    # Today's window has already passed; aim for tomorrow's.
    _target=$(( _target + 86400 ))
  fi
  _sleep_secs=$(( _target - _now ))
  echo "edge: collect-engagement scheduled in ${_sleep_secs}s (next 23:50 UTC)" >&2
  sleep "$_sleep_secs"

  if [ -n "${CADDY_SSH_KEY:-}" ]; then
    # Private scratch directory for the key and the fetched log, so neither
    # lives at a predictable path under /tmp.
    if ! _work_dir=$(mktemp -d); then
      echo "edge: collect-engagement: mktemp failed, skipping" >&2
      continue
    fi
    _ssh_key_file="${_work_dir}/ssh-key"
    _fetch_log="${_work_dir}/caddy-access.log"

    printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
    # ssh/scp refuse private keys that are readable by group/other.
    chmod 0600 "$_ssh_key_file"

    # Best-effort fetch: scp output is appended to the collection log and a
    # failed transfer is detected below via the empty/missing local copy.
    scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes \
      "${CADDY_SSH_USER:-debian}@${CADDY_SSH_HOST:-disinto.ai}:${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}" \
      "$_fetch_log" 2>&1 | tee -a /opt/disinto-logs/collect-engagement.log || true
    rm -f "$_ssh_key_file"

    if [ -s "$_fetch_log" ]; then
      # Set CADDY_ACCESS_LOG so the script reads from the fetched local copy
      CADDY_ACCESS_LOG="$_fetch_log" bash /opt/disinto/site/collect-engagement.sh 2>&1 \
        | tee -a /opt/disinto-logs/collect-engagement.log || true
    else
      echo "edge: collect-engagement: fetched log is empty, skipping parse" >&2
    fi
    rm -rf -- "${_work_dir:?}"
  else
    echo "edge: collect-engagement: CADDY_SSH_KEY not set, skipping" >&2
  fi
done) &
|
||||
|
||||
# Caddy as main process — run in foreground via wait so background jobs survive
|
||||
# (exec replaces the shell, which can orphan backgrounded subshells)
|
||||
caddy run --config /etc/caddy/Caddyfile --adapter caddyfile &
|
||||
|
|
|
|||
172
formulas/collect-engagement.toml
Normal file
172
formulas/collect-engagement.toml
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
# formulas/collect-engagement.toml — Collect website engagement data
|
||||
#
|
||||
# Daily formula: SSH into Caddy host, fetch access log, parse locally,
|
||||
# commit evidence JSON to ops repo via Forgejo API.
|
||||
#
|
||||
# Triggered by the sleep-based cron loop in the edge container entrypoint (daily at ~23:50 UTC).
|
||||
# Design choices from #426: Q1=A (fetch raw log, process locally),
|
||||
# Q2=A (direct cron in edge container), Q3=B (dedicated purpose-limited SSH key).
|
||||
#
|
||||
# Steps: fetch-log → parse-engagement → commit-evidence
|
||||
|
||||
name = "collect-engagement"
|
||||
description = "SSH-fetch Caddy access log, parse engagement metrics, commit evidence"
|
||||
version = 1
|
||||
|
||||
[context]
|
||||
files = ["AGENTS.md"]
|
||||
|
||||
[vars.caddy_host]
|
||||
description = "SSH host for the Caddy server"
|
||||
required = false
|
||||
default = "${CADDY_SSH_HOST:-disinto.ai}"
|
||||
|
||||
[vars.caddy_user]
|
||||
description = "SSH user on the Caddy host"
|
||||
required = false
|
||||
default = "${CADDY_SSH_USER:-debian}"
|
||||
|
||||
[vars.caddy_log_path]
|
||||
description = "Path to Caddy access log on the remote host"
|
||||
required = false
|
||||
default = "${CADDY_ACCESS_LOG:-/var/log/caddy/access.log}"
|
||||
|
||||
[vars.local_log_path]
|
||||
description = "Local path to store fetched access log"
|
||||
required = false
|
||||
default = "/tmp/caddy-access-log-fetch.log"
|
||||
|
||||
[vars.evidence_dir]
|
||||
description = "Evidence output directory in the ops repo"
|
||||
required = false
|
||||
default = "evidence/engagement"
|
||||
|
||||
# ── Step 1: SSH fetch ────────────────────────────────────────────────
|
||||
|
||||
[[steps]]
|
||||
id = "fetch-log"
|
||||
title = "Fetch Caddy access log from remote host via SSH"
|
||||
description = """
|
||||
Fetch the Caddy access log (the whole file at {{caddy_log_path}}) from the remote host using SCP.
|
||||
|
||||
The SSH key is read from the environment (CADDY_SSH_KEY), which is
|
||||
decrypted from .env.vault.enc by the dispatcher. It is NEVER hardcoded.
|
||||
|
||||
1. Write the SSH key to a temporary file with restricted permissions:
|
||||
_ssh_key_file=$(mktemp)
|
||||
trap 'rm -f "$_ssh_key_file"' EXIT
|
||||
printf '%s\n' "$CADDY_SSH_KEY" > "$_ssh_key_file"
|
||||
chmod 0600 "$_ssh_key_file"
|
||||
|
||||
2. Verify connectivity:
|
||||
ssh -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
|
||||
-o ConnectTimeout=10 -o BatchMode=yes \
|
||||
{{caddy_user}}@{{caddy_host}} 'echo ok'
|
||||
|
||||
3. Fetch the access log via scp:
|
||||
scp -i "$_ssh_key_file" -o StrictHostKeyChecking=accept-new \
|
||||
-o ConnectTimeout=10 -o BatchMode=yes \
|
||||
"{{caddy_user}}@{{caddy_host}}:{{caddy_log_path}}" \
|
||||
"{{local_log_path}}"
|
||||
|
||||
4. Verify the fetched file is non-empty:
|
||||
if [ ! -s "{{local_log_path}}" ]; then
|
||||
echo "WARNING: fetched access log is empty — site may have no traffic"
|
||||
else
|
||||
echo "Fetched $(wc -l < "{{local_log_path}}") lines from {{caddy_host}}"
|
||||
fi
|
||||
|
||||
5. Clean up the temporary key file:
|
||||
rm -f "$_ssh_key_file"
|
||||
"""
|
||||
|
||||
# ── Step 2: Parse engagement ─────────────────────────────────────────
|
||||
|
||||
[[steps]]
|
||||
id = "parse-engagement"
|
||||
title = "Run collect-engagement.sh against the local log copy"
|
||||
description = """
|
||||
Run the engagement parser against the locally fetched access log.
|
||||
|
||||
1. Set CADDY_ACCESS_LOG to point at the local copy so collect-engagement.sh
|
||||
reads from it instead of the default path:
|
||||
export CADDY_ACCESS_LOG="{{local_log_path}}"
|
||||
|
||||
2. Run the parser:
|
||||
bash "$FACTORY_ROOT/site/collect-engagement.sh"
|
||||
|
||||
3. Verify the evidence JSON was written:
|
||||
REPORT_DATE=$(date -u +%Y-%m-%d)
|
||||
EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
|
||||
if [ -f "$EVIDENCE_FILE" ]; then
|
||||
echo "Evidence written: $EVIDENCE_FILE"
|
||||
jq . "$EVIDENCE_FILE"
|
||||
else
|
||||
echo "ERROR: evidence file not found at $EVIDENCE_FILE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
4. Clean up the fetched log:
|
||||
rm -f "{{local_log_path}}"
|
||||
"""
|
||||
needs = ["fetch-log"]
|
||||
|
||||
# ── Step 3: Commit evidence ──────────────────────────────────────────
|
||||
|
||||
[[steps]]
|
||||
id = "commit-evidence"
|
||||
title = "Commit evidence JSON to ops repo via Forgejo API"
|
||||
description = """
|
||||
Commit the dated evidence JSON to the ops repo so the planner can
|
||||
consume it during gap analysis.
|
||||
|
||||
1. Read the evidence file:
|
||||
REPORT_DATE=$(date -u +%Y-%m-%d)
|
||||
EVIDENCE_FILE="${OPS_REPO_ROOT}/{{evidence_dir}}/${REPORT_DATE}.json"
|
||||
CONTENT=$(base64 < "$EVIDENCE_FILE")
|
||||
|
||||
2. Check if the file already exists in the ops repo (update vs create):
|
||||
OPS_OWNER="${OPS_FORGE_OWNER:-${FORGE_REPO%%/*}}"
|
||||
OPS_REPO="${OPS_FORGE_REPO:-${PROJECT_NAME:-disinto}-ops}"
|
||||
FILE_PATH="{{evidence_dir}}/${REPORT_DATE}.json"
|
||||
|
||||
EXISTING=$(curl -sf \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
|
||||
2>/dev/null || echo "")
|
||||
|
||||
3. Create or update the file via Forgejo API:
|
||||
if [ -n "$EXISTING" ] && printf '%s' "$EXISTING" | jq -e '.sha' >/dev/null 2>&1; then
|
||||
# Update existing file
|
||||
SHA=$(printf '%s' "$EXISTING" | jq -r '.sha')
|
||||
curl -sf -X PUT \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
|
||||
-d "$(jq -nc --arg content "$CONTENT" --arg sha "$SHA" --arg msg "evidence: engagement ${REPORT_DATE}" \
|
||||
'{message: $msg, content: $content, sha: $sha}')"
|
||||
echo "Updated existing evidence file in ops repo"
|
||||
else
|
||||
# Create new file
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
|
||||
-d "$(jq -nc --arg content "$CONTENT" --arg msg "evidence: engagement ${REPORT_DATE}" \
|
||||
'{message: $msg, content: $content}')"
|
||||
echo "Created evidence file in ops repo"
|
||||
fi
|
||||
|
||||
4. Verify the commit landed:
|
||||
VERIFY=$(curl -sf \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
"${FORGE_URL}/api/v1/repos/${OPS_OWNER}/${OPS_REPO}/contents/${FILE_PATH}" \
|
||||
| jq -r '.name // empty')
|
||||
if [ "$VERIFY" = "${REPORT_DATE}.json" ]; then
|
||||
echo "Evidence committed: ${FILE_PATH}"
|
||||
else
|
||||
echo "ERROR: could not verify evidence commit"
|
||||
exit 1
|
||||
fi
|
||||
"""
|
||||
needs = ["parse-engagement"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue