Compare commits
1 commit
main
...
fix/issue-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
603e007e6e |
38 changed files with 147 additions and 1003 deletions
|
|
@ -4,16 +4,10 @@
|
|||
# Steps:
|
||||
# 1. shellcheck — lint all .sh files (warnings+errors)
|
||||
# 2. duplicate-detection — report copy-pasted code blocks (fails only on new duplicates for PRs)
|
||||
#
|
||||
# Timeouts:
|
||||
# Workflow-level default (10m) applies to all steps unless overridden.
|
||||
|
||||
when:
|
||||
event: [push, pull_request]
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 10m
|
||||
|
||||
# Override default clone to authenticate against Forgejo using FORGE_TOKEN.
|
||||
# Required because Forgejo is configured with REQUIRE_SIGN_IN, so anonymous
|
||||
# git clones fail with exit code 128. FORGE_TOKEN is injected globally via
|
||||
|
|
@ -22,13 +16,8 @@ clone:
|
|||
git:
|
||||
image: alpine/git
|
||||
commands:
|
||||
- |
|
||||
if [ -n "${FORGE_TOKEN:-}" ]; then
|
||||
AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:${FORGE_TOKEN}@|")
|
||||
git clone --depth 1 "$AUTH_URL" .
|
||||
else
|
||||
git clone --depth 1 "$CI_REPO_CLONE_URL" .
|
||||
fi
|
||||
- AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
|
||||
- git clone --depth 1 "$AUTH_URL" .
|
||||
- git fetch --depth 1 origin "$CI_COMMIT_REF"
|
||||
- git checkout FETCH_HEAD
|
||||
|
||||
|
|
|
|||
|
|
@ -7,15 +7,13 @@
|
|||
#
|
||||
# Checks:
|
||||
# 1. shellcheck — syntax check on tests/smoke-edge-subpath.sh
|
||||
# 2. caddyfile-routing-test — verify Caddyfile routing block shape
|
||||
# 3. test-caddyfile-routing — run standalone unit test for Caddyfile structure
|
||||
# 2. caddy validate — validate the Caddyfile template syntax
|
||||
# 3. caddyfile-routing-test — verify Caddyfile routing block shape
|
||||
# 4. test-caddyfile-routing — run standalone unit test for Caddyfile structure
|
||||
#
|
||||
# Triggers:
|
||||
# - Pull requests that modify edge-related files
|
||||
#
|
||||
# Timeouts:
|
||||
# Workflow-level default (10m) applies to all steps unless overridden.
|
||||
#
|
||||
# Environment variables (inherited from WOODPECKER_ENVIRONMENT):
|
||||
# EDGE_BASE_URL — Edge proxy URL for reference (default: http://localhost)
|
||||
# EDGE_TIMEOUT — Request timeout in seconds (default: 30)
|
||||
|
|
@ -25,9 +23,6 @@
|
|||
when:
|
||||
event: pull_request
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 10m
|
||||
|
||||
steps:
|
||||
# ── 1. ShellCheck on smoke script ────────────────────────────────────────
|
||||
# `shellcheck` validates bash syntax, style, and common pitfalls.
|
||||
|
|
@ -64,7 +59,6 @@ steps:
|
|||
echo ''
|
||||
echo ' # Reverse proxy to Forgejo'
|
||||
echo ' handle /forge/* {'
|
||||
echo ' uri strip_prefix /forge'
|
||||
echo ' reverse_proxy 127.0.0.1:3000'
|
||||
echo ' }'
|
||||
echo ''
|
||||
|
|
@ -100,7 +94,22 @@ steps:
|
|||
cp edge-render/Caddyfile edge-render/Caddyfile.rendered
|
||||
echo "Caddyfile rendered successfully"
|
||||
|
||||
# ── 3. Caddyfile routing block shape test ─────────────────────────────────
|
||||
# ── 3. Caddy config validation ───────────────────────────────────────────
|
||||
# `caddy validate` checks Caddyfile syntax and configuration.
|
||||
# This validates the rendered Caddyfile against Caddy's parser.
|
||||
# Exit codes:
|
||||
# 0 — configuration is valid
|
||||
# 1 — configuration has errors
|
||||
- name: caddy-validate
|
||||
image: alpine:3.19
|
||||
commands:
|
||||
- apk add --no-cache ca-certificates curl
|
||||
- curl -sS -o /tmp/caddy "https://caddyserver.com/api/download?os=linux&arch=amd64"
|
||||
- chmod +x /tmp/caddy
|
||||
- /tmp/caddy version
|
||||
- /tmp/caddy validate --config edge-render/Caddyfile.rendered --adapter caddyfile
|
||||
|
||||
# ── 4. Caddyfile routing block shape test ─────────────────────────────────
|
||||
# Verify that the Caddyfile contains all required routing blocks:
|
||||
# - /forge/ — Forgejo subpath
|
||||
# - /ci/ — Woodpecker subpath
|
||||
|
|
@ -181,7 +190,7 @@ steps:
|
|||
exit 1
|
||||
fi
|
||||
|
||||
# ── 4. Standalone Caddyfile routing test ─────────────────────────────────
|
||||
# ── 5. Standalone Caddyfile routing test ─────────────────────────────────
|
||||
# Run the standalone unit test for Caddyfile routing block validation.
|
||||
# This test extracts the Caddyfile template from edge.hcl and validates
|
||||
# its structure without requiring a running Caddy instance.
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
# .woodpecker/lint-ci.yml — CI pipeline config validator
|
||||
#
|
||||
# Runs `disinto validate lint-ci` to check all .woodpecker/*.yml files for:
|
||||
# - Steps missing a timeout declaration
|
||||
# - Network-fetch commands without per-command timeouts
|
||||
#
|
||||
# Triggers on PRs/pushes that touch any CI config or the validator itself.
|
||||
|
||||
when:
|
||||
- event: [push, pull_request]
|
||||
path:
|
||||
- ".woodpecker/**"
|
||||
- "bin/disinto"
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 5m
|
||||
|
||||
# Authenticated clone — same pattern as .woodpecker/ci.yml.
|
||||
clone:
|
||||
git:
|
||||
image: alpine/git
|
||||
commands:
|
||||
- AUTH_URL=$(printf '%s' "$CI_REPO_CLONE_URL" | sed "s|://|://token:$FORGE_TOKEN@|")
|
||||
- git clone --depth 1 "$AUTH_URL" .
|
||||
- git fetch --depth 1 origin "$CI_COMMIT_REF"
|
||||
- git checkout FETCH_HEAD
|
||||
|
||||
steps:
|
||||
- name: lint-ci
|
||||
image: alpine:3
|
||||
commands:
|
||||
- apk add --no-cache bash python3 py3-yaml
|
||||
- bash bin/disinto validate lint-ci .
|
||||
# Workflow-level timeout (5m) applies to all steps.
|
||||
|
|
@ -44,10 +44,6 @@
|
|||
# Pinned image versions match lib/init/nomad/install.sh (nomad 1.9.5 /
|
||||
# vault 1.18.5). Bump there AND here together — drift = CI passing on
|
||||
# syntax the runtime would reject.
|
||||
#
|
||||
# Timeouts:
|
||||
# Workflow-level default (15m) applies to all steps unless overridden
|
||||
# (vault-policy-validate needs longer for dev server startup).
|
||||
# =============================================================================
|
||||
|
||||
when:
|
||||
|
|
@ -61,9 +57,6 @@ when:
|
|||
- "vault/roles.yaml"
|
||||
- ".woodpecker/nomad-validate.yml"
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 15m
|
||||
|
||||
# Authenticated clone — same pattern as .woodpecker/ci.yml. Forgejo is
|
||||
# configured with REQUIRE_SIGN_IN, so anonymous git clones fail (exit 128).
|
||||
# FORGE_TOKEN is injected globally via WOODPECKER_ENVIRONMENT.
|
||||
|
|
@ -272,7 +265,7 @@ steps:
|
|||
- name: vault-roles-validate
|
||||
image: python:3.12-alpine
|
||||
commands:
|
||||
- pip install --quiet --disable-pip-version-check --default-timeout 30 pyyaml yamllint
|
||||
- pip install --quiet --disable-pip-version-check pyyaml yamllint
|
||||
- |
|
||||
set -e
|
||||
if [ ! -f vault/roles.yaml ]; then
|
||||
|
|
|
|||
|
|
@ -4,10 +4,6 @@
|
|||
# - ghcr.io/disinto/reproduce:<tag>
|
||||
# - ghcr.io/disinto/edge:<tag>
|
||||
#
|
||||
# Timeouts:
|
||||
# Workflow-level default (20m) applies to all steps unless overridden.
|
||||
# Image builds can be slow for large images.
|
||||
#
|
||||
# Requires GHCR_TOKEN secret configured in Woodpecker with push access
|
||||
# to ghcr.io/disinto.
|
||||
|
||||
|
|
@ -15,9 +11,6 @@ when:
|
|||
event: tag
|
||||
ref: refs/tags/v*
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 20m
|
||||
|
||||
clone:
|
||||
git:
|
||||
image: alpine/git
|
||||
|
|
|
|||
|
|
@ -3,9 +3,6 @@
|
|||
# Triggers on pull requests touching secret-adjacent paths.
|
||||
# Sources lib/secret-scan.sh and scans each changed file's content.
|
||||
# Exits non-zero if any potential secret is detected.
|
||||
#
|
||||
# Timeouts:
|
||||
# Workflow-level default (5m) applies to all steps unless overridden.
|
||||
|
||||
when:
|
||||
- event: pull_request
|
||||
|
|
@ -18,9 +15,6 @@ when:
|
|||
- "lib/hvault.sh"
|
||||
- "lib/action-vault.sh"
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 5m
|
||||
|
||||
clone:
|
||||
git:
|
||||
image: alpine/git
|
||||
|
|
|
|||
|
|
@ -8,9 +8,6 @@ when:
|
|||
- "tests/**"
|
||||
- ".woodpecker/smoke-init.yml"
|
||||
|
||||
# Workflow-level timeout default — propagates to all steps without their own timeout.
|
||||
timeout: 5m
|
||||
|
||||
steps:
|
||||
- name: smoke-init
|
||||
image: python:3-alpine
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Disinto — Agent Instructions
|
||||
|
||||
## What this repo is
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Architect — Agent Instructions
|
||||
|
||||
## What this agent is
|
||||
|
|
|
|||
|
|
@ -119,11 +119,6 @@ You are the architect agent for ${FORGE_REPO}. Work through the formula below.
|
|||
Your role: strategic decomposition of vision issues into development sprints.
|
||||
Propose sprints via PRs on the ops repo, converse with humans through PR comments.
|
||||
You are READ-ONLY on the project repo — sub-issues are filed by filer-bot after sprint PR merge (#764).
|
||||
DO NOT create issues, PRs, or any other resource on the project repo. Any sub-issue
|
||||
specification must go only into the filer:begin/filer:end block of the sprint pitch.
|
||||
If you think sub-issues should be filed, write them into the sprint file's filer:begin
|
||||
block only. You do not have permission to POST to the project repo and any such call
|
||||
will return 403 and fail this run.
|
||||
|
||||
## Project context
|
||||
${CONTEXT_BLOCK}
|
||||
|
|
@ -152,11 +147,6 @@ You are the architect agent for ${FORGE_REPO}. Work through the formula below.
|
|||
Your role: strategic decomposition of vision issues into development sprints.
|
||||
Propose sprints via PRs on the ops repo, converse with humans through PR comments.
|
||||
You are READ-ONLY on the project repo — sub-issues are filed by filer-bot after sprint PR merge (#764).
|
||||
DO NOT create issues, PRs, or any other resource on the project repo. Any sub-issue
|
||||
specification must go only into the filer:begin/filer:end block of the sprint pitch.
|
||||
If you think sub-issues should be filed, write them into the sprint file's filer:begin
|
||||
block only. You do not have permission to POST to the project repo and any such call
|
||||
will return 403 and fail this run.
|
||||
|
||||
## CURRENT STATE: Approved PR awaiting initial design questions
|
||||
|
||||
|
|
@ -191,11 +181,6 @@ You are the architect agent for ${FORGE_REPO}. Work through the formula below.
|
|||
Your role: strategic decomposition of vision issues into development sprints.
|
||||
Propose sprints via PRs on the ops repo, converse with humans through PR comments.
|
||||
You are READ-ONLY on the project repo — sub-issues are filed by filer-bot after sprint PR merge (#764).
|
||||
DO NOT create issues, PRs, or any other resource on the project repo. Any sub-issue
|
||||
specification must go only into the filer:begin/filer:end block of the sprint pitch.
|
||||
If you think sub-issues should be filed, write them into the sprint file's filer:begin
|
||||
block only. You do not have permission to POST to the project repo and any such call
|
||||
will return 403 and fail this run.
|
||||
|
||||
## CURRENT STATE: Design Q&A in progress
|
||||
|
||||
|
|
@ -546,11 +531,6 @@ IMPORTANT: Do NOT include design forks or questions. This is a go/no-go pitch.
|
|||
The ## Sub-issues block is parsed by the filer-bot pipeline after sprint PR merge.
|
||||
Each sub-issue between filer:begin/end markers becomes a Forgejo issue.
|
||||
|
||||
CRITICAL: You are READ-ONLY on the project repo. DO NOT create issues, PRs, or
|
||||
POST to any /repos/${FORGE_REPO}/... endpoint. Sub-issues belong only inside the
|
||||
filer:begin/filer:end block above. Any direct API call to the project repo will
|
||||
return 403 and abort this run.
|
||||
|
||||
---
|
||||
|
||||
${pitch_context}
|
||||
|
|
@ -917,27 +897,6 @@ if [ "${has_responses_to_process:-false}" = "true" ]; then
|
|||
fi
|
||||
fi
|
||||
|
||||
# ── Regression guard: detect direct issue creation by architect session ──
# Scans the architect log for any line that contains "POST" followed (anywhere
# later on the same line) by the project repo's issues path,
# /repos/${FORGE_REPO}/issues. This is a cheap guard — if the model used its
# Bash tool to curl POST /issues on the project repo, it would appear in the
# log. Fails loudly on detection.
#
# Globals:   FORGE_REPO (read), LOG_FILE (read)
# Outputs:   diagnostics via log() and a FATAL line on stderr on detection
# Returns:   0 when nothing is detected or the log file is missing/unreadable;
#            exits the whole script with status 1 on detection.
check_architect_issue_filing() {
  local project_repo_path
  project_repo_path="/repos/${FORGE_REPO}/issues"

  # Nothing to scan — same outcome as a scan that finds no match.
  [ -r "$LOG_FILE" ] || return 0

  # The path is matched as a literal string (awk index(), value passed through
  # the environment) instead of being spliced into a regex: repo names commonly
  # contain '.', which as a regex would also match unrelated repositories.
  #
  # Lines carrying this guard's own "regression detected" message are skipped,
  # because that message itself contains "POST to <path>".
  # NOTE(review): this only matters if log() appends to $LOG_FILE — confirm.
  if ARCHITECT_GUARD_PATH="$project_repo_path" awk '
      index($0, "regression detected") { next }
      {
        at = index($0, "POST")
        if (at && index(substr($0, at + 4), ENVIRON["ARCHITECT_GUARD_PATH"])) {
          found = 1
          exit
        }
      }
      END { exit !found }
    ' "$LOG_FILE" 2>/dev/null; then
    log "ERROR: regression detected — architect session attempted to POST to ${project_repo_path}"
    log "This violates the read-only contract established in #764."
    log "The architect-bot must NOT file issues directly on the project repo."
    log "Sub-issues are filed exclusively by filer-bot after sprint PR merge."
    echo "FATAL: architect-bot attempted direct issue creation on project repo" >&2
    exit 1
  fi
}
|
||||
|
||||
# Run regression guard before cleanup
|
||||
check_architect_issue_filing
|
||||
|
||||
# ── Clean up scratch files (legacy single file + per-issue files) ──────────
|
||||
rm -f "$SCRATCH_FILE"
|
||||
rm -f "${SCRATCH_FILE_PREFIX}"-*.md
|
||||
|
|
|
|||
319
bin/disinto
319
bin/disinto
|
|
@ -13,7 +13,6 @@
|
|||
# disinto run <action-id> Run action in ephemeral runner container
|
||||
# disinto ci-logs <pipeline> [--step <name>] Read CI logs from Woodpecker SQLite
|
||||
# disinto backup create <outfile> Export factory state for migration
|
||||
# disinto validate [subcommand] Validate factory artifacts (lint-ci)
|
||||
#
|
||||
# Usage:
|
||||
# disinto init https://github.com/user/repo
|
||||
|
|
@ -65,26 +64,22 @@ Usage:
|
|||
disinto release <version> Create vault PR for release (e.g., v1.2.0)
|
||||
disinto hire-an-agent <agent-name> <role> [--formula <path>] [--local-model <url>] [--model <name>]
|
||||
Hire a new agent (create user + .profile repo; re-run to rotate credentials)
|
||||
disinto role <subcommand> Manage roles (enable/disable)
|
||||
disinto agent <subcommand> Manage agent state (enable/disable)
|
||||
disinto backup create <outfile> Export factory state (issues + ops bundle)
|
||||
disinto edge <verb> [options] Manage edge tunnel registrations
|
||||
disinto backup <subcommand> Backup and restore factory state
|
||||
disinto validate <subcommand> Validate factory artifacts
|
||||
|
||||
Validate subcommands:
|
||||
lint-ci Lint .woodpecker/*.yml for missing timeouts
|
||||
|
||||
Edge subcommands:
|
||||
register [project] Register a new tunnel (generates keypair if needed)
|
||||
deregister <project> Remove a tunnel registration
|
||||
status Show registered tunnels
|
||||
|
||||
Role subcommands:
|
||||
disable <role> Remove state file to disable role
|
||||
enable <role> Create state file to enable role
|
||||
disable --all Disable all roles
|
||||
enable --all Enable all roles
|
||||
status Show which roles are enabled/disabled
|
||||
Agent subcommands:
|
||||
disable <agent> Remove state file to disable agent
|
||||
enable <agent> Create state file to enable agent
|
||||
disable --all Disable all agents
|
||||
enable --all Enable all agents
|
||||
status Show which agents are enabled/disabled
|
||||
|
||||
Init options:
|
||||
--branch <name> Primary branch (default: auto-detect)
|
||||
|
|
@ -863,15 +858,18 @@ _disinto_init_nomad() {
|
|||
echo "[deploy] vault-runner: jobspec not found, skipping"
|
||||
fi
|
||||
|
||||
# Build custom images dry-run (if agents or edge services are included)
|
||||
if echo ",$with_services," | grep -qE ",(agents|edge),"; then
|
||||
# Build custom images dry-run (if agents, chat, or edge services are included)
|
||||
if echo ",$with_services," | grep -qE ",(agents|chat|edge),"; then
|
||||
echo ""
|
||||
echo "── Build images dry-run ──────────────────────────────"
|
||||
if echo ",$with_services," | grep -q ",agents,"; then
|
||||
echo "[build] [dry-run] docker build -t disinto/agents:local -f ${FACTORY_ROOT}/docker/agents/Dockerfile ${FACTORY_ROOT}"
|
||||
fi
|
||||
if echo ",$with_services," | grep -q ",chat,"; then
|
||||
echo "[build] [dry-run] docker build -t disinto/chat:local -f ${FACTORY_ROOT}/docker/chat/Dockerfile ${FACTORY_ROOT}/docker/chat"
|
||||
fi
|
||||
if echo ",$with_services," | grep -q ",edge,"; then
|
||||
echo "[build] [dry-run] docker build -t disinto/edge:local -f ${FACTORY_ROOT}/docker/edge/Dockerfile ${FACTORY_ROOT}"
|
||||
echo "[build] [dry-run] docker build -t disinto/edge:local -f ${FACTORY_ROOT}/docker/edge/Dockerfile ${FACTORY_ROOT}/docker/edge"
|
||||
fi
|
||||
fi
|
||||
exit 0
|
||||
|
|
@ -964,7 +962,7 @@ _disinto_init_nomad() {
|
|||
# Build custom images required by Nomad jobs (S4.2, S5.2, S5.5) — before deploy.
|
||||
# Single-node factory dev box: no multi-node pull needed, no registry auth.
|
||||
# Can upgrade to approach B (registry push/pull) later if multi-node.
|
||||
if echo ",$with_services," | grep -qE ",(agents|edge),"; then
|
||||
if echo ",$with_services," | grep -qE ",(agents|chat|edge),"; then
|
||||
echo ""
|
||||
echo "── Building custom images ─────────────────────────────"
|
||||
if echo ",$with_services," | grep -q ",agents,"; then
|
||||
|
|
@ -972,10 +970,15 @@ _disinto_init_nomad() {
|
|||
echo "── Building $tag ─────────────────────────────"
|
||||
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/agents/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
|
||||
fi
|
||||
if echo ",$with_services," | grep -q ",chat,"; then
|
||||
local tag="disinto/chat:local"
|
||||
echo "── Building $tag ─────────────────────────────"
|
||||
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/chat/Dockerfile" "${FACTORY_ROOT}/docker/chat" 2>&1 | tail -5
|
||||
fi
|
||||
if echo ",$with_services," | grep -q ",edge,"; then
|
||||
local tag="disinto/edge:local"
|
||||
echo "── Building $tag ─────────────────────────────"
|
||||
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/edge/Dockerfile" "${FACTORY_ROOT}" 2>&1 | tail -5
|
||||
docker build -t "$tag" -f "${FACTORY_ROOT}/docker/edge/Dockerfile" "${FACTORY_ROOT}/docker/edge" 2>&1 | tail -5
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
@ -2594,15 +2597,15 @@ disinto_ci_logs() {
|
|||
fi
|
||||
}
|
||||
|
||||
# ── role command ──────────────────────────────────────────────────────────────
|
||||
# Manage role state files (enable/disable roles)
|
||||
# Usage: disinto role <subcommand> [role-name]
|
||||
# disable <role> Remove state file to disable role
|
||||
# enable <role> Create state file to enable role
|
||||
# disable --all Disable all roles
|
||||
# enable --all Enable all roles
|
||||
# status Show enabled/disabled roles
|
||||
disinto_role() {
|
||||
# ── agent command ─────────────────────────────────────────────────────────────
|
||||
# Manage agent state files (enable/disable agents)
|
||||
# Usage: disinto agent <subcommand> [agent-name]
|
||||
# disable <agent> Remove state file to disable agent
|
||||
# enable <agent> Create state file to enable agent
|
||||
# disable --all Disable all agents
|
||||
# enable --all Enable all agents
|
||||
# status Show enabled/disabled agents
|
||||
disinto_agent() {
|
||||
local subcmd="${1:-}"
|
||||
local state_dir="${FACTORY_ROOT}/state"
|
||||
local all_agents=("dev" "reviewer" "gardener" "architect" "planner" "predictor")
|
||||
|
|
@ -2614,13 +2617,13 @@ disinto_role() {
|
|||
disable)
|
||||
local agent="${2:-}"
|
||||
if [ -z "$agent" ]; then
|
||||
echo "Error: role name required" >&2
|
||||
echo "Usage: disinto role disable <role-name>" >&2
|
||||
echo " disinto role disable --all" >&2
|
||||
echo "Error: agent name required" >&2
|
||||
echo "Usage: disinto agent disable <agent-name>" >&2
|
||||
echo " disinto agent disable --all" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ "$agent" = "--all" ]; then
|
||||
echo "Disabling all roles..."
|
||||
echo "Disabling all agents..."
|
||||
for a in "${all_agents[@]}"; do
|
||||
local state_file="${state_dir}/.${a}-active"
|
||||
if [ -f "$state_file" ]; then
|
||||
|
|
@ -2640,8 +2643,8 @@ disinto_role() {
|
|||
fi
|
||||
done
|
||||
if [ "$valid" = false ]; then
|
||||
echo "Error: unknown role '${agent}'" >&2
|
||||
echo "Valid roles: ${all_agents[*]}" >&2
|
||||
echo "Error: unknown agent '${agent}'" >&2
|
||||
echo "Valid agents: ${all_agents[*]}" >&2
|
||||
exit 1
|
||||
fi
|
||||
local state_file="${state_dir}/.${agent}-active"
|
||||
|
|
@ -2656,13 +2659,13 @@ disinto_role() {
|
|||
enable)
|
||||
local agent="${2:-}"
|
||||
if [ -z "$agent" ]; then
|
||||
echo "Error: role name required" >&2
|
||||
echo "Usage: disinto role enable <role-name>" >&2
|
||||
echo " disinto role enable --all" >&2
|
||||
echo "Error: agent name required" >&2
|
||||
echo "Usage: disinto agent enable <agent-name>" >&2
|
||||
echo " disinto agent enable --all" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ "$agent" = "--all" ]; then
|
||||
echo "Enabling all roles..."
|
||||
echo "Enabling all agents..."
|
||||
for a in "${all_agents[@]}"; do
|
||||
local state_file="${state_dir}/.${a}-active"
|
||||
if [ -f "$state_file" ]; then
|
||||
|
|
@ -2682,8 +2685,8 @@ disinto_role() {
|
|||
fi
|
||||
done
|
||||
if [ "$valid" = false ]; then
|
||||
echo "Error: unknown role '${agent}'" >&2
|
||||
echo "Valid roles: ${all_agents[*]}" >&2
|
||||
echo "Error: unknown agent '${agent}'" >&2
|
||||
echo "Valid agents: ${all_agents[*]}" >&2
|
||||
exit 1
|
||||
fi
|
||||
local state_file="${state_dir}/.${agent}-active"
|
||||
|
|
@ -2696,10 +2699,10 @@ disinto_role() {
|
|||
fi
|
||||
;;
|
||||
status)
|
||||
echo "Role Status"
|
||||
echo "=========="
|
||||
printf "%-12s %s\n" "ROLE" "STATUS"
|
||||
printf "%-12s %s\n" "----" "------"
|
||||
echo "Agent Status"
|
||||
echo "============"
|
||||
printf "%-12s %s\n" "AGENT" "STATUS"
|
||||
printf "%-12s %s\n" "------" "------"
|
||||
for a in "${all_agents[@]}"; do
|
||||
local state_file="${state_dir}/.${a}-active"
|
||||
local status
|
||||
|
|
@ -2713,29 +2716,23 @@ disinto_role() {
|
|||
;;
|
||||
*)
|
||||
cat <<EOF >&2
|
||||
Usage: disinto role <subcommand>
|
||||
Usage: disinto agent <subcommand>
|
||||
|
||||
Manage roles (enable/disable):
|
||||
Manage agent state files (enable/disable agents):
|
||||
|
||||
disable <role> Remove state file to disable role
|
||||
enable <role> Create state file to enable role
|
||||
disable --all Disable all roles
|
||||
enable --all Enable all roles
|
||||
status Show which roles are enabled/disabled
|
||||
disable <agent> Remove state file to disable agent
|
||||
enable <agent> Create state file to enable agent
|
||||
disable --all Disable all agents
|
||||
enable --all Enable all agents
|
||||
status Show which agents are enabled/disabled
|
||||
|
||||
Valid roles: dev, reviewer, gardener, architect, planner, predictor
|
||||
Valid agents: dev, reviewer, gardener, architect, planner, predictor
|
||||
EOF
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# ── agent command (deprecated — use 'role') ──────────────────────────────────
# Backward-compatibility shim: prints a deprecation notice on stderr, then
# hands every argument unchanged to disinto_role.
disinto_agent() {
  printf '%s\n' "Warning: 'disinto agent' is deprecated, use 'disinto role' instead" >&2
  disinto_role "$@"
}
|
||||
|
||||
# ── edge command ──────────────────────────────────────────────────────────────
|
||||
# Manage edge tunnel registrations (reverse SSH tunnels to edge hosts)
|
||||
# Usage: disinto edge <verb> [options]
|
||||
|
|
@ -3014,214 +3011,6 @@ disinto_backup() {
|
|||
esac
|
||||
}
|
||||
|
||||
# ── validate command ─────────────────────────────────────────────────────────
# Validates CI pipeline configs and other factory artifacts.
# Usage: disinto validate [subcommand]
#   lint-ci         Lint .woodpecker/*.yml for missing timeouts and unsafe
#                   commands (this is the default when no subcommand is given)
#   -h|--help|help  Print usage on stdout and return 0
#
# Any other subcommand prints an error plus the usage text on stderr and exits
# the script with status 1. Remaining arguments are forwarded to the subcommand.

# Print the usage text for `disinto validate` on stdout.
_disinto_validate_usage() {
  cat <<EOF
Usage: disinto validate <subcommand>

Validate factory artifacts:

  lint-ci    Lint .woodpecker/*.yml for missing timeouts and unsafe commands

Subcommands:
  lint-ci    Check CI pipeline files for:
               - Steps missing a timeout declaration
               - Network-fetch commands without per-command timeouts
EOF
}

disinto_validate() {
  local subcmd="${1:-lint-ci}"
  # With no arguments there is nothing to shift; that is not an error.
  shift || true

  case "$subcmd" in
    lint-ci)
      _validate_lint_ci "$@"
      ;;
    -h|--help|help)
      # An explicit help request is a success, not a usage error.
      _disinto_validate_usage
      ;;
    *)
      echo "Error: unknown validate subcommand '${subcmd}'" >&2
      _disinto_validate_usage >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# Lint .woodpecker/*.yml files for missing timeouts and unsafe commands.
#
# Checks:
#   1. Step-level timeout: every step must declare a `timeout:` value or
#      inherit from a workflow-level default. Violations are errors.
#   2. Command-level timeout: network-fetch commands (curl, wget, pip, etc.)
#      must include an explicit timeout flag (e.g. --max-time, --timeout).
#      Violations are warnings and do not affect the exit status.
#
# Arguments: $1 - directory that contains .woodpecker/ (default: .)
# Outputs:   progress, warnings and the summary line on stdout; errors and
#            hints on stderr
# Exits:     always exits the script (never returns): 0 when there are no
#            errors or nothing to lint, 1 on lint errors or when a pipeline
#            file cannot be parsed.
# Requires:  python3 with the PyYAML module.
_validate_lint_ci() {
  local lint_dir="${1:-.}"
  local woodpecker_dir="${lint_dir}/.woodpecker"
  local errors=0
  local warnings=0

  if [ ! -d "$woodpecker_dir" ]; then
    echo "No .woodpecker/ directory found at ${woodpecker_dir}"
    exit 0
  fi

  local -a yml_files=()
  local f
  while IFS= read -r f; do
    yml_files+=("$f")
  done < <(find "$woodpecker_dir" -maxdepth 1 \( -name '*.yml' -o -name '*.yaml' \) 2>/dev/null | sort)

  if [ ${#yml_files[@]} -eq 0 ]; then
    echo "No .woodpecker/*.yml files found"
    exit 0
  fi

  # The checker program is kept in a quoted here-doc so the shell never
  # expands anything inside it; the pipeline path is passed as argv[1] rather
  # than spliced into the Python source, so paths containing quotes are safe.
  # `read -d ''` hits EOF and returns non-zero by design, hence `|| true`.
  local lint_program
  IFS= read -r -d '' lint_program <<'PY' || true
import re
import sys

import yaml

try:
    with open(sys.argv[1]) as f:
        doc = yaml.safe_load(f)
except yaml.YAMLError as e:
    print(f'FATAL:YAML parse error: {e}', file=sys.stderr)
    sys.exit(1)

if not isinstance(doc, dict):
    sys.exit(0)

# A workflow-level `timeout:` acts as the default for every step.
has_workflow_timeout = doc.get('timeout') is not None

steps = doc.get('steps', [])
if isinstance(steps, dict):
    # Mapping form (step-name -> step body): the key is the step name.
    steps = [dict(body, name=body.get('name', key))
             for key, body in steps.items() if isinstance(body, dict)]
if not isinstance(steps, list):
    sys.exit(0)

# System package installs mention tools such as "curl" as package names, not
# as commands being run, so such lines are exempt from the checks below.
PKG_INSTALL = re.compile(r'\b(apk|apt|apt-get|yum|dnf)\s+(add|install)\b')
# Lines that hand off to a script interpreter are not inspected.
INTERPRETER = re.compile(r'\s*(bash|sh|zsh|python3?|node)\s')
BREW_INSTALL = re.compile(r'\bbrew\s+install\b')

# (tool pattern, extra pattern that must also match or None,
#  pattern of an acceptable timeout flag or None when the tool has none,
#  warning text)
CHECKS = [
    (r'\bcurl\b', None, r'(--max-time|-m\s+\d|--connect-timeout)',
     'curl without --max-time; consider: curl --max-time 30 ...'),
    (r'\bwget\b', None, r'--timeout=',
     'wget without --timeout; consider: wget --timeout=30 ...'),
    (r'\b(pip3?|pipenv)\b', r'\b(install|i)\b', r'(--default-timeout|--timeout)',
     'pip install without --default-timeout; consider: --default-timeout 30'),
    (r'\bnpm\b', r'\b(install|add)\b', r'--timeout',
     'npm install without --timeout; consider: --timeout 30000'),
    (r'\byarn\b', r'\b(add|install)\b', r'--timeout',
     'yarn add without --timeout; consider: --timeout 30000'),
    (r'\bgo\s+get\b', None, None,
     'go get has no timeout flag; wrap in a timeout(1) command'),
    (r'\bcargo\s+install\b', None, None,
     'cargo install has no timeout flag; wrap in a timeout(1) command'),
    (r'\bgem\s+install\b', None, None,
     'gem install has no timeout flag; wrap in a timeout(1) command'),
]

errors = []
warnings = []

for step in steps:
    if not isinstance(step, dict):
        continue
    name = step.get('name', '<unnamed>')
    commands = step.get('commands', [])
    if not isinstance(commands, list):
        continue

    # Check step-level timeout (unless workflow default exists)
    if 'timeout' not in step and not has_workflow_timeout:
        errors.append(f'error: {name} — step has no timeout; add `timeout: 5m` or inherit from workflow default')

    for cmd in commands:
        # A `- |` entry is one multi-line script: join backslash
        # continuations, then inspect each shell line on its own so that a
        # single `apk add` line does not exempt the rest of the script.
        script = re.sub(r'\\\r?\n', ' ', str(cmd))
        for line in script.splitlines():
            stripped = line.strip()
            # Skip comments and empty lines
            if not stripped or stripped.startswith('#'):
                continue

            # brew install is itself a network fetch without a timeout flag;
            # warn about it, then treat the rest of the line as package names.
            if BREW_INSTALL.search(line):
                warnings.append(f'warning: {name}/command — brew install has no timeout flag; wrap in a timeout(1) command')
                continue

            if PKG_INSTALL.search(line) or INTERPRETER.match(line):
                continue

            for tool, extra, timeout_flag, message in CHECKS:
                if not re.search(tool, line):
                    continue
                if extra is not None and not re.search(extra, line):
                    continue
                if timeout_flag is not None and re.search(timeout_flag, line):
                    continue
                warnings.append(f'warning: {name}/command — {message}')

for e in errors:
    print(f'E:{e}')
for w in warnings:
    print(f'W:{w}')
PY

  echo "Linting CI pipeline files in ${woodpecker_dir}..."
  echo ""

  local yml rel_path result line
  for yml in "${yml_files[@]}"; do
    # realpath --relative-to is not available everywhere; fall back to the raw path.
    rel_path=$(realpath --relative-to="$(pwd)" "$yml" 2>/dev/null || echo "$yml")

    # stderr is captured as well so a parse failure can be reported verbatim.
    result=$(python3 -c "$lint_program" "$yml" 2>&1) || {
      echo "ERROR: failed to parse $rel_path" >&2
      echo "$result" >&2
      exit 1
    }

    # The checker prefixes each finding with E: (error) or W: (warning);
    # any other output line is ignored.
    while IFS= read -r line; do
      case "$line" in
        E:*)
          echo "${line#E:}" >&2
          errors=$((errors + 1))
          ;;
        W:*)
          echo "${line#W:}"
          warnings=$((warnings + 1))
          ;;
      esac
    done <<< "$result"
  done

  echo ""
  echo "lint-ci: ${errors} error(s), ${warnings} warning(s)"

  if [ "$errors" -gt 0 ]; then
    echo ""
    echo "Fix: add \`timeout:\` to each step, or set a workflow-level default at the top of the pipeline file." >&2
    exit 1
  fi

  if [ "$warnings" -gt 0 ]; then
    echo "(warnings are non-blocking — add per-command timeouts for network calls)" >&2
  fi

  exit 0
}
|
||||
|
||||
# ── Main dispatch ────────────────────────────────────────────────────────────
|
||||
|
||||
case "${1:-}" in
|
||||
|
|
@ -3236,11 +3025,9 @@ case "${1:-}" in
|
|||
ci-logs) shift; disinto_ci_logs "$@" ;;
|
||||
release) shift; disinto_release "$@" ;;
|
||||
hire-an-agent) shift; disinto_hire_an_agent "$@" ;;
|
||||
role) shift; disinto_role "$@" ;;
|
||||
agent) shift; disinto_agent "$@" ;;
|
||||
edge) shift; disinto_edge "$@" ;;
|
||||
backup) shift; disinto_backup "$@" ;;
|
||||
validate) shift; disinto_validate "$@" ;;
|
||||
-h|--help) usage ;;
|
||||
*) usage ;;
|
||||
esac
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Dev Agent
|
||||
|
||||
**Role**: Implement issues autonomously — write code, push branches, address
|
||||
|
|
|
|||
|
|
@ -545,7 +545,7 @@ print(cfg.get('primary_branch', 'main'))
|
|||
if [ $((supervisor_iteration % SUPERVISOR_INTERVAL)) -eq 0 ] && [ "$now" -ge "$supervisor_iteration" ]; then
|
||||
if ! pgrep -f "supervisor-run.sh" >/dev/null; then
|
||||
log "Running supervisor (iteration ${iteration}, ${SUPERVISOR_INTERVAL}s interval) for ${toml}"
|
||||
gosu agent bash -c "cd ${DISINTO_DIR} && bash supervisor/supervisor-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/supervisor/supervisor.log" 2>&1 &
|
||||
gosu agent bash -c "cd ${DISINTO_DIR} && bash supervisor/supervisor-run.sh \"${toml}\"" >> "${DISINTO_LOG_DIR}/supervisor.log" 2>&1 &
|
||||
else
|
||||
log "Skipping supervisor — already running"
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ if [ -f /opt/disinto/lib/git-creds.sh ]; then
|
|||
fi
|
||||
|
||||
# Ensure log directory exists
|
||||
mkdir -p /opt/disinto-logs/supervisor
|
||||
mkdir -p /opt/disinto-logs
|
||||
|
||||
# ── Reverse tunnel (optional) ──────────────────────────────────────────
|
||||
# When EDGE_TUNNEL_HOST is set, open a single reverse-SSH forward so the
|
||||
|
|
@ -169,7 +169,7 @@ bash /opt/disinto/docker/edge/dispatcher.sh &
|
|||
# Start supervisor loop in background
|
||||
PROJECT_TOML="${PROJECT_TOML:-projects/disinto.toml}"
|
||||
(while true; do
|
||||
bash /opt/disinto/supervisor/supervisor-run.sh "/opt/disinto/${PROJECT_TOML}" 2>&1 | tee -a /opt/disinto-logs/supervisor/supervisor.log || true
|
||||
bash /opt/disinto/supervisor/supervisor-run.sh "/opt/disinto/${PROJECT_TOML}" 2>&1 | tee -a /opt/disinto-logs/supervisor.log || true
|
||||
sleep 1200 # 20 minutes
|
||||
done) &
|
||||
|
||||
|
|
|
|||
|
|
@ -22,8 +22,6 @@
|
|||
# architect-bot: READ-ONLY on project repo (GET issues/PRs/labels for context).
|
||||
# Cannot POST/PUT/PATCH/DELETE any project-repo resource.
|
||||
# Write access ONLY on ops repo (branches, PRs, comments).
|
||||
# DO NOT create issues on the project repo. Sub-issues are filed by
|
||||
# filer-bot after sprint PR merge via the ops-filer pipeline.
|
||||
# filer-bot: issues:write on project repo. Files sub-issues from merged sprint
|
||||
# PRs via ops-filer pipeline. Adds in-progress label to vision issues.
|
||||
#
|
||||
|
|
@ -175,10 +173,6 @@ The ## Sub-issues block is parsed by the filer-bot pipeline after sprint PR merg
|
|||
Each sub-issue between filer:begin/end markers becomes a Forgejo issue on the
|
||||
project repo. The filer appends a decomposed-from marker to each body automatically.
|
||||
|
||||
CRITICAL: You DO NOT have access to the project repo API. Sub-issues are filed
|
||||
by filer-bot from the sprint file after merge. Do NOT attempt to create issues
|
||||
via API calls — the token will 403 and the run will fail.
|
||||
|
||||
4. Bash creates PR:
|
||||
- Create branch: architect/sprint-{pitch-number}
|
||||
- Write sprint spec to sprints/{sprint-slug}.md
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Gardener Agent
|
||||
|
||||
**Role**: Backlog grooming — detect duplicate issues, missing acceptance
|
||||
|
|
|
|||
|
|
@ -1,22 +1 @@
|
|||
[
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 1150,
|
||||
"body": "## Problem\n\n`supervisor-run.sh` writes its structured log to `data/logs/supervisor/supervisor.log` (directory form). The polling loop in `docker/agents/entrypoint.sh` redirects the supervisor invocation's stderr to `data/logs/supervisor.log` (singular file form, sibling of the directory). Two different paths for one component's log stream.\n\nWhy this matters: when #1120's unbound-variable abort happened, the real error landed in the singular `data/logs/supervisor.log` (the stderr-redirect path), but operators checking supervisor health looked at `data/logs/supervisor/supervisor.log` (the directory form the agent writes) and saw only `--- Supervisor run start ---` with nothing after. That dual-sink is why the failure was silent for ~48h.\n\nThis is a class-of-failure pattern: any future silent-abort in `supervisor-run.sh` will repeat the same invisibility, because the two sinks are structurally divergent. #1121 fixes the specific unbound-var root cause; this one removes the invisibility layer so the next silent-abort class surfaces immediately.\n\n## Fix\n\nUnify to a single path. Recommended: keep the directory form `data/logs/supervisor/supervisor.log` as the canonical sink, and change the entrypoint stderr redirect to append into the same file instead of a sibling.\n\nChange locations:\n\n1. **`docker/agents/entrypoint.sh`** — the line that invokes `supervisor-run.sh` and redirects stderr. Grep `supervisor-run.sh` or `supervisor.log` in the entrypoint to find it. Change from:\n ```bash\n bash supervisor/supervisor-run.sh 2> data/logs/supervisor.log\n ```\n to:\n ```bash\n bash supervisor/supervisor-run.sh 2>> data/logs/supervisor/supervisor.log\n ```\n Use `2>>` (append) not `2>` (overwrite) so a stderr abort on one iteration does not wipe the structured log written by previous iterations.\n\n2. **Audit `supervisor-run.sh` itself** for any hardcoded reference to the singular path. If found, migrate to the directory path.\n\n3. 
**Document the canonical sink** in `supervisor/AGENTS.md` (or the nearest AGENTS.md covering supervisor entrypoints) so the path does not re-fork in the future.\n\n## Acceptance criteria\n\n- [ ] Only one on-disk path for supervisor logs: `find data/logs -name 'supervisor*'` returns the directory form only, no sibling singular file.\n- [ ] An intentionally-failing supervisor run on a throwaway branch (e.g. add `: ${DOES_NOT_EXIST:?boom}` at the top of `supervisor-run.sh`) produces visible error output in the canonical sink on the next polling iteration.\n- [ ] No regression: normal supervisor runs continue to write the `--- Supervisor run start ---` / `--- Supervisor run done ---` markers.\n- [ ] The fix applies inside `disinto-agents` without requiring image rebuild (entrypoint mount path) — or, if image rebuild is required, that requirement is noted in the PR body.\n\n## Affected files\n\n- `docker/agents/entrypoint.sh` — change stderr redirect for supervisor invocation\n- `supervisor/supervisor-run.sh` — audit for hardcoded singular log path\n- `supervisor/AGENTS.md` — document canonical log sink\n\n## Related\n\n- #1120 — the 48h silent-abort incident that exposed the dual-sink\n- #1121 — unbound-var root-cause fix; this issue is the complementary visibility fix\n- Vision #1147 (heartbeat + self-restart for long-running loops) — forward direction; unifying the log path is the minimal precondition for any heartbeat writer to reliably emit failure breadcrumbs"
|
||||
},
|
||||
{
|
||||
"action": "edit_body",
|
||||
"issue": 1124,
|
||||
"body": "## Symptom\n\nThe `caddy-validate` step in the `edge-subpath` workflow fails intermittently with:\n\n```\nGet \"http://%2Fvar%2Frun%2Fdocker.sock/v1.41/containers/wp_01KPQZ2WV7SVX68TDRC7DP2Z9M/json\": context deadline exceeded\n```\n\nExit code on the step: `126`. Downstream steps (`caddyfile-routing-test`, `test-caddyfile-routing`, etc.) get skipped, and the workflow reports `failure`.\n\nThis showed up on PR #1108 (gardener housekeeping, commit `0946ca9828`, pipeline 1597, workflow id 3470, step pid 12). Also pending-forever on the sibling workflows for PR #1112 (pipeline 1599) and PR #1113 (pipeline 1601).\n\nThe `edge-subpath` workflow is not in the required-status-contexts list (branch protection requires `ci/woodpecker/pr/ci` and `ci/woodpecker/push/ci` only), so this does not block merge by itself. But it leaves combined commit status at `failure`/`pending` and reviewer-agent gates on combined status — every legitimate review flow stalls here.\n\n## Reproduction\n\nHappens under load when multiple pipelines queue up. The step mounts the host `/var/run/docker.sock` and does Docker-in-Docker introspection; the `GET container` call times out during socket saturation.\n\n## Likely cause\n\n1. **Socket passthrough is saturated.** Nested Docker API calls exceed the default deadline during pipeline pile-up.\n2. **Woodpecker agent step timeout is too tight** for caddy-validate during busy periods.\n3. 
**The step code uses a short `context.WithTimeout`** that does not account for a busy Docker daemon.\n\n## Fix candidates\n\n- If the step's container-introspect is incidental, switch to polling with retry + exponential backoff and a larger overall budget (60–120s).\n- If the step needs to spawn a sibling container, run caddy validate directly inside the workflow container (no docker.sock mount needed — `caddy validate` is a binary call).\n- Short-term: mark `edge-subpath` as optional or move it to a separate optional pipeline so it stops polluting combined status on otherwise-green PRs.\n\n## Acceptance criteria\n\n- [ ] A PR that passes the required `ci` workflow also produces a green (or explicitly-optional) `edge-subpath` result, with no `context deadline exceeded` in the step logs over ten consecutive runs.\n- [ ] Reviewer-agent no longer gets blocked by the `edge-subpath` workflow on merge-eligible PRs.\n- [ ] If the fix is \"mark as optional,\" the branch-protection required-contexts list is reviewed so it is clear which checks actually gate merges.\n\n## Affected files\n\n- `.woodpecker/edge-subpath.yml` — the CI pipeline defining the caddy-validate step\n- `tests/smoke-edge-subpath.sh` — the smoke test script invoked by the pipeline (if it contains the docker.sock introspection)\n\n## Context\n\nObserved 2026-04-21 during triage of why PRs were backing up in queue. WP agent restart drained the queue for most workflows; this one step remained stuck or timing out. The merged commit for #1108 shipped with this check in `failure`."
|
||||
},
|
||||
{
|
||||
"action": "add_label",
|
||||
"issue": 1124,
|
||||
"label": "backlog"
|
||||
},
|
||||
{
|
||||
"action": "comment",
|
||||
"issue": 1121,
|
||||
"body": "CI on PR #1143 is showing `failure` with all `null` status values — this is the edge-subpath docker.sock timeout pattern documented in #1124.\n\nThe fix in #1143 is correct (verified: adds `resolve_forge_remote` before `formula_worktree_setup`). The CI failure is environmental, not caused by this change.\n\n**Unblock path:** Once #1124 is resolved (edge-subpath caddy-validate no longer times out on docker.sock), PR #1143 should be retriable. Alternatively, if the required CI contexts (`ci/woodpecker/pr/ci`, `ci/woodpecker/push/ci`) pass, the PR can merge independently of the edge-subpath failure."
|
||||
}
|
||||
]
|
||||
[]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Shared Helpers (`lib/`)
|
||||
|
||||
All agents source `lib/env.sh` as their first action. Additional helpers are
|
||||
|
|
@ -7,13 +7,13 @@ sourced as needed.
|
|||
| File | What it provides | Sourced by |
|
||||
|---|---|---|
|
||||
| `lib/env.sh` | Loads `.env`, sets `FACTORY_ROOT`, exports project config (`FORGE_REPO`, `PROJECT_NAME`, etc.), defines `log()`, `forge_api()`, `forge_api_all()` (paginates all pages; accepts optional second TOKEN parameter, defaults to `$FORGE_TOKEN`; handles invalid/empty JSON responses gracefully — returns empty on parse error instead of crashing), `woodpecker_api()`, `wpdb()`, `memory_guard()` (skips agent if RAM < threshold), `load_secret()` (secret-source abstraction — see below). Auto-loads project TOML if `PROJECT_TOML` is set. Exports per-agent tokens (`FORGE_PLANNER_TOKEN`, `FORGE_GARDENER_TOKEN`, `FORGE_VAULT_TOKEN`, `FORGE_SUPERVISOR_TOKEN`, `FORGE_PREDICTOR_TOKEN`) — each falls back to `$FORGE_TOKEN` if not set. **Vault-only token guard (AD-006)**: `unset GITHUB_TOKEN CLAWHUB_TOKEN` so agents never hold external-action tokens — only the runner container receives them. **Container note**: when `DISINTO_CONTAINER=1`, `.env` is NOT re-sourced — compose already injects env vars (including `FORGE_URL=http://forgejo:3000`) and re-sourcing would clobber them. **Save/restore scope (#364)**: only `FORGE_URL` is preserved across `.env` re-sourcing (compose injects `http://forgejo:3000`, `.env` has `http://localhost:3000`). `FORGE_TOKEN` is NOT preserved so refreshed tokens in `.env` take effect immediately. **Per-agent token override (#762)**: agent run scripts export `FORGE_TOKEN_OVERRIDE=<agent-specific-token>` BEFORE sourcing `env.sh`; `env.sh` applies this override at lines 98-100, ensuring the correct identity survives any re-sourcing of `env.sh` by nested shells or `claude -p` invocations. **Required env var**: `FORGE_PASS` — bot password for git HTTP push (Forgejo 11.x rejects API tokens for `git push`, #361). **Hard preconditions (#674)**: `USER` and `HOME` must be exported by the entrypoint before sourcing. When `PROJECT_TOML` is set, `PROJECT_REPO_ROOT`, `PRIMARY_BRANCH`, and `OPS_REPO_ROOT` must also be set (by entrypoint or TOML). 
**`load_secret NAME [DEFAULT]` (#793)**: backend-agnostic secret resolution. Precedence: (1) `/secrets/<NAME>.env` — Nomad-rendered template, (2) current environment — already set by `.env.enc` / compose, (3) `secrets/<NAME>.enc` — age-encrypted per-key file (decrypted on demand, cached in process env), (4) DEFAULT or empty. Consumers call `$(load_secret GITHUB_TOKEN)` instead of `${GITHUB_TOKEN}` — identical behavior whether secrets come from Docker compose injection or Nomad Vault templates. | Every agent |
|
||||
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. `ci_get_step_logs <pipeline_num> <step_id>` — fetches per-step logs via Woodpecker REST API (`/repos/{id}/logs/{pipeline}/{step_id}`); returns raw log data for a single step. Used by `pr_poll_ci()` to build per-workflow/per-step CI diagnostics (#1051). `ci_required_contexts([branch])` — returns newline-separated list of required status check context names from branch protection; cached per poll cycle in `_CI_REQUIRED_CONTEXTS`. `_ci_reduce_required_contexts(sha, required_contexts)` — reduces commit statuses to required contexts only; stdout: `success` \| `failure` \| `pending` (#1136). | dev-poll, review-poll, review-pr |
|
||||
| `lib/ci-helpers.sh` | `ci_passed()` — returns 0 if CI state is "success" (or no CI configured). `ci_required_for_pr()` — returns 0 if PR has code files (CI required), 1 if non-code only (CI not required). `is_infra_step()` — returns 0 if a single CI step failure matches infra heuristics (clone/git exit 128, any exit 137, log timeout patterns). `classify_pipeline_failure()` — returns "infra \<reason>" if any failed Woodpecker step matches infra heuristics via `is_infra_step()`, else "code". `ensure_priority_label()` — looks up (or creates) the `priority` label and returns its ID; caches in `_PRIORITY_LABEL_ID`. `ci_commit_status <sha>` — queries Woodpecker directly for CI state, falls back to forge commit status API. `ci_pipeline_number <sha>` — returns the Woodpecker pipeline number for a commit, falls back to parsing forge status `target_url`. `ci_promote <repo_id> <pipeline_num> <environment>` — promotes a pipeline to a named Woodpecker environment (vault-gated deployment: vault approves, vault-fire calls this — vault redesign in progress, see #73-#77). `ci_get_logs <pipeline_number> [--step <name>]` — reads CI logs from Woodpecker SQLite database via `lib/ci-log-reader.py`; outputs last 200 lines to stdout. Requires mounted woodpecker-data volume at /woodpecker-data. `ci_get_step_logs <pipeline_num> <step_id>` — fetches per-step logs via Woodpecker REST API (`/repos/{id}/logs/{pipeline}/{step_id}`); returns raw log data for a single step. Used by `pr_poll_ci()` to build per-workflow/per-step CI diagnostics (#1051). | dev-poll, review-poll, review-pr |
|
||||
| `lib/ci-debug.sh` | CLI tool for Woodpecker CI: `list`, `status`, `logs`, `failures` subcommands. Not sourced — run directly. | Humans / dev-agent (tool access) |
|
||||
| `lib/ci-log-reader.py` | Python tool: reads CI logs from Woodpecker SQLite database. `<pipeline_number> [--step <name>]` — returns last 200 lines from failed steps (or specified step). Used by `ci_get_logs()` in ci-helpers.sh. Requires `WOODPECKER_DATA_DIR` (default: /woodpecker-data). | ci-helpers.sh |
|
||||
| `lib/load-project.sh` | Parses a `projects/*.toml` file into env vars (`PROJECT_NAME`, `FORGE_REPO`, `WOODPECKER_REPO_ID`, monitoring toggles, mirror config, etc.). Also exports `FORGE_REPO_OWNER` (the owner component of `FORGE_REPO`, e.g. `disinto-admin` from `disinto-admin/disinto`). Reads `repo_root` and `ops_repo_root` from the TOML for host-CLI callers. **Container path handling (#674)**: no longer derives `PROJECT_REPO_ROOT` or `OPS_REPO_ROOT` inside the script — container entrypoints export the correct paths before agent scripts source `env.sh`, and the `DISINTO_CONTAINER` guard (line 90) skips TOML overrides when those vars are already set. | env.sh (when `PROJECT_TOML` is set) |
|
||||
| `lib/parse-deps.sh` | Extracts dependency issue numbers from an issue body (stdin → stdout, one number per line). Matches `## Dependencies` / `## Depends on` / `## Blocked by` sections and inline `depends on #N` / `blocked by #N` patterns. Inline scan skips fenced code blocks to prevent false positives from code examples in issue bodies. Not sourced — executed via `bash lib/parse-deps.sh`. | dev-poll |
|
||||
| `lib/formula-session.sh` | `acquire_run_lock()`, `load_formula()`, `load_formula_or_profile()`, `build_context_block()`, `ensure_ops_repo()`, `ops_commit_and_push()`, `build_prompt_footer()`, `build_sdk_prompt_footer()`, `formula_worktree_setup()`, `formula_prepare_profile_context()`, `formula_lessons_block()`, `profile_write_journal()`, `profile_load_lessons()`, `ensure_profile_repo()`, `_profile_has_repo()`, `_count_undigested_journals()`, `_profile_digest_journals()`, `_profile_restore_lessons()`, `_profile_commit_and_push()`, `resolve_agent_identity()`, `build_graph_section()`, `build_scratch_instruction()`, `read_scratch_context()`, `cleanup_stale_crashed_worktrees()` — shared helpers for formula-driven polling-loop agents (lock, .profile repo management, prompt assembly, worktree setup). Memory guard is provided by `memory_guard()` in `lib/env.sh` (not duplicated here). `resolve_agent_identity()` — sets `FORGE_TOKEN`, `AGENT_IDENTITY`, `FORGE_REMOTE` from per-agent token env vars and FORGE_URL remote detection. `build_graph_section()` generates the structural-analysis section (runs `lib/build-graph.py`, formats JSON output) — previously duplicated in planner-run.sh and predictor-run.sh, now shared here. `cleanup_stale_crashed_worktrees()` — thin wrapper around `worktree_cleanup_stale()` from `lib/worktree.sh` (kept for backwards compatibility). **Journal digestion guards (#702)**: `_profile_digest_journals()` respects `PROFILE_DIGEST_TIMEOUT` (default 300s) and `PROFILE_DIGEST_MAX_BATCH` (default 5 journals per run); `_profile_restore_lessons()` restores the previous lessons-learned.md on digest failure. | planner-run.sh, predictor-run.sh, gardener-run.sh, supervisor-run.sh, dev-agent.sh |
|
||||
| `lib/guard.sh` | `check_active(role_name)` — reads `$FACTORY_ROOT/state/.{role_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each role. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so role dropout is visible in loop logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | polling-loop entry points |
|
||||
| `lib/guard.sh` | `check_active(agent_name)` — reads `$FACTORY_ROOT/state/.{agent_name}-active`; exits 0 (skip) if the file is absent. Factory is off by default — state files must be created to enable each agent. **Logs a message to stderr** when skipping (`[check_active] SKIP: state file not found`), so agent dropout is visible in loop logs. Sourced by dev-poll.sh, review-poll.sh, predictor-run.sh, supervisor-run.sh. | polling-loop entry points |
|
||||
| `lib/mirrors.sh` | `mirror_push()` — pushes `$PRIMARY_BRANCH` + tags to all configured mirror remotes (fire-and-forget background pushes). Reads `MIRROR_NAMES` and `MIRROR_*` vars exported by `load-project.sh` from the `[mirrors]` TOML section. Failures are logged but never block the pipeline. `mirror_pull_register(clone_url, owner, repo_name, [interval])` — registers a Forgejo pull mirror via `POST /repos/migrate` with `mirror: true`. Creates the target repo and queues the first sync automatically. Works against empty Forgejo instances — no pre-existing content required. Used for Nomad migration cutover: point at Codeberg source, wait for sync, then proceed with `disinto init`. See [docs/mirror-bootstrap.md](../docs/mirror-bootstrap.md) for the full cutover path. Sourced by dev-poll.sh — called after every successful merge. | dev-poll.sh |
|
||||
| `lib/build-graph.py` | Python tool: parses VISION.md, prerequisites.md (from ops repo), AGENTS.md, formulas/*.toml, evidence/ (from ops repo), and forge issues/labels into a NetworkX DiGraph. Runs structural analyses (orphaned objectives, stale prerequisites, thin evidence, circular deps) and outputs a JSON report. Used by `review-pr.sh` (per-PR changed-file analysis) and `predictor-run.sh` (full-project analysis) to provide structural context to Claude. | review-pr.sh, predictor-run.sh |
|
||||
| `lib/secret-scan.sh` | `scan_for_secrets()` — detects potential secrets (API keys, bearer tokens, private keys, URLs with embedded credentials) in text; returns 1 if secrets found. `redact_secrets()` — replaces detected secret patterns with `[REDACTED]`. | issue-lifecycle.sh |
|
||||
|
|
@ -30,7 +30,7 @@ sourced as needed.
|
|||
| `lib/git-creds.sh` | Shared git credential helper configuration. `configure_git_creds([HOME_DIR] [RUN_AS_CMD])` — writes a static credential helper script and configures git globally to use password-based HTTP auth (Forgejo 11.x rejects API tokens for `git push`, #361). **Retry on cold boot (#741)**: resolves bot username from `FORGE_TOKEN` with 5 retries (exponential backoff 1-5s); fails loudly and returns 1 if Forgejo is unreachable — never falls back to a wrong hardcoded default (exports `BOT_USER` on success). `repair_baked_cred_urls([--as RUN_AS_CMD] DIR ...)` — rewrites any git remote URLs that have credentials baked in to use clean URLs instead; uses `safe.directory` bypass for root-owned repos (#671). Requires `FORGE_PASS`, `FORGE_URL`, `FORGE_TOKEN`. | entrypoints (agents, edge) |
|
||||
| `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) |
|
||||
| `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_forgejo_oauth_app()` — generic helper to create an OAuth2 app on Forgejo (shared by Woodpecker and chat). `_create_woodpecker_oauth_impl()` — creates Woodpecker OAuth2 app (thin wrapper). `_create_chat_oauth_impl()` — creates disinto-chat OAuth2 app, writes `CHAT_OAUTH_CLIENT_ID`/`CHAT_OAUTH_CLIENT_SECRET` to `.env` (#708). `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
|
||||
| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (**duplicate service detection**: tracks service names during generation, aborts with `ERROR: Duplicate service name '$name' detected` on conflict; detection state is reset between calls so idempotent reinvocation is safe, #850) (uses `codeberg.org/forgejo/forgejo:11.0` tag; `CLAUDE_BIN_DIR` volume mount removed from agents/llama services — only `reproduce` and `edge` still use the host-mounted CLI (#992); adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility; Forgejo includes a healthcheck so dependent services use `condition: service_healthy` — fixes cold-start races, #665; adds `chat` service block with isolated `chat-config` named volume and `CHAT_HISTORY_DIR` bind-mount for per-user NDJSON history persistence (#710); injects `FORWARD_AUTH_SECRET` for Caddy↔chat defense-in-depth auth (#709); subdomain fallback: `EDGE_ROUTING_MODE` (default `subpath`) and per-service `EDGE_TUNNEL_FQDN_*` vars injected into edge service (#1028); chat service rate limiting removed (#1084); chat workspace dir bind-mount: `${CHAT_WORKSPACE_DIR:-./workspace}:/var/workspace` + `CHAT_WORKSPACE_DIR` env var injected so Claude can access project working tree (#1027); all `depends_on` now use `condition: service_healthy/started` instead of bare service names; all services now include `restart: unless-stopped` including the edge service — #768; agents service now uses `image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}` instead of `build:` (#429); `WOODPECKER_PLUGINS_PRIVILEGED` env var added to woodpecker service (#779); agents-llama conditional block gated on `ENABLE_LLAMA_AGENT=1` (#769); `agents-llama-all` compose service (profile `agents-llama-all`, all 7 roles: review,dev,gardener,architect,planner,predictor,supervisor) added by #801; agents service gains volume mounts for `./projects`, `./.env`, `./state`), `generate_caddyfile()` 
— Caddyfile (routes: `/forge/*` → forgejo:3000 with `uri strip_prefix /forge` (#1103), `/woodpecker/*` → woodpecker:8000, `/staging/*` → staging:80; `/chat/login` and `/chat/oauth/callback` bypass `forward_auth` so unauthenticated users can reach the OAuth flow; `/chat/*` gated by `forward_auth` on `chat:8080/chat/auth/verify` which stamps `X-Forwarded-User` (#709); root `/` redirects to `/forge/`), `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
|
||||
| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (**duplicate service detection**: tracks service names during generation, aborts with `ERROR: Duplicate service name '$name' detected` on conflict; detection state is reset between calls so idempotent reinvocation is safe, #850) (uses `codeberg.org/forgejo/forgejo:11.0` tag; `CLAUDE_BIN_DIR` volume mount removed from agents/llama services — only `reproduce` and `edge` still use the host-mounted CLI (#992); adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility; Forgejo includes a healthcheck so dependent services use `condition: service_healthy` — fixes cold-start races, #665; adds `chat` service block with isolated `chat-config` named volume and `CHAT_HISTORY_DIR` bind-mount for per-user NDJSON history persistence (#710); injects `FORWARD_AUTH_SECRET` for Caddy↔chat defense-in-depth auth (#709); subdomain fallback: `EDGE_ROUTING_MODE` (default `subpath`) and per-service `EDGE_TUNNEL_FQDN_*` vars injected into edge service (#1028); chat service rate limiting removed (#1084); chat workspace dir bind-mount: `${CHAT_WORKSPACE_DIR:-./workspace}:/var/workspace` + `CHAT_WORKSPACE_DIR` env var injected so Claude can access project working tree (#1027); all `depends_on` now use `condition: service_healthy/started` instead of bare service names; all services now include `restart: unless-stopped` including the edge service — #768; agents service now uses `image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}` instead of `build:` (#429); `WOODPECKER_PLUGINS_PRIVILEGED` env var added to woodpecker service (#779); agents-llama conditional block gated on `ENABLE_LLAMA_AGENT=1` (#769); `agents-llama-all` compose service (profile `agents-llama-all`, all 7 roles: review,dev,gardener,architect,planner,predictor,supervisor) added by #801; agents service gains volume mounts for `./projects`, `./.env`, `./state`), `generate_caddyfile()` 
— Caddyfile (routes: `/forge/*` → forgejo:3000, `/woodpecker/*` → woodpecker:8000, `/staging/*` → staging:80; `/chat/login` and `/chat/oauth/callback` bypass `forward_auth` so unauthenticated users can reach the OAuth flow; `/chat/*` gated by `forward_auth` on `chat:8080/chat/auth/verify` which stamps `X-Forwarded-User` (#709); root `/` redirects to `/forge/`), `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
|
||||
| `lib/backup.sh` | Factory backup creation. `backup_create <outfile.tar.gz>` — exports factory state: fetches all issues (open+closed) from the project and ops repos via Forgejo API, bundles the ops repo as a git bundle, and writes a tarball. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_REPO`, `FORGE_OPS_REPO`, `OPS_REPO_ROOT`. Sourced by `bin/disinto backup create` (#1057). | bin/disinto (backup create) |
|
||||
| `lib/disinto/backup.sh` | Factory backup restore. `backup_import <infile.tar.gz>` — restores from a backup tarball: creates missing repos via Forgejo API, imports issues (idempotent — skips by number if present), unpacks ops repo git bundle. Idempotent: running twice produces same end state with no errors. Requires `FORGE_URL`, `FORGE_TOKEN`. Sourced by `bin/disinto backup import` (#1058). | bin/disinto (backup import) |
|
||||
| `lib/sprint-filer.sh` | Post-merge sub-issue filer for sprint PRs. Invoked by the `.woodpecker/ops-filer.yml` pipeline after a sprint PR merges to ops repo `main`. Parses `<!-- filer:begin --> ... <!-- filer:end -->` blocks from sprint PR bodies to extract sub-issue definitions, creates them on the project repo using `FORGE_FILER_TOKEN` (narrow-scope `filer-bot` identity with `issues:write` only), adds `in-progress` label to the parent vision issue, and handles vision lifecycle closure when all sub-issues are closed. Uses `filer_api_all()` for paginated fetches. Idempotent: uses `<!-- decomposed-from: #<vision>, sprint: <slug>, id: <id> -->` markers to skip already-filed issues. Requires `FORGE_FILER_TOKEN`, `FORGE_API`, `FORGE_API_BASE`, `FORGE_OPS_REPO`. | `.woodpecker/ops-filer.yml` (CI pipeline on ops repo) |
|
||||
|
|
|
|||
|
|
@ -1,99 +0,0 @@
|
|||
#!/usr/bin/env bash
# =============================================================================
# backfill-labels.sh — Backfill labels on issues that were filed out of band
#
# Usage:
#   backfill-labels.sh <issue-num> [<issue-num> ...] <label> [<label> ...]
#   backfill-labels.sh 1105 backlog
#   backfill-labels.sh 1105 1106 1107 backlog
#
# Argument classification:
#   Purely numeric args are issue numbers; every other arg is a label name.
#   If ALL args are numeric, the last one is taken as the (numeric) label name
#   and the preceding ones are the issue numbers.
#
# Environment:
#   FORGE_TOKEN  — API token with issues:write scope (used for label operations)
#   FORGE_API    — project repo API base URL
#   FACTORY_ROOT — optional; when unset it is derived from this script's
#                  location and lib/env.sh is sourced from there
#
# Exit status:
#   0 when every issue was labelled; 1 on usage errors, when no label could be
#   resolved, or when labelling failed for at least one issue.
#
# This script is a one-off tool for recovering from out-of-band issue filing
# (e.g., architect-bot filing sub-issues directly instead of through filer-bot).
# See issue #1140 for context.
# =============================================================================
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

if [ -z "${FACTORY_ROOT:-}" ]; then
  FACTORY_ROOT="$(dirname "$SCRIPT_DIR")"
  # shellcheck source=lib/env.sh
  source "$FACTORY_ROOT/lib/env.sh"
fi

if [ $# -lt 2 ]; then
  echo "Usage: $0 <issue-num> [<issue-num> ...] <label> [<label> ...]" >&2
  echo " Last positional arg(s) are labels. All preceding args are issue numbers." >&2
  exit 1
fi

# Fail with a readable message instead of a bare "unbound variable" under set -u.
: "${FORGE_TOKEN:?FORGE_TOKEN must be set}"
: "${FORGE_API:?FORGE_API must be set}"

# Split args: numeric args are issue numbers, everything else is a label name.
issue_nums=()
labels=()

for arg in "$@"; do
  if [[ "$arg" =~ ^[0-9]+$ ]]; then
    issue_nums+=("$arg")
  else
    labels+=("$arg")
  fi
done

# All args were numeric: the last arg is the label, the rest are issues.
# issue_nums is rebuilt from scratch — appending to the list collected above
# would duplicate every issue and keep the label arg in the issue list.
if [ ${#labels[@]} -eq 0 ]; then
  labels=("${!#}")
  issue_nums=("${@:1:$(($# - 1))}")
fi

if [ ${#issue_nums[@]} -eq 0 ]; then
  echo "ERROR: no issue numbers specified" >&2
  exit 1
fi

if [ ${#labels[@]} -eq 0 ]; then
  echo "ERROR: no labels specified" >&2
  exit 1
fi

# Fetch the repo's label list once; every requested name is resolved against it.
# NOTE(review): the endpoint may paginate — confirm the default page size covers
# all labels on the project repo.
if ! labels_json=$(curl -sf -H "Authorization: token ${FORGE_TOKEN}" \
  "${FORGE_API}/labels" 2>/dev/null); then
  echo "ERROR: failed to fetch labels from ${FORGE_API}/labels" >&2
  exit 1
fi

# Resolve label IDs
label_ids_json="[]"
for label_name in "${labels[@]}"; do
  # Take the first match only so a duplicated label name cannot produce a
  # multi-line value that breaks --argjson below.
  label_id=$(printf '%s' "$labels_json" | jq -r --arg name "$label_name" \
    '[.[] | select(.name == $name) | .id][0] // empty' 2>/dev/null) || label_id=""
  if [ -n "$label_id" ]; then
    label_ids_json=$(printf '%s' "$label_ids_json" | jq --argjson id "$label_id" '. + [$id]')
  else
    echo "WARNING: label '${label_name}' not found on project repo" >&2
  fi
done

if [ "$(printf '%s' "$label_ids_json" | jq 'length')" -eq 0 ]; then
  echo "ERROR: no label IDs resolved — cannot proceed" >&2
  exit 1
fi

# Apply labels to each issue; keep going after a failure but remember it so the
# exit status reflects partial failure.
payload="{\"labels\": ${label_ids_json}}"
failed=0
for issue_num in "${issue_nums[@]}"; do
  echo "Adding labels ${labels[*]} to issue #${issue_num}..."
  if ! curl -sf -X POST \
    -H "Authorization: token ${FORGE_TOKEN}" \
    -H "Content-Type: application/json" \
    "${FORGE_API}/issues/${issue_num}/labels" \
    -d "$payload" 2>/dev/null; then
    echo "ERROR: failed to add labels to issue #${issue_num}" >&2
    failed=$((failed + 1))
    continue
  fi
  echo " OK — issue #${issue_num} updated"
done

echo "Done."

if [ "$failed" -gt 0 ]; then
  exit 1
fi
|
||||
|
|
@ -56,64 +56,6 @@ ci_required_for_pr() {
|
|||
echo "$files" | diff_has_code_files
|
||||
}
|
||||
|
||||
# ci_required_contexts [branch] — get required status check contexts from branch protection.
# Arguments: $1 - branch name (optional; defaults to $PRIMARY_BRANCH, then "main")
# Stdout:    newline-separated list of required context names, or empty if
#            none are configured / status checks are disabled / lookup failed.
# Globals:   _CI_REQUIRED_CONTEXTS (cached result), _CI_REQUIRED_CONTEXTS_BRANCH
#            (branch the cache belongs to). `unset _CI_REQUIRED_CONTEXTS` to
#            invalidate. A cache seeded without the branch variable is honoured
#            for any branch.
# The cache only persists when the function runs in the current shell; a
# caller using $(ci_required_contexts) runs it in a subshell and retains nothing.
# shellcheck disable=SC2120  # branch arg is optional, callers may omit it
ci_required_contexts() {
  local branch="${1:-${PRIMARY_BRANCH:-main}}"

  # Cache hit only when the cached value was computed for this same branch.
  if [ -n "${_CI_REQUIRED_CONTEXTS+set}" ] \
    && [ "${_CI_REQUIRED_CONTEXTS_BRANCH-$branch}" = "$branch" ]; then
    printf '%s' "$_CI_REQUIRED_CONTEXTS"
    return
  fi

  # Default to "no required contexts"; only overwritten on a fully valid answer.
  _CI_REQUIRED_CONTEXTS=""
  _CI_REQUIRED_CONTEXTS_BRANCH="$branch"

  local bp_json enabled
  bp_json=$(forge_api GET "/branch_protections/${branch}" 2>/dev/null) || bp_json=""

  if [ -n "$bp_json" ] && [ "$bp_json" != "null" ]; then
    enabled=$(printf '%s' "$bp_json" \
      | jq -r '.enable_status_check // false' 2>/dev/null) || enabled="false"
    if [ "$enabled" = "true" ]; then
      _CI_REQUIRED_CONTEXTS=$(printf '%s' "$bp_json" \
        | jq -r '.status_check_contexts // [] | .[]' 2>/dev/null) || _CI_REQUIRED_CONTEXTS=""
    fi
  fi

  printf '%s' "$_CI_REQUIRED_CONTEXTS"
}
|
||||
|
||||
# _ci_reduce_required_contexts <sha> <required_contexts>
# Reduce commit statuses to required contexts only.
# Fetches per-context statuses from the forge combined endpoint and filters.
# Arguments: $1 - commit sha
#            $2 - newline-separated list of required context names
# Stdout:    exactly one of: success | failure | pending
#   failure — the latest status (highest .id) of any required context is
#             "failure" or "error"
#   success — the latest status of every required context is "success"
#   pending — anything else, including: no required contexts, a required
#             context with no status yet, an API error, or an empty or
#             unparseable response
_ci_reduce_required_contexts() {
  local sha="$1" required="$2"
  local status_json verdict
  status_json=$(forge_api GET "/commits/${sha}/status" 2>/dev/null) || { echo "pending"; return; }

  verdict=$(printf '%s' "$status_json" | jq -r --arg req "$required" '
    ($req | split("\n") | map(select(. != ""))) as $contexts |
    .statuses as $all |
    if ($contexts | length) == 0 then "pending"
    else
      [ $contexts[] as $ctx |
        [$all[] | select(.context == $ctx)] | sort_by(.id) | last | .status // "pending"
      ] |
      if any(. == "failure" or . == "error") then "failure"
      elif all(. == "success") then "success"
      else "pending"
      end
    end
  ' 2>/dev/null) || verdict=""

  # jq exits 0 with no output on an empty response body; never let that (or any
  # other unexpected text) leak out as an empty/unknown state.
  case "$verdict" in
    success|failure|pending) echo "$verdict" ;;
    *) echo "pending" ;;
  esac
}
|
||||
|
||||
# ci_passed <state> — check if CI is passing (or no CI configured)
|
||||
# Returns 0 if state is "success", or if no CI is configured and
|
||||
# state is empty/pending/unknown.
|
||||
|
|
@ -141,23 +83,11 @@ ci_failed() {
|
|||
}
|
||||
|
||||
# ci_commit_status <sha> — get CI state for a commit
|
||||
# When branch protection declares required status check contexts, reduces over
|
||||
# just those — optional workflows that are stuck/failed do not block decisions.
|
||||
# Otherwise queries Woodpecker API directly, falls back to forge combined status.
|
||||
# Queries Woodpecker API directly, falls back to forge commit status API.
|
||||
ci_commit_status() {
|
||||
local sha="$1"
|
||||
local state=""
|
||||
|
||||
# When required contexts are configured, reduce over just those
|
||||
local required
|
||||
# shellcheck disable=SC2119 # branch arg defaults to PRIMARY_BRANCH
|
||||
required=$(ci_required_contexts) || true
|
||||
if [ -n "$required" ]; then
|
||||
_ci_reduce_required_contexts "$sha" "$required"
|
||||
return
|
||||
fi
|
||||
|
||||
# No required-context filtering — original behavior
|
||||
# Primary: ask Woodpecker directly
|
||||
if [ -n "${WOODPECKER_REPO_ID:-}" ] && [ "${WOODPECKER_REPO_ID}" != "0" ]; then
|
||||
state=$(woodpecker_api "/repos/${WOODPECKER_REPO_ID}/pipelines" \
|
||||
|
|
|
|||
|
|
@ -860,7 +860,6 @@ _generate_caddyfile_subpath() {
|
|||
|
||||
# Reverse proxy to Forgejo
|
||||
handle /forge/* {
|
||||
uri strip_prefix /forge
|
||||
reverse_proxy forgejo:3000
|
||||
}
|
||||
|
||||
|
|
|
|||
14
lib/guard.sh
14
lib/guard.sh
|
|
@ -1,22 +1,22 @@
|
|||
#!/usr/bin/env bash
|
||||
# guard.sh — Active-state guard for polling-loop entry points
|
||||
#
|
||||
# Each role checks for a state file before running. If the file
|
||||
# doesn't exist, the role logs a skip and exits cleanly.
|
||||
# Each agent checks for a state file before running. If the file
|
||||
# doesn't exist, the agent logs a skip and exits cleanly.
|
||||
#
|
||||
# State files live in $FACTORY_ROOT/state/:
|
||||
# .dev-active, .reviewer-active, .planner-active, etc.
|
||||
#
|
||||
# Presence = permission to run. Absence = skip (factory off by default).
|
||||
|
||||
# check_active <role_name>
|
||||
# check_active <agent_name>
|
||||
# Exit 0 (skip) if the state file is absent.
|
||||
check_active() {
|
||||
local role_name="$1"
|
||||
local state_file="${FACTORY_ROOT}/state/.${role_name}-active"
|
||||
local agent_name="$1"
|
||||
local state_file="${FACTORY_ROOT}/state/.${agent_name}-active"
|
||||
if [ ! -f "$state_file" ]; then
|
||||
echo "[check_active] SKIP: state file state/.${role_name}-active not found — role disabled" >&2
|
||||
log "${role_name} not active — skipping"
|
||||
echo "[check_active] SKIP: state file state/.${agent_name}-active not found — agent disabled" >&2
|
||||
log "${agent_name} not active — skipping"
|
||||
exit 0
|
||||
fi
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# nomad/ — Agent Instructions
|
||||
|
||||
Nomad + Vault HCL for the factory's single-node cluster. These files are
|
||||
|
|
@ -21,7 +21,7 @@ see issues #821–#992 for the step breakdown.
|
|||
| `jobs/agents.hcl` | submitted via `lib/init/nomad/deploy.sh` | All 7 agent roles (dev, review, gardener, planner, predictor, supervisor, architect) + llama variant; Vault-templated bot tokens via `service-agents` policy; `force_pull = false` — image is built locally by `bin/disinto --with agents`, no registry (S4.1, S4-fix-2, S4-fix-5, #955, #972, #978) |
|
||||
| `jobs/staging.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy file-server mounting `docker/` as `/srv/site:ro`; no Vault integration; **dynamic host port** (no static 80 — edge owns 80/443, collision fixed in S5-fix-7 #1018); edge discovers via Nomad service registration (S5.2, #989) |
|
||||
| `jobs/chat.hcl` | submitted via `lib/init/nomad/deploy.sh` | Claude chat UI; custom `disinto/chat:local` image; sandbox hardening (cap_drop ALL, **tmpfs via mount block** not `tmpfs=` arg — S5-fix-5 #1012, pids_limit 128); Vault-templated OAuth secrets via `service-chat` policy (S5.2, #989); rate limiting removed (#1084); **workspace volume** `chat-workspace` host_volume bind-mounted to `/var/workspace` for Claude project access (#1027) — operator must register `host_volume "chat-workspace"` in `client.hcl` on each node |
|
||||
| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; **both Caddy and dispatcher tasks use `network_mode = "host"`** — upstreams are `127.0.0.1:<port>` (forgejo :3000, woodpecker :8000, chat :8080), not Docker hostnames (#1031, #1034); `FORGE_URL` rendered via Nomad service discovery template (`nomadService "forgejo"` — switched from Consul `service` lookup to Nomad native service discovery, #1114) to handle bridge vs. host network differences (#1034); dispatcher Vault secret path changed to `kv/data/disinto/shared/ops-repo` (#1041); Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988); `/forge/*` handler adds `uri strip_prefix /forge` before proxying to forgejo (#1103); `/staging/*` strips `/staging` prefix before proxying (#1079); WebSocket endpoint `/chat/ws` uses `header_up` inside `reverse_proxy` block (moved from handle-block top level — Caddy rejects top-level `header_up`, #1117); `/chat/ws` added for streaming (#1026) |
|
||||
| `jobs/edge.hcl` | submitted via `lib/init/nomad/deploy.sh` | Caddy reverse proxy + dispatcher sidecar; routes /forge, /woodpecker, /staging, /chat; uses `disinto/edge:local` image built by `bin/disinto --with edge`; **both Caddy and dispatcher tasks use `network_mode = "host"`** — upstreams are `127.0.0.1:<port>` (forgejo :3000, woodpecker :8000, chat :8080), not Docker hostnames (#1031, #1034); `FORGE_URL` rendered via Nomad service discovery template (not static env) to handle bridge vs. host network differences (#1034); dispatcher Vault secret path changed to `kv/data/disinto/shared/ops-repo` (#1041); Vault-templated ops-repo creds via `service-dispatcher` policy (S5.1, #988); `/staging/*` strips `/staging` prefix before proxying (#1079); WebSocket endpoint `/chat/ws` added for streaming (#1026) |
|
||||
|
||||
Nomad auto-merges every `*.hcl` under `-config=/etc/nomad.d/`, so the
|
||||
split between `server.hcl` and `client.hcl` is for readability, not
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@
|
|||
# dispatcher sidecar polls disinto-ops for vault actions and dispatches them
|
||||
# via Nomad batch jobs.
|
||||
#
|
||||
# All upstreams discovered via Nomad service discovery (issue #1156, S5-fix-7).
|
||||
# Caddy uses network_mode = "host" but upstreams run in separate alloc netns,
|
||||
# so loopback addresses are unreachable — nomadService templates resolve the
|
||||
# dynamic address:port for each backend.
|
||||
# Host networking (issue #1031):
|
||||
# Caddy uses network_mode = "host" so upstreams are reached at
|
||||
# 127.0.0.1:<port> (forgejo :3000, woodpecker :8000, chat :8080).
|
||||
# Staging uses Nomad service discovery (S5-fix-7, issue #1018).
|
||||
#
|
||||
# Host_volume contract:
|
||||
# This job mounts caddy-data from nomad/client.hcl. Path
|
||||
|
|
@ -120,15 +120,17 @@ job "edge" {
|
|||
read_only = false
|
||||
}
|
||||
|
||||
# ── Caddyfile via Nomad service discovery (S5-fix-7, issue #1018/1156) ──
|
||||
# All upstreams rendered from Nomad service registration. Caddy picks up
|
||||
# /local/Caddyfile via entrypoint.
|
||||
# ── Caddyfile via Nomad service discovery (S5-fix-7, issue #1018) ────
|
||||
# Renders staging upstream from Nomad service registration instead of
|
||||
# hardcoded staging:80. Caddy picks up /local/Caddyfile via entrypoint.
|
||||
# Forge URL via Nomad service discovery (issue #1034) — resolves forgejo
|
||||
# service address/port dynamically for bridge network compatibility.
|
||||
template {
|
||||
destination = "local/forge.env"
|
||||
env = true
|
||||
change_mode = "restart"
|
||||
data = <<EOT
|
||||
{{ range nomadService "forgejo" -}}
|
||||
{{ range service "forgejo" -}}
|
||||
FORGE_URL=http://{{ .Address }}:{{ .Port }}
|
||||
{{- end }}
|
||||
EOT
|
||||
|
|
@ -147,16 +149,15 @@ EOT
|
|||
redir /forge/ 302
|
||||
}
|
||||
|
||||
# Reverse proxy to Forgejo — dynamic via Nomad service discovery (#1156)
|
||||
# Reverse proxy to Forgejo
|
||||
handle /forge/* {
|
||||
uri strip_prefix /forge
|
||||
{{ range nomadService "forgejo" }} reverse_proxy {{ .Address }}:{{ .Port }}
|
||||
{{ end }} }
|
||||
reverse_proxy 127.0.0.1:3000
|
||||
}
|
||||
|
||||
# Reverse proxy to Woodpecker CI — dynamic via Nomad service discovery (#1156)
|
||||
# Reverse proxy to Woodpecker CI
|
||||
handle /ci/* {
|
||||
{{ range nomadService "woodpecker" }} reverse_proxy {{ .Address }}:{{ .Port }}
|
||||
{{ end }} }
|
||||
reverse_proxy 127.0.0.1:8000
|
||||
}
|
||||
|
||||
# Reverse proxy to staging — dynamic port via Nomad service discovery
|
||||
handle /staging/* {
|
||||
|
|
@ -164,30 +165,29 @@ EOT
|
|||
{{ range nomadService "staging" }} reverse_proxy {{ .Address }}:{{ .Port }}
|
||||
{{ end }} }
|
||||
|
||||
# Chat service — reverse proxy to disinto-chat backend (#705, #1156)
|
||||
# Chat service — reverse proxy to disinto-chat backend (#705)
|
||||
# OAuth routes bypass forward_auth — unauthenticated users need these (#709)
|
||||
handle /chat/login {
|
||||
{{ range nomadService "chat" }} reverse_proxy {{ .Address }}:{{ .Port }}
|
||||
{{ end }} }
|
||||
reverse_proxy 127.0.0.1:8080
|
||||
}
|
||||
handle /chat/oauth/callback {
|
||||
{{ range nomadService "chat" }} reverse_proxy {{ .Address }}:{{ .Port }}
|
||||
{{ end }} }
|
||||
reverse_proxy 127.0.0.1:8080
|
||||
}
|
||||
# WebSocket endpoint for streaming (#1026)
|
||||
handle /chat/ws {
|
||||
{{ range nomadService "chat" }} reverse_proxy {{ .Address }}:{{ .Port }} {
|
||||
header_up Upgrade {http.request.header.Upgrade}
|
||||
header_up Connection {http.request.header.Connection}
|
||||
}
|
||||
{{ end }} }
|
||||
header_up Upgrade $http.upgrade
|
||||
header_up Connection $http.connection
|
||||
reverse_proxy 127.0.0.1:8080
|
||||
}
|
||||
# Defense-in-depth: forward_auth stamps X-Forwarded-User from session (#709)
|
||||
handle /chat/* {
|
||||
{{ range nomadService "chat" }} forward_auth {{ .Address }}:{{ .Port }} {
|
||||
forward_auth 127.0.0.1:8080 {
|
||||
uri /chat/auth/verify
|
||||
copy_headers X-Forwarded-User
|
||||
header_up X-Forward-Auth-Secret {$FORWARD_AUTH_SECRET}
|
||||
}
|
||||
reverse_proxy {{ .Address }}:{{ .Port }}
|
||||
{{ end }} }
|
||||
reverse_proxy 127.0.0.1:8080
|
||||
}
|
||||
}
|
||||
EOT
|
||||
}
|
||||
|
|
@ -241,7 +241,7 @@ EOT
|
|||
env = true
|
||||
change_mode = "restart"
|
||||
data = <<EOT
|
||||
{{ range nomadService "forgejo" -}}
|
||||
{{ range service "forgejo" -}}
|
||||
FORGE_URL=http://{{ .Address }}:{{ .Port }}
|
||||
{{- end }}
|
||||
EOT
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Planner Agent
|
||||
|
||||
**Role**: Strategic planning using a Prerequisite Tree (Theory of Constraints),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Predictor Agent
|
||||
|
||||
**Role**: Abstract adversary (the "goblin"). Runs a 2-step formula
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Review Agent
|
||||
|
||||
**Role**: AI-powered PR review — post structured findings and formal
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# Supervisor Agent
|
||||
|
||||
**Role**: Health monitoring and auto-remediation, executed as a formula-driven
|
||||
|
|
@ -8,7 +8,7 @@ issues, and writes a daily journal. When blocked on external
|
|||
resources or human decisions, files vault items instead of escalating directly.
|
||||
|
||||
**Trigger**: `supervisor-run.sh` is invoked by two polling loops:
|
||||
- **Agents container** (`docker/agents/entrypoint.sh`): every `SUPERVISOR_INTERVAL` seconds (default 1200 = 20 min). Controlled by the `supervisor` role in `AGENT_ROLES` (included in the default seven-role set since P1/#801). Logs to `data/logs/supervisor/supervisor.log` (canonical sink — both `supervisor-run.sh` internal logging and entrypoint stderr redirect write to this single file).
|
||||
- **Agents container** (`docker/agents/entrypoint.sh`): every `SUPERVISOR_INTERVAL` seconds (default 1200 = 20 min). Controlled by the `supervisor` role in `AGENT_ROLES` (included in the default seven-role set since P1/#801). Logs to `supervisor.log` in the agents container.
|
||||
- **Edge container** (`docker/edge/entrypoint-edge.sh`): separate loop in the edge container (line 169-172). Runs independently of the agents container's polling schedule.
|
||||
|
||||
Both invoke the same `supervisor-run.sh`. Sources `lib/guard.sh` and calls `check_active supervisor` first — skips if `$FACTORY_ROOT/state/.supervisor-active` is absent. Then runs `claude -p` via `agent-sdk.sh`, injects `formulas/run-supervisor.toml` with pre-collected metrics as context, and cleans up on completion or timeout.
|
||||
|
|
@ -39,11 +39,6 @@ Both invoke the same `supervisor-run.sh`. Sources `lib/guard.sh` and calls `chec
|
|||
- `$OPS_REPO_ROOT/knowledge/*.md` — Domain-specific remediation guides (memory,
|
||||
disk, CI, git, dev-agent, review-agent, forge)
|
||||
|
||||
**Canonical log sink**: `data/logs/supervisor/supervisor.log` — all supervisor output
|
||||
(structured log from `supervisor-run.sh` and stderr from the entrypoint invocation)
|
||||
goes to this single file. Do not introduce a second path; see #1150 for the dual-sink
|
||||
incident that motivated unification.
|
||||
|
||||
**Alert priorities**: P0 (memory crisis), P1 (disk), P2 (factory stopped/stalled),
|
||||
P3 (degraded PRs, circular deps, stale deps), P4 (housekeeping).
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +0,0 @@
|
|||
# Test fixture: curl without --max-time should trigger a warning
|
||||
# Used by tests/test-lint-ci.bats to verify the command-level timeout check
|
||||
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
timeout: 5m
|
||||
|
||||
steps:
|
||||
- name: bad-curl
|
||||
image: alpine:3
|
||||
commands:
|
||||
- curl https://example.com
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
# Test fixture: curl with --max-time should pass cleanly
|
||||
# Used by tests/test-lint-ci.bats to verify the command-level timeout check
|
||||
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
timeout: 5m
|
||||
|
||||
steps:
|
||||
- name: good-curl
|
||||
image: alpine:3
|
||||
commands:
|
||||
- curl --max-time 30 https://example.com
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
# Test fixture: step without timeout should trigger an error
|
||||
# Used by tests/test-lint-ci.bats to verify the step-level timeout check
|
||||
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
steps:
|
||||
- name: no-timeout-step
|
||||
image: alpine:3
|
||||
commands:
|
||||
- echo "this step has no timeout"
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
# Test fixture: workflow-level timeout should satisfy all steps
|
||||
# Used by tests/test-lint-ci.bats to verify workflow-level timeout propagation
|
||||
|
||||
when:
|
||||
- event: pull_request
|
||||
|
||||
timeout: 10m
|
||||
|
||||
steps:
|
||||
- name: inherits-timeout
|
||||
image: alpine:3
|
||||
commands:
|
||||
- echo "inherits workflow timeout"
|
||||
|
|
@ -1,233 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# =============================================================================
|
||||
# tests/lib-ci-required-contexts.bats — Unit tests for ci_required_contexts()
|
||||
# and the required-context reducer in ci_commit_status().
|
||||
#
|
||||
# Verifies that when branch protection declares required status check contexts,
|
||||
# ci_commit_status() reduces over just those — optional workflows that are
|
||||
# stuck/failed do not block decisions (#1136).
|
||||
#
|
||||
# Uses a curl shim to return canned forge API responses.
|
||||
# =============================================================================
|
||||
|
||||
setup() {
|
||||
ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
|
||||
export FACTORY_ROOT="$ROOT"
|
||||
export FORGE_TOKEN="dummy-token"
|
||||
export FORGE_URL="https://forge.example.test"
|
||||
export FORGE_API="${FORGE_URL}/api/v1/repos/owner/repo"
|
||||
export PRIMARY_BRANCH="main"
|
||||
export WOODPECKER_REPO_ID="0" # disable Woodpecker path
|
||||
|
||||
# Reset cache between tests
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
|
||||
export CALLS_LOG="${BATS_TEST_TMPDIR}/curl-calls.log"
|
||||
: > "$CALLS_LOG"
|
||||
|
||||
# Mock forge_api — mirrors lib/env.sh shape
|
||||
forge_api() {
|
||||
local method="$1" path="$2"
|
||||
shift 2
|
||||
curl -sf -X "$method" \
|
||||
-H "Authorization: token ${FORGE_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${FORGE_API}${path}" "$@"
|
||||
}
|
||||
|
||||
# Mock forge_api_all (used by some ci-helpers functions)
|
||||
forge_api_all() {
|
||||
forge_api GET "$1"
|
||||
}
|
||||
|
||||
# Mock woodpecker_api (not used when WOODPECKER_REPO_ID=0, but needed for source)
|
||||
woodpecker_api() { return 1; }
|
||||
|
||||
# Default mock responses — overridden per test
|
||||
# Branch protection: status checks enabled, "ci" is required
|
||||
export MOCK_BP_ENABLED="true"
|
||||
export MOCK_BP_CONTEXTS='["ci"]'
|
||||
|
||||
# Commit statuses: "ci" success, "edge-subpath" pending
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"ci","status":"success","created_at":"2026-01-01T00:00:00Z"},
|
||||
{"id":2,"context":"edge-subpath","status":"pending","created_at":"2026-01-01T00:00:01Z"}
|
||||
]'
|
||||
|
||||
curl() {
|
||||
local method="GET" url="" arg
|
||||
while [ $# -gt 0 ]; do
|
||||
arg="$1"
|
||||
case "$arg" in
|
||||
-X) method="$2"; shift 2 ;;
|
||||
-H|-d|--data-binary|-o) shift 2 ;;
|
||||
-w) shift 2 ;;
|
||||
-sf|-s|-f|--silent|--fail) shift ;;
|
||||
*) url="$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
printf '%s %s\n' "$method" "$url" >> "$CALLS_LOG"
|
||||
|
||||
case "$url" in
|
||||
*"/branch_protections/"*)
|
||||
printf '{"enable_status_check":%s,"status_check_contexts":%s}' \
|
||||
"$MOCK_BP_ENABLED" "$MOCK_BP_CONTEXTS"
|
||||
;;
|
||||
*"/commits/"*"/status")
|
||||
printf '{"state":"pending","statuses":%s}' "$MOCK_STATUSES"
|
||||
;;
|
||||
*)
|
||||
return 1
|
||||
;;
|
||||
esac
|
||||
return 0
|
||||
}
|
||||
|
||||
source "${ROOT}/lib/ci-helpers.sh"
|
||||
}
|
||||
|
||||
# ── ci_required_contexts tests ───────────────────────────────────────────────
|
||||
|
||||
@test "ci_required_contexts returns context list when status checks enabled" {
|
||||
run ci_required_contexts
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "ci" ]]
|
||||
}
|
||||
|
||||
@test "ci_required_contexts returns empty when status checks disabled" {
|
||||
export MOCK_BP_ENABLED="false"
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_required_contexts
|
||||
[ "$status" -eq 0 ]
|
||||
[ -z "$output" ]
|
||||
}
|
||||
|
||||
@test "ci_required_contexts returns empty when branch protection not found" {
|
||||
curl() {
|
||||
return 1
|
||||
}
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_required_contexts
|
||||
[ "$status" -eq 0 ]
|
||||
[ -z "$output" ]
|
||||
}
|
||||
|
||||
@test "ci_required_contexts caches result across calls" {
  # Call in the current shell (no `run`, no $(...)) so the module-level cache
  # variable set by the first call is visible to the second.
  ci_required_contexts >/dev/null
  ci_required_contexts >/dev/null
  # Only one API call despite two invocations.
  # grep -c already prints "0" (and exits 1) when nothing matches, so pairing
  # it with `|| echo 0` would produce "0\n0" and break the numeric comparison;
  # swallow the exit status instead and default an empty result to 0.
  local call_count
  call_count=$(grep -c "branch_protections" "$CALLS_LOG" 2>/dev/null || true)
  [ "${call_count:-0}" -eq 1 ]
}
|
||||
|
||||
@test "ci_required_contexts returns multiple contexts" {
|
||||
export MOCK_BP_CONTEXTS='["ci","lint"]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_required_contexts
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"ci"* ]]
|
||||
[[ "$output" == *"lint"* ]]
|
||||
}
|
||||
|
||||
# ── ci_commit_status with required contexts ──────────────────────────────────
|
||||
|
||||
@test "ci_commit_status returns success when required context passes (optional pending)" {
|
||||
# "ci" is success, "edge-subpath" is pending — should report success
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "success" ]]
|
||||
}
|
||||
|
||||
@test "ci_commit_status returns failure when required context fails (optional success)" {
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"ci","status":"failure","created_at":"2026-01-01T00:00:00Z"},
|
||||
{"id":2,"context":"edge-subpath","status":"success","created_at":"2026-01-01T00:00:01Z"}
|
||||
]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "failure" ]]
|
||||
}
|
||||
|
||||
@test "ci_commit_status returns pending when required context has no status yet" {
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"edge-subpath","status":"success","created_at":"2026-01-01T00:00:00Z"}
|
||||
]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "pending" ]]
|
||||
}
|
||||
|
||||
@test "ci_commit_status returns success when all required contexts pass" {
|
||||
export MOCK_BP_CONTEXTS='["ci","lint"]'
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"ci","status":"success","created_at":"2026-01-01T00:00:00Z"},
|
||||
{"id":2,"context":"lint","status":"success","created_at":"2026-01-01T00:00:01Z"},
|
||||
{"id":3,"context":"edge-subpath","status":"failure","created_at":"2026-01-01T00:00:02Z"}
|
||||
]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "success" ]]
|
||||
}
|
||||
|
||||
@test "ci_commit_status returns failure when any required context fails" {
|
||||
export MOCK_BP_CONTEXTS='["ci","lint"]'
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"ci","status":"success","created_at":"2026-01-01T00:00:00Z"},
|
||||
{"id":2,"context":"lint","status":"error","created_at":"2026-01-01T00:00:01Z"},
|
||||
{"id":3,"context":"edge-subpath","status":"success","created_at":"2026-01-01T00:00:02Z"}
|
||||
]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "failure" ]]
|
||||
}
|
||||
|
||||
@test "ci_commit_status uses latest status per context (re-run overwrites)" {
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"ci","status":"failure","created_at":"2026-01-01T00:00:00Z"},
|
||||
{"id":3,"context":"ci","status":"success","created_at":"2026-01-01T00:01:00Z"}
|
||||
]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "success" ]]
|
||||
}
|
||||
|
||||
# ── incident reproduction shape ──────────────────────────────────────────────
|
||||
|
||||
@test "incident shape: required ci passes, optional edge-subpath stuck pending — returns success" {
|
||||
# This is the exact scenario from the 2026-04-21 incident:
|
||||
# - "ci" workflow: success
|
||||
# - "edge-subpath" (optional): stuck pending
|
||||
# - Combined state would be "pending" (worst of all)
|
||||
# - With fix: only "ci" matters → success
|
||||
export MOCK_BP_CONTEXTS='["ci"]'
|
||||
export MOCK_STATUSES='[
|
||||
{"id":1,"context":"ci","status":"success","created_at":"2026-01-01T00:00:00Z"},
|
||||
{"id":2,"context":"edge-subpath","status":"pending","created_at":"2026-01-01T00:00:01Z"},
|
||||
{"id":3,"context":"caddy-validate","status":"failure","created_at":"2026-01-01T00:00:02Z"}
|
||||
]'
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == "success" ]]
|
||||
}
|
||||
|
||||
# ── fallback: no required contexts → original behavior ───────────────────────
|
||||
|
||||
@test "ci_commit_status falls back to combined state when no required contexts" {
|
||||
export MOCK_BP_ENABLED="false"
|
||||
export WOODPECKER_REPO_ID="0"
|
||||
unset _CI_REQUIRED_CONTEXTS
|
||||
|
||||
# Combined state is "pending" (from MOCK_STATUSES default)
|
||||
# Without required contexts, falls through to forge combined .state
|
||||
run ci_commit_status "abc123"
|
||||
[ "$status" -eq 0 ]
|
||||
# Falls back to .state from combined endpoint → "pending"
|
||||
[[ "$output" == "pending" ]]
|
||||
}
|
||||
|
|
@ -89,13 +89,6 @@ check_forgejo_routing() {
|
|||
tr_fail "Missing Forgejo handle block (handle /forge/*)"
|
||||
fi
|
||||
|
||||
# Check uri strip_prefix /forge (required for Forgejo routing)
|
||||
if echo "$CADDYFILE" | grep -q "uri strip_prefix /forge"; then
|
||||
tr_pass "Forgejo strip_prefix configured (/forge)"
|
||||
else
|
||||
tr_fail "Missing Forgejo strip_prefix (/forge)"
|
||||
fi
|
||||
|
||||
# Check reverse_proxy to Forgejo on port 3000
|
||||
if echo "$CADDYFILE" | grep -q "reverse_proxy 127.0.0.1:3000"; then
|
||||
tr_pass "Forgejo reverse_proxy configured (127.0.0.1:3000)"
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
# tests/test-lint-ci.bats — Tests for `disinto validate lint-ci`
|
||||
#
|
||||
# Verifies the CI timeout validator:
|
||||
# 1. Step-level timeout errors fire when missing
|
||||
# 2. Workflow-level timeout satisfies all steps
|
||||
# 3. curl without --max-time triggers a warning
|
||||
# 4. curl with --max-time passes cleanly
|
||||
|
||||
load bats
|
||||
|
||||
DISINTO="${FACTORY_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}/bin/disinto"
|
||||
FIXTURES="$(cd "$(dirname "$0")/fixtures/lint-ci" && pwd)"
|
||||
|
||||
# ── Step-level timeout errors ────────────────────────────────────────────────
|
||||
|
||||
@test "missing step timeout triggers error" {
|
||||
local output
|
||||
output=$(bash "$DISINTO" validate lint-ci "$FIXTURES/missing-timeout" 2>&1)
|
||||
local rc=$?
|
||||
echo "$output"
|
||||
[ "$rc" -eq 1 ]
|
||||
echo "$output" | grep -q "error:.*no-timeout-step.*step has no timeout"
|
||||
}
|
||||
|
||||
@test "workflow-level timeout satisfies all steps" {
|
||||
local output
|
||||
output=$(bash "$DISINTO" validate lint-ci "$FIXTURES/workflow-timeout" 2>&1)
|
||||
local rc=$?
|
||||
echo "$output"
|
||||
[ "$rc" -eq 0 ]
|
||||
echo "$output" | grep -q "lint-ci: 0 error(s), 0 warning(s)"
|
||||
}
|
||||
|
||||
# ── Command-level timeout warnings ───────────────────────────────────────────
|
||||
|
||||
@test "curl without --max-time triggers warning" {
|
||||
local output
|
||||
output=$(bash "$DISINTO" validate lint-ci "$FIXTURES/bad-curl" 2>&1)
|
||||
local rc=$?
|
||||
echo "$output"
|
||||
[ "$rc" -eq 0 ]
|
||||
echo "$output" | grep -q "warning:.*curl without --max-time"
|
||||
}
|
||||
|
||||
@test "curl with --max-time passes cleanly" {
|
||||
local output
|
||||
output=$(bash "$DISINTO" validate lint-ci "$FIXTURES/good-curl" 2>&1)
|
||||
local rc=$?
|
||||
echo "$output"
|
||||
[ "$rc" -eq 0 ]
|
||||
echo "$output" | grep -q "lint-ci: 0 error(s), 0 warning(s)"
|
||||
}
|
||||
|
|
@ -210,7 +210,7 @@ chmod 0750 "$LOG_DIR"
|
|||
|
||||
# Touch the log file so it exists from day one
|
||||
touch "$LOG_FILE"
|
||||
chmod 0660 "$LOG_FILE"
|
||||
chmod 0640 "$LOG_FILE"
|
||||
chown root:disinto-register "$LOG_FILE"
|
||||
|
||||
# Install logrotate config (daily rotation, 30 days retention)
|
||||
|
|
@ -223,7 +223,7 @@ ${LOG_FILE} {
|
|||
delaycompress
|
||||
missingok
|
||||
notifempty
|
||||
create 0660 root disinto-register
|
||||
create 0640 root disinto-register
|
||||
copytruncate
|
||||
}
|
||||
EOF
|
||||
|
|
|
|||
|
|
@ -244,22 +244,23 @@ do_deregister() {
|
|||
# Record who is deregistering before removal
|
||||
local deregistered_by="$CALLER"
|
||||
|
||||
# Get current port and stored pubkey before removing
|
||||
local port stored_pubkey pubkey_fp
|
||||
# Get current port and pubkey before removing
|
||||
local port pubkey_fp
|
||||
port=$(get_port "$project")
|
||||
stored_pubkey=$(get_project_info "$project" | jq -r '.pubkey // empty' 2>/dev/null) || stored_pubkey=""
|
||||
|
||||
# Return a single generic error — project nonexistence and ownership
|
||||
# failure must not be distinguishable to the caller (prevents enumeration).
|
||||
if [ -z "$port" ] || [ "$caller_pubkey" != "$stored_pubkey" ]; then
|
||||
# Audit the attempt before we fail so operators can investigate.
|
||||
pubkey_fp=$(ssh-keygen -lf /dev/stdin <<<"$stored_pubkey" 2>/dev/null | awk '{print $2}') || pubkey_fp="unknown"
|
||||
audit_log "deregister" "$project" "${port:-unknown}" "$pubkey_fp"
|
||||
echo '{"error":"deregister denied"}'
|
||||
if [ -z "$port" ]; then
|
||||
echo '{"error":"project not found"}'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify caller owns this project — pubkey must match stored value
|
||||
local stored_pubkey
|
||||
stored_pubkey=$(get_project_info "$project" | jq -r '.pubkey // empty' 2>/dev/null) || stored_pubkey=""
|
||||
if [ "$caller_pubkey" != "$stored_pubkey" ]; then
|
||||
echo '{"error":"pubkey mismatch"}'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Compute fingerprint for success-path audit log
|
||||
pubkey_fp=$(ssh-keygen -lf /dev/stdin <<<"$stored_pubkey" 2>/dev/null | awk '{print $2}') || pubkey_fp="unknown"
|
||||
|
||||
# Remove from registry
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
<!-- last-reviewed: 5be020b9de1a719cb331b930cf45caf7559473f7 -->
|
||||
<!-- last-reviewed: 19ead14edecbc4e05e7bfe3d43f573ca8189e953 -->
|
||||
# vault/policies/ — Agent Instructions
|
||||
|
||||
HashiCorp Vault ACL policies for the disinto factory. One `.hcl` file per
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue