Merge pull request 'fix: [nomad-prep] P5 — add healthchecks to agents, edge, staging, woodpecker-agent (#794)' (#809) from fix/issue-794 into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
This commit is contained in:
commit
6a4ca5c3a0
4 changed files with 63 additions and 1 deletions
|
|
@ -49,6 +49,12 @@ services:
|
||||||
- GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
|
- GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
|
||||||
- ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
|
- ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
|
||||||
- PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
|
- PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||||
|
interval: 60s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 30s
|
||||||
depends_on:
|
depends_on:
|
||||||
forgejo:
|
forgejo:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -103,6 +109,12 @@ services:
|
||||||
- CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
|
- CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
|
||||||
- POLL_INTERVAL=${POLL_INTERVAL:-300}
|
- POLL_INTERVAL=${POLL_INTERVAL:-300}
|
||||||
- AGENT_ROLES=dev
|
- AGENT_ROLES=dev
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||||
|
interval: 60s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 30s
|
||||||
depends_on:
|
depends_on:
|
||||||
forgejo:
|
forgejo:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -156,6 +168,12 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- "80:80"
|
- "80:80"
|
||||||
- "443:443"
|
- "443:443"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-fsS", "http://localhost:2019/config/"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 15s
|
||||||
depends_on:
|
depends_on:
|
||||||
- forgejo
|
- forgejo
|
||||||
networks:
|
networks:
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,6 @@ WORKDIR /var/chat
|
||||||
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||||
CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/')" || exit 1
|
CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')" || exit 1
|
||||||
|
|
||||||
ENTRYPOINT ["/entrypoint-chat.sh"]
|
ENTRYPOINT ["/entrypoint-chat.sh"]
|
||||||
|
|
|
||||||
|
|
@ -481,6 +481,14 @@ class ChatHandler(BaseHTTPRequestHandler):
|
||||||
parsed = urlparse(self.path)
|
parsed = urlparse(self.path)
|
||||||
path = parsed.path
|
path = parsed.path
|
||||||
|
|
||||||
|
# Health endpoint (no auth required) — used by Docker healthcheck
|
||||||
|
if path == "/health":
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/plain")
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(b"ok\n")
|
||||||
|
return
|
||||||
|
|
||||||
# Verify endpoint for Caddy forward_auth (#709)
|
# Verify endpoint for Caddy forward_auth (#709)
|
||||||
if path == "/chat/auth/verify":
|
if path == "/chat/auth/verify":
|
||||||
self.handle_auth_verify()
|
self.handle_auth_verify()
|
||||||
|
|
|
||||||
|
|
@ -320,6 +320,12 @@ services:
|
||||||
WOODPECKER_HEALTHCHECK_ADDR: ":3333"
|
WOODPECKER_HEALTHCHECK_ADDR: ":3333"
|
||||||
WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
|
WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
|
||||||
WOODPECKER_MAX_WORKFLOWS: 1
|
WOODPECKER_MAX_WORKFLOWS: 1
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "-q", "--spider", "http://localhost:3333/healthz"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 15s
|
||||||
depends_on:
|
depends_on:
|
||||||
- woodpecker
|
- woodpecker
|
||||||
|
|
||||||
|
|
@ -374,6 +380,12 @@ services:
|
||||||
# Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
|
# Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
|
||||||
# secrets/*.enc and are NEVER injected here — only the runner
|
# secrets/*.enc and are NEVER injected here — only the runner
|
||||||
# container receives them at fire time (AD-006, #745, #777).
|
# container receives them at fire time (AD-006, #745, #777).
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||||
|
interval: 60s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 30s
|
||||||
depends_on:
|
depends_on:
|
||||||
forgejo:
|
forgejo:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -428,6 +440,12 @@ COMPOSEEOF
|
||||||
CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
|
CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
|
||||||
POLL_INTERVAL: ${POLL_INTERVAL:-300}
|
POLL_INTERVAL: ${POLL_INTERVAL:-300}
|
||||||
AGENT_ROLES: dev
|
AGENT_ROLES: dev
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
|
||||||
|
interval: 60s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 30s
|
||||||
depends_on:
|
depends_on:
|
||||||
forgejo:
|
forgejo:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -499,6 +517,12 @@ LLAMAEOF
|
||||||
- ./secrets/tunnel_key:/run/secrets/tunnel_key:ro
|
- ./secrets/tunnel_key:/run/secrets/tunnel_key:ro
|
||||||
- ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
|
- ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
|
||||||
- ${HOME}/.claude.json:/home/agent/.claude.json:ro
|
- ${HOME}/.claude.json:/home/agent/.claude.json:ro
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-fsS", "http://localhost:2019/config/"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 15s
|
||||||
depends_on:
|
depends_on:
|
||||||
forgejo:
|
forgejo:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -516,6 +540,12 @@ LLAMAEOF
|
||||||
command: ["caddy", "file-server", "--root", "/srv/site"]
|
command: ["caddy", "file-server", "--root", "/srv/site"]
|
||||||
security_opt:
|
security_opt:
|
||||||
- apparmor=unconfined
|
- apparmor=unconfined
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "-q", "--spider", "http://localhost:2019/config/"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
volumes:
|
volumes:
|
||||||
- ./docker:/srv/site:ro
|
- ./docker:/srv/site:ro
|
||||||
networks:
|
networks:
|
||||||
|
|
@ -575,6 +605,12 @@ LLAMAEOF
|
||||||
CHAT_MAX_REQUESTS_PER_HOUR: ${CHAT_MAX_REQUESTS_PER_HOUR:-60}
|
CHAT_MAX_REQUESTS_PER_HOUR: ${CHAT_MAX_REQUESTS_PER_HOUR:-60}
|
||||||
CHAT_MAX_REQUESTS_PER_DAY: ${CHAT_MAX_REQUESTS_PER_DAY:-500}
|
CHAT_MAX_REQUESTS_PER_DAY: ${CHAT_MAX_REQUESTS_PER_DAY:-500}
|
||||||
CHAT_MAX_TOKENS_PER_DAY: ${CHAT_MAX_TOKENS_PER_DAY:-1000000}
|
CHAT_MAX_TOKENS_PER_DAY: ${CHAT_MAX_TOKENS_PER_DAY:-1000000}
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
networks:
|
networks:
|
||||||
- disinto-net
|
- disinto-net
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue