diff --git a/docker-compose.yml b/docker-compose.yml
index 65a7f58..c8c34ab 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -49,6 +49,12 @@ services:
       - GARDENER_INTERVAL=${GARDENER_INTERVAL:-21600}
       - ARCHITECT_INTERVAL=${ARCHITECT_INTERVAL:-21600}
       - PLANNER_INTERVAL=${PLANNER_INTERVAL:-43200}
+    healthcheck:
+      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
+      interval: 60s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
     depends_on:
       forgejo:
         condition: service_healthy
@@ -103,6 +109,12 @@ services:
       - CLAUDE_CONFIG_DIR=${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
       - POLL_INTERVAL=${POLL_INTERVAL:-300}
       - AGENT_ROLES=dev
+    healthcheck:
+      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
+      interval: 60s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
     depends_on:
       forgejo:
         condition: service_healthy
@@ -156,6 +168,12 @@ services:
     ports:
       - "80:80"
       - "443:443"
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:2019/config/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 15s
     depends_on:
       - forgejo
     networks:
diff --git a/docker/chat/Dockerfile b/docker/chat/Dockerfile
index 81aebbe..3d89863 100644
--- a/docker/chat/Dockerfile
+++ b/docker/chat/Dockerfile
@@ -30,6 +30,6 @@ WORKDIR /var/chat
 EXPOSE 8080
 
 HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
-    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/')" || exit 1
+    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')" || exit 1
 
 ENTRYPOINT ["/entrypoint-chat.sh"]
diff --git a/docker/chat/server.py b/docker/chat/server.py
index ad8897d..6748354 100644
--- a/docker/chat/server.py
+++ b/docker/chat/server.py
@@ -481,6 +481,14 @@ class ChatHandler(BaseHTTPRequestHandler):
         parsed = urlparse(self.path)
         path = parsed.path
 
+        # Health endpoint (no auth required) — used by Docker healthcheck
+        if path == "/health":
+            self.send_response(200)
+            self.send_header("Content-Type", "text/plain")
+            self.end_headers()
+            self.wfile.write(b"ok\n")
+            return
+
         # Verify endpoint for Caddy forward_auth (#709)
         if path == "/chat/auth/verify":
             self.handle_auth_verify()
diff --git a/lib/generators.sh b/lib/generators.sh
index 17f91a4..c32a543 100644
--- a/lib/generators.sh
+++ b/lib/generators.sh
@@ -320,6 +320,12 @@ services:
       WOODPECKER_HEALTHCHECK_ADDR: ":3333"
       WOODPECKER_BACKEND_DOCKER_NETWORK: disinto_disinto-net
       WOODPECKER_MAX_WORKFLOWS: 1
+    healthcheck:
+      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3333/healthz"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 15s
     depends_on:
       - woodpecker
 
@@ -374,6 +380,12 @@ services:
       # Vault-only secrets (GITHUB_TOKEN, CLAWHUB_TOKEN, deploy keys) live in
       # secrets/*.enc and are NEVER injected here — only the runner
       # container receives them at fire time (AD-006, #745, #777).
+    healthcheck:
+      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
+      interval: 60s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
     depends_on:
       forgejo:
         condition: service_healthy
@@ -428,6 +440,12 @@ COMPOSEEOF
       CLAUDE_CONFIG_DIR: ${CLAUDE_CONFIG_DIR:-/var/lib/disinto/claude-shared/config}
       POLL_INTERVAL: ${POLL_INTERVAL:-300}
       AGENT_ROLES: dev
+    healthcheck:
+      test: ["CMD", "pgrep", "-f", "entrypoint.sh"]
+      interval: 60s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
     depends_on:
       forgejo:
         condition: service_healthy
@@ -499,6 +517,12 @@ LLAMAEOF
       - ./secrets/tunnel_key:/run/secrets/tunnel_key:ro
       - ${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}:${CLAUDE_SHARED_DIR:-/var/lib/disinto/claude-shared}
       - ${HOME}/.claude.json:/home/agent/.claude.json:ro
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:2019/config/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 15s
     depends_on:
       forgejo:
         condition: service_healthy
@@ -516,6 +540,14 @@ LLAMAEOF
     command: ["caddy", "file-server", "--root", "/srv/site"]
     security_opt:
       - apparmor=unconfined
+    # caddy file-server runs with the admin API disabled, so probe the
+    # site listener on :80 instead of :2019 (needs / to answer 2xx).
+    healthcheck:
+      test: ["CMD", "wget", "-q", "--spider", "http://localhost:80/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
     volumes:
       - ./docker:/srv/site:ro
     networks:
@@ -575,6 +607,12 @@ LLAMAEOF
       CHAT_MAX_REQUESTS_PER_HOUR: ${CHAT_MAX_REQUESTS_PER_HOUR:-60}
       CHAT_MAX_REQUESTS_PER_DAY: ${CHAT_MAX_REQUESTS_PER_DAY:-500}
       CHAT_MAX_TOKENS_PER_DAY: ${CHAT_MAX_TOKENS_PER_DAY:-1000000}
+    healthcheck:
+      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
     networks:
       - disinto-net
 