diff --git a/docker/chat/server.py b/docker/chat/server.py index 0623955..f711b5a 100644 --- a/docker/chat/server.py +++ b/docker/chat/server.py @@ -23,7 +23,6 @@ The claude binary is expected to be mounted from the host at /usr/local/bin/clau """ import asyncio -import datetime import json import os import re @@ -61,10 +60,6 @@ EDGE_ROUTING_MODE = os.environ.get("EDGE_ROUTING_MODE", "subpath") # (acceptable during local dev; production MUST set this). FORWARD_AUTH_SECRET = os.environ.get("FORWARD_AUTH_SECRET", "") -# Rate limiting / cost caps (#711) -CHAT_MAX_REQUESTS_PER_HOUR = int(os.environ.get("CHAT_MAX_REQUESTS_PER_HOUR", 60)) -CHAT_MAX_REQUESTS_PER_DAY = int(os.environ.get("CHAT_MAX_REQUESTS_PER_DAY", 500)) -CHAT_MAX_TOKENS_PER_DAY = int(os.environ.get("CHAT_MAX_TOKENS_PER_DAY", 1000000)) # Allowed users - disinto-admin always allowed; CSV allowlist extends it _allowed_csv = os.environ.get("DISINTO_CHAT_ALLOWED_USERS", "") @@ -90,11 +85,6 @@ _sessions = {} # Pending OAuth state tokens: state -> expires (float) _oauth_states = {} -# Per-user rate limiting state (#711) -# user -> list of request timestamps (for sliding-window hourly/daily caps) -_request_log = {} -# user -> {"tokens": int, "date": "YYYY-MM-DD"} -_daily_tokens = {} # WebSocket message queues per user # user -> asyncio.Queue (for streaming messages to connected clients) @@ -213,69 +203,9 @@ def _fetch_user(access_token): return None -# ============================================================================= -# Rate Limiting Functions (#711) -# ============================================================================= - -def _check_rate_limit(user): - """Check per-user rate limits. Returns (allowed, retry_after, reason) (#711). - - Checks hourly request cap, daily request cap, and daily token cap. - """ - now = time.time() - one_hour_ago = now - 3600 - today = datetime.date.today().isoformat() - - # Prune old entries from request log - timestamps = _request_log.get(user, []) - timestamps = [t for t in timestamps if t > now - 86400] - _request_log[user] = timestamps - - # Hourly request cap - hourly = [t for t in timestamps if t > one_hour_ago] - if len(hourly) >= CHAT_MAX_REQUESTS_PER_HOUR: - oldest_in_window = min(hourly) - retry_after = int(oldest_in_window + 3600 - now) + 1 - return False, max(retry_after, 1), "hourly request limit" - - # Daily request cap - start_of_day = time.mktime(datetime.date.today().timetuple()) - daily = [t for t in timestamps if t >= start_of_day] - if len(daily) >= CHAT_MAX_REQUESTS_PER_DAY: - next_day = start_of_day + 86400 - retry_after = int(next_day - now) + 1 - return False, max(retry_after, 1), "daily request limit" - - # Daily token cap - token_info = _daily_tokens.get(user, {"tokens": 0, "date": today}) - if token_info["date"] != today: - token_info = {"tokens": 0, "date": today} - _daily_tokens[user] = token_info - if token_info["tokens"] >= CHAT_MAX_TOKENS_PER_DAY: - next_day = start_of_day + 86400 - retry_after = int(next_day - now) + 1 - return False, max(retry_after, 1), "daily token limit" - - return True, 0, "" - - -def _record_request(user): - """Record a request timestamp for the user (#711).""" - _request_log.setdefault(user, []).append(time.time()) - - -def _record_tokens(user, tokens): - """Record token usage for the user (#711).""" - today = datetime.date.today().isoformat() - token_info = _daily_tokens.get(user, {"tokens": 0, "date": today}) - if token_info["date"] != today: - token_info = {"tokens": 0, "date": today} - token_info["tokens"] += tokens - _daily_tokens[user] = token_info - def _parse_stream_json(output): - """Parse stream-json output from claude --print (#711). + """Parse stream-json output from claude --print. Returns (text_content, total_tokens). Falls back gracefully if the usage event is absent or malformed. @@ -1063,34 +993,13 @@ class ChatHandler(BaseHTTPRequestHandler): except IOError as e: self.send_error_page(500, f"Error reading file: {e}") - def _send_rate_limit_response(self, retry_after, reason): - """Send a 429 response with Retry-After header and HTMX fragment (#711).""" - body = ( - f'
' - f"Rate limit exceeded: {reason}. " - f"Please try again in {retry_after} seconds." - f"
" - ) - self.send_response(429) - self.send_header("Retry-After", str(retry_after)) - self.send_header("Content-Type", "text/html; charset=utf-8") - self.send_header("Content-Length", str(len(body.encode("utf-8")))) - self.end_headers() - self.wfile.write(body.encode("utf-8")) - + def handle_chat(self, user): """ Handle chat requests by spawning `claude --print` with the user message. - Enforces per-user rate limits and tracks token usage (#711). Streams tokens over WebSocket if connected. """ - # Check rate limits before processing (#711) - allowed, retry_after, reason = _check_rate_limit(user) - if not allowed: - self._send_rate_limit_response(retry_after, reason) - return - # Read request body content_length = int(self.headers.get("Content-Length", 0)) if content_length == 0: @@ -1127,9 +1036,6 @@ class ChatHandler(BaseHTTPRequestHandler): if not conv_id or not _validate_conversation_id(conv_id): conv_id = _generate_conversation_id() - # Record request for rate limiting (#711) - _record_request(user) - try: # Save user message to history _write_message(user, conv_id, "user", message) @@ -1194,14 +1100,6 @@ class ChatHandler(BaseHTTPRequestHandler): # Combine response parts response = "".join(response_parts) - # Track token usage - does not block *this* request (#711) - if total_tokens > 0: - _record_tokens(user, total_tokens) - print( - f"Token usage: user={user} tokens={total_tokens}", - file=sys.stderr, - ) - # Fall back to raw output if stream-json parsing yielded no text if not response: response = proc.stdout.getvalue() if hasattr(proc.stdout, 'getvalue') else "" @@ -1294,18 +1192,6 @@ class ChatHandler(BaseHTTPRequestHandler): self.send_error_page(401, "Unauthorized: no valid session") return - # Check rate limits before allowing WebSocket connection - allowed, retry_after, reason = _check_rate_limit(user) - if not allowed: - self.send_error_page( - 429, - f"Rate limit exceeded: {reason}. Retry after {retry_after}s", - ) - return - - # Record request for rate limiting - _record_request(user) - # Create message queue for this user _websocket_queues[user] = asyncio.Queue() @@ -1421,12 +1307,6 @@ def main(): print("forward_auth secret configured (#709)", file=sys.stderr) else: print("WARNING: FORWARD_AUTH_SECRET not set - verify endpoint unrestricted", file=sys.stderr) - print( - f"Rate limits (#711): {CHAT_MAX_REQUESTS_PER_HOUR}/hr, " - f"{CHAT_MAX_REQUESTS_PER_DAY}/day, " - f"{CHAT_MAX_TOKENS_PER_DAY} tokens/day", - file=sys.stderr, - ) httpd.serve_forever() diff --git a/lib/generators.sh b/lib/generators.sh index aa8c373..b8d48bb 100644 --- a/lib/generators.sh +++ b/lib/generators.sh @@ -673,6 +673,7 @@ COMPOSEEOF # Chat container — Claude chat UI backend (#705) # Internal service only; edge proxy routes to chat:8080 # Sandbox hardened per #706 — no docker.sock, read-only rootfs, minimal caps + # Rate limiting removed (#1084) chat: build: context: ./docker/chat @@ -708,10 +709,7 @@ COMPOSEEOF DISINTO_CHAT_ALLOWED_USERS: ${DISINTO_CHAT_ALLOWED_USERS:-} # Shared secret for Caddy forward_auth verify endpoint (#709) FORWARD_AUTH_SECRET: ${FORWARD_AUTH_SECRET:-} - # Cost caps / rate limiting (#711) - CHAT_MAX_REQUESTS_PER_HOUR: ${CHAT_MAX_REQUESTS_PER_HOUR:-60} - CHAT_MAX_REQUESTS_PER_DAY: ${CHAT_MAX_REQUESTS_PER_DAY:-500} - CHAT_MAX_TOKENS_PER_DAY: ${CHAT_MAX_TOKENS_PER_DAY:-1000000} + # Rate limiting removed (#1084) healthcheck: test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"] interval: 30s diff --git a/nomad/jobs/chat.hcl b/nomad/jobs/chat.hcl index ad18cec..95f86ab 100644 --- a/nomad/jobs/chat.hcl +++ b/nomad/jobs/chat.hcl @@ -119,9 +119,7 @@ job "chat" { # CHAT_OAUTH_CLIENT_ID, CHAT_OAUTH_CLIENT_SECRET, FORWARD_AUTH_SECRET # rendered from kv/disinto/shared/chat via template stanza. env { - FORGE_URL = "http://forgejo:3000" - CHAT_MAX_REQUESTS_PER_HOUR = "60" - CHAT_MAX_REQUESTS_PER_DAY = "1000" + FORGE_URL = "http://forgejo:3000" } # ── Vault-templated secrets (S5.2, issue #989) ─────────────────────────