fix: feat: drop chat rate-limiting — remove per-user hour/day request caps and token cap (reverts #711) (#1084)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/edge-subpath Pipeline was successful
ci/woodpecker/pr/nomad-validate Pipeline was successful
ci/woodpecker/pr/secret-scan Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/nomad-validate Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/pr/edge-subpath Pipeline was successful
ci/woodpecker/pr/nomad-validate Pipeline was successful
ci/woodpecker/pr/secret-scan Pipeline was successful
ci/woodpecker/pr/smoke-init Pipeline was successful
This commit is contained in:
parent
398a7398a9
commit
aa12703135
3 changed files with 5 additions and 129 deletions
|
|
@ -23,7 +23,6 @@ The claude binary is expected to be mounted from the host at /usr/local/bin/clau
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import datetime
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
@ -61,10 +60,6 @@ EDGE_ROUTING_MODE = os.environ.get("EDGE_ROUTING_MODE", "subpath")
|
||||||
# (acceptable during local dev; production MUST set this).
|
# (acceptable during local dev; production MUST set this).
|
||||||
FORWARD_AUTH_SECRET = os.environ.get("FORWARD_AUTH_SECRET", "")
|
FORWARD_AUTH_SECRET = os.environ.get("FORWARD_AUTH_SECRET", "")
|
||||||
|
|
||||||
# Rate limiting / cost caps (#711)
|
|
||||||
CHAT_MAX_REQUESTS_PER_HOUR = int(os.environ.get("CHAT_MAX_REQUESTS_PER_HOUR", 60))
|
|
||||||
CHAT_MAX_REQUESTS_PER_DAY = int(os.environ.get("CHAT_MAX_REQUESTS_PER_DAY", 500))
|
|
||||||
CHAT_MAX_TOKENS_PER_DAY = int(os.environ.get("CHAT_MAX_TOKENS_PER_DAY", 1000000))
|
|
||||||
|
|
||||||
# Allowed users - disinto-admin always allowed; CSV allowlist extends it
|
# Allowed users - disinto-admin always allowed; CSV allowlist extends it
|
||||||
_allowed_csv = os.environ.get("DISINTO_CHAT_ALLOWED_USERS", "")
|
_allowed_csv = os.environ.get("DISINTO_CHAT_ALLOWED_USERS", "")
|
||||||
|
|
@ -90,11 +85,6 @@ _sessions = {}
|
||||||
# Pending OAuth state tokens: state -> expires (float)
|
# Pending OAuth state tokens: state -> expires (float)
|
||||||
_oauth_states = {}
|
_oauth_states = {}
|
||||||
|
|
||||||
# Per-user rate limiting state (#711)
|
|
||||||
# user -> list of request timestamps (for sliding-window hourly/daily caps)
|
|
||||||
_request_log = {}
|
|
||||||
# user -> {"tokens": int, "date": "YYYY-MM-DD"}
|
|
||||||
_daily_tokens = {}
|
|
||||||
|
|
||||||
# WebSocket message queues per user
|
# WebSocket message queues per user
|
||||||
# user -> asyncio.Queue (for streaming messages to connected clients)
|
# user -> asyncio.Queue (for streaming messages to connected clients)
|
||||||
|
|
@ -213,69 +203,9 @@ def _fetch_user(access_token):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Rate Limiting Functions (#711)
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
def _check_rate_limit(user):
|
|
||||||
"""Check per-user rate limits. Returns (allowed, retry_after, reason) (#711).
|
|
||||||
|
|
||||||
Checks hourly request cap, daily request cap, and daily token cap.
|
|
||||||
"""
|
|
||||||
now = time.time()
|
|
||||||
one_hour_ago = now - 3600
|
|
||||||
today = datetime.date.today().isoformat()
|
|
||||||
|
|
||||||
# Prune old entries from request log
|
|
||||||
timestamps = _request_log.get(user, [])
|
|
||||||
timestamps = [t for t in timestamps if t > now - 86400]
|
|
||||||
_request_log[user] = timestamps
|
|
||||||
|
|
||||||
# Hourly request cap
|
|
||||||
hourly = [t for t in timestamps if t > one_hour_ago]
|
|
||||||
if len(hourly) >= CHAT_MAX_REQUESTS_PER_HOUR:
|
|
||||||
oldest_in_window = min(hourly)
|
|
||||||
retry_after = int(oldest_in_window + 3600 - now) + 1
|
|
||||||
return False, max(retry_after, 1), "hourly request limit"
|
|
||||||
|
|
||||||
# Daily request cap
|
|
||||||
start_of_day = time.mktime(datetime.date.today().timetuple())
|
|
||||||
daily = [t for t in timestamps if t >= start_of_day]
|
|
||||||
if len(daily) >= CHAT_MAX_REQUESTS_PER_DAY:
|
|
||||||
next_day = start_of_day + 86400
|
|
||||||
retry_after = int(next_day - now) + 1
|
|
||||||
return False, max(retry_after, 1), "daily request limit"
|
|
||||||
|
|
||||||
# Daily token cap
|
|
||||||
token_info = _daily_tokens.get(user, {"tokens": 0, "date": today})
|
|
||||||
if token_info["date"] != today:
|
|
||||||
token_info = {"tokens": 0, "date": today}
|
|
||||||
_daily_tokens[user] = token_info
|
|
||||||
if token_info["tokens"] >= CHAT_MAX_TOKENS_PER_DAY:
|
|
||||||
next_day = start_of_day + 86400
|
|
||||||
retry_after = int(next_day - now) + 1
|
|
||||||
return False, max(retry_after, 1), "daily token limit"
|
|
||||||
|
|
||||||
return True, 0, ""
|
|
||||||
|
|
||||||
|
|
||||||
def _record_request(user):
|
|
||||||
"""Record a request timestamp for the user (#711)."""
|
|
||||||
_request_log.setdefault(user, []).append(time.time())
|
|
||||||
|
|
||||||
|
|
||||||
def _record_tokens(user, tokens):
|
|
||||||
"""Record token usage for the user (#711)."""
|
|
||||||
today = datetime.date.today().isoformat()
|
|
||||||
token_info = _daily_tokens.get(user, {"tokens": 0, "date": today})
|
|
||||||
if token_info["date"] != today:
|
|
||||||
token_info = {"tokens": 0, "date": today}
|
|
||||||
token_info["tokens"] += tokens
|
|
||||||
_daily_tokens[user] = token_info
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_stream_json(output):
|
def _parse_stream_json(output):
|
||||||
"""Parse stream-json output from claude --print (#711).
|
"""Parse stream-json output from claude --print.
|
||||||
|
|
||||||
Returns (text_content, total_tokens). Falls back gracefully if the
|
Returns (text_content, total_tokens). Falls back gracefully if the
|
||||||
usage event is absent or malformed.
|
usage event is absent or malformed.
|
||||||
|
|
@ -1063,34 +993,13 @@ class ChatHandler(BaseHTTPRequestHandler):
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
self.send_error_page(500, f"Error reading file: {e}")
|
self.send_error_page(500, f"Error reading file: {e}")
|
||||||
|
|
||||||
def _send_rate_limit_response(self, retry_after, reason):
|
|
||||||
"""Send a 429 response with Retry-After header and HTMX fragment (#711)."""
|
|
||||||
body = (
|
|
||||||
f'<div class="rate-limit-error">'
|
|
||||||
f"Rate limit exceeded: {reason}. "
|
|
||||||
f"Please try again in {retry_after} seconds."
|
|
||||||
f"</div>"
|
|
||||||
)
|
|
||||||
self.send_response(429)
|
|
||||||
self.send_header("Retry-After", str(retry_after))
|
|
||||||
self.send_header("Content-Type", "text/html; charset=utf-8")
|
|
||||||
self.send_header("Content-Length", str(len(body.encode("utf-8"))))
|
|
||||||
self.end_headers()
|
|
||||||
self.wfile.write(body.encode("utf-8"))
|
|
||||||
|
|
||||||
def handle_chat(self, user):
|
def handle_chat(self, user):
|
||||||
"""
|
"""
|
||||||
Handle chat requests by spawning `claude --print` with the user message.
|
Handle chat requests by spawning `claude --print` with the user message.
|
||||||
Enforces per-user rate limits and tracks token usage (#711).
|
|
||||||
Streams tokens over WebSocket if connected.
|
Streams tokens over WebSocket if connected.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check rate limits before processing (#711)
|
|
||||||
allowed, retry_after, reason = _check_rate_limit(user)
|
|
||||||
if not allowed:
|
|
||||||
self._send_rate_limit_response(retry_after, reason)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Read request body
|
# Read request body
|
||||||
content_length = int(self.headers.get("Content-Length", 0))
|
content_length = int(self.headers.get("Content-Length", 0))
|
||||||
if content_length == 0:
|
if content_length == 0:
|
||||||
|
|
@ -1127,9 +1036,6 @@ class ChatHandler(BaseHTTPRequestHandler):
|
||||||
if not conv_id or not _validate_conversation_id(conv_id):
|
if not conv_id or not _validate_conversation_id(conv_id):
|
||||||
conv_id = _generate_conversation_id()
|
conv_id = _generate_conversation_id()
|
||||||
|
|
||||||
# Record request for rate limiting (#711)
|
|
||||||
_record_request(user)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Save user message to history
|
# Save user message to history
|
||||||
_write_message(user, conv_id, "user", message)
|
_write_message(user, conv_id, "user", message)
|
||||||
|
|
@ -1194,14 +1100,6 @@ class ChatHandler(BaseHTTPRequestHandler):
|
||||||
# Combine response parts
|
# Combine response parts
|
||||||
response = "".join(response_parts)
|
response = "".join(response_parts)
|
||||||
|
|
||||||
# Track token usage - does not block *this* request (#711)
|
|
||||||
if total_tokens > 0:
|
|
||||||
_record_tokens(user, total_tokens)
|
|
||||||
print(
|
|
||||||
f"Token usage: user={user} tokens={total_tokens}",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Fall back to raw output if stream-json parsing yielded no text
|
# Fall back to raw output if stream-json parsing yielded no text
|
||||||
if not response:
|
if not response:
|
||||||
response = proc.stdout.getvalue() if hasattr(proc.stdout, 'getvalue') else ""
|
response = proc.stdout.getvalue() if hasattr(proc.stdout, 'getvalue') else ""
|
||||||
|
|
@ -1294,18 +1192,6 @@ class ChatHandler(BaseHTTPRequestHandler):
|
||||||
self.send_error_page(401, "Unauthorized: no valid session")
|
self.send_error_page(401, "Unauthorized: no valid session")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check rate limits before allowing WebSocket connection
|
|
||||||
allowed, retry_after, reason = _check_rate_limit(user)
|
|
||||||
if not allowed:
|
|
||||||
self.send_error_page(
|
|
||||||
429,
|
|
||||||
f"Rate limit exceeded: {reason}. Retry after {retry_after}s",
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Record request for rate limiting
|
|
||||||
_record_request(user)
|
|
||||||
|
|
||||||
# Create message queue for this user
|
# Create message queue for this user
|
||||||
_websocket_queues[user] = asyncio.Queue()
|
_websocket_queues[user] = asyncio.Queue()
|
||||||
|
|
||||||
|
|
@ -1421,12 +1307,6 @@ def main():
|
||||||
print("forward_auth secret configured (#709)", file=sys.stderr)
|
print("forward_auth secret configured (#709)", file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
print("WARNING: FORWARD_AUTH_SECRET not set - verify endpoint unrestricted", file=sys.stderr)
|
print("WARNING: FORWARD_AUTH_SECRET not set - verify endpoint unrestricted", file=sys.stderr)
|
||||||
print(
|
|
||||||
f"Rate limits (#711): {CHAT_MAX_REQUESTS_PER_HOUR}/hr, "
|
|
||||||
f"{CHAT_MAX_REQUESTS_PER_DAY}/day, "
|
|
||||||
f"{CHAT_MAX_TOKENS_PER_DAY} tokens/day",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
httpd.serve_forever()
|
httpd.serve_forever()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -673,6 +673,7 @@ COMPOSEEOF
|
||||||
# Chat container — Claude chat UI backend (#705)
|
# Chat container — Claude chat UI backend (#705)
|
||||||
# Internal service only; edge proxy routes to chat:8080
|
# Internal service only; edge proxy routes to chat:8080
|
||||||
# Sandbox hardened per #706 — no docker.sock, read-only rootfs, minimal caps
|
# Sandbox hardened per #706 — no docker.sock, read-only rootfs, minimal caps
|
||||||
|
# Rate limiting removed (#1084)
|
||||||
chat:
|
chat:
|
||||||
build:
|
build:
|
||||||
context: ./docker/chat
|
context: ./docker/chat
|
||||||
|
|
@ -708,10 +709,7 @@ COMPOSEEOF
|
||||||
DISINTO_CHAT_ALLOWED_USERS: ${DISINTO_CHAT_ALLOWED_USERS:-}
|
DISINTO_CHAT_ALLOWED_USERS: ${DISINTO_CHAT_ALLOWED_USERS:-}
|
||||||
# Shared secret for Caddy forward_auth verify endpoint (#709)
|
# Shared secret for Caddy forward_auth verify endpoint (#709)
|
||||||
FORWARD_AUTH_SECRET: ${FORWARD_AUTH_SECRET:-}
|
FORWARD_AUTH_SECRET: ${FORWARD_AUTH_SECRET:-}
|
||||||
# Cost caps / rate limiting (#711)
|
# Rate limiting removed (#1084)
|
||||||
CHAT_MAX_REQUESTS_PER_HOUR: ${CHAT_MAX_REQUESTS_PER_HOUR:-60}
|
|
||||||
CHAT_MAX_REQUESTS_PER_DAY: ${CHAT_MAX_REQUESTS_PER_DAY:-500}
|
|
||||||
CHAT_MAX_TOKENS_PER_DAY: ${CHAT_MAX_TOKENS_PER_DAY:-1000000}
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
|
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
|
|
||||||
|
|
@ -120,8 +120,6 @@ job "chat" {
|
||||||
# rendered from kv/disinto/shared/chat via template stanza.
|
# rendered from kv/disinto/shared/chat via template stanza.
|
||||||
env {
|
env {
|
||||||
FORGE_URL = "http://forgejo:3000"
|
FORGE_URL = "http://forgejo:3000"
|
||||||
CHAT_MAX_REQUESTS_PER_HOUR = "60"
|
|
||||||
CHAT_MAX_REQUESTS_PER_DAY = "1000"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Vault-templated secrets (S5.2, issue #989) ─────────────────────────
|
# ── Vault-templated secrets (S5.2, issue #989) ─────────────────────────
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue