diff --git a/docker-compose.yml b/docker-compose.yml
index bca9ab9..6206b2c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -243,10 +243,7 @@ services:
       - EDGE_TUNNEL_FQDN=${EDGE_TUNNEL_FQDN:-}
       - EDGE_TUNNEL_FQDN_CHAT=${EDGE_TUNNEL_FQDN_CHAT:-}
       - EDGE_ROUTING_MODE=${EDGE_ROUTING_MODE:-subpath}
-      # Cost caps / rate limiting (#711)
-      - CHAT_MAX_REQUESTS_PER_HOUR=${CHAT_MAX_REQUESTS_PER_HOUR:-60}
-      - CHAT_MAX_REQUESTS_PER_DAY=${CHAT_MAX_REQUESTS_PER_DAY:-500}
-      - CHAT_MAX_TOKENS_PER_DAY=${CHAT_MAX_TOKENS_PER_DAY:-1000000}
+      # Per-user rate limiting / cost caps removed (#1084): CHAT_MAX_* env vars are no longer read
     ports:
       - "80:80"
       - "443:443"
diff --git a/docker/chat/server.py b/docker/chat/server.py
index b5252a7..6472a1d 100644
--- a/docker/chat/server.py
+++ b/docker/chat/server.py
@@ -23,7 +23,6 @@ The claude binary is expected to be mounted from the host at /usr/local/bin/clau
 """
 
 import asyncio
-import datetime
 import json
 import os
 import re
@@ -61,10 +60,6 @@ EDGE_ROUTING_MODE = os.environ.get("EDGE_ROUTING_MODE", "subpath")
 # (acceptable during local dev; production MUST set this).
 FORWARD_AUTH_SECRET = os.environ.get("FORWARD_AUTH_SECRET", "")
 
-# Rate limiting / cost caps (#711)
-CHAT_MAX_REQUESTS_PER_HOUR = int(os.environ.get("CHAT_MAX_REQUESTS_PER_HOUR", 60))
-CHAT_MAX_REQUESTS_PER_DAY = int(os.environ.get("CHAT_MAX_REQUESTS_PER_DAY", 500))
-CHAT_MAX_TOKENS_PER_DAY = int(os.environ.get("CHAT_MAX_TOKENS_PER_DAY", 1000000))
 
 # Allowed users - disinto-admin always allowed; CSV allowlist extends it
 _allowed_csv = os.environ.get("DISINTO_CHAT_ALLOWED_USERS", "")
@@ -90,11 +85,6 @@ _sessions = {}
 # Pending OAuth state tokens: state -> expires (float)
 _oauth_states = {}
 
-# Per-user rate limiting state (#711)
-# user -> list of request timestamps (for sliding-window hourly/daily caps)
-_request_log = {}
-# user -> {"tokens": int, "date": "YYYY-MM-DD"}
-_daily_tokens = {}
 
 # WebSocket message queues per user
 # user -> asyncio.Queue (for streaming messages to connected clients)
@@ -213,69 +203,9 @@ def _fetch_user(access_token):
         return None
 
 
-# =============================================================================
-# Rate Limiting Functions (#711)
-# =============================================================================
-
-def _check_rate_limit(user):
-    """Check per-user rate limits. Returns (allowed, retry_after, reason) (#711).
-
-    Checks hourly request cap, daily request cap, and daily token cap.
-    """
-    now = time.time()
-    one_hour_ago = now - 3600
-    today = datetime.date.today().isoformat()
-
-    # Prune old entries from request log
-    timestamps = _request_log.get(user, [])
-    timestamps = [t for t in timestamps if t > now - 86400]
-    _request_log[user] = timestamps
-
-    # Hourly request cap
-    hourly = [t for t in timestamps if t > one_hour_ago]
-    if len(hourly) >= CHAT_MAX_REQUESTS_PER_HOUR:
-        oldest_in_window = min(hourly)
-        retry_after = int(oldest_in_window + 3600 - now) + 1
-        return False, max(retry_after, 1), "hourly request limit"
-
-    # Daily request cap
-    start_of_day = time.mktime(datetime.date.today().timetuple())
-    daily = [t for t in timestamps if t >= start_of_day]
-    if len(daily) >= CHAT_MAX_REQUESTS_PER_DAY:
-        next_day = start_of_day + 86400
-        retry_after = int(next_day - now) + 1
-        return False, max(retry_after, 1), "daily request limit"
-
-    # Daily token cap
-    token_info = _daily_tokens.get(user, {"tokens": 0, "date": today})
-    if token_info["date"] != today:
-        token_info = {"tokens": 0, "date": today}
-        _daily_tokens[user] = token_info
-    if token_info["tokens"] >= CHAT_MAX_TOKENS_PER_DAY:
-        next_day = start_of_day + 86400
-        retry_after = int(next_day - now) + 1
-        return False, max(retry_after, 1), "daily token limit"
-
-    return True, 0, ""
-
-
-def _record_request(user):
-    """Record a request timestamp for the user (#711)."""
-    _request_log.setdefault(user, []).append(time.time())
-
-
-def _record_tokens(user, tokens):
-    """Record token usage for the user (#711)."""
-    today = datetime.date.today().isoformat()
-    token_info = _daily_tokens.get(user, {"tokens": 0, "date": today})
-    if token_info["date"] != today:
-        token_info = {"tokens": 0, "date": today}
-    token_info["tokens"] += tokens
-    _daily_tokens[user] = token_info
-
 
 def _parse_stream_json(output):
-    """Parse stream-json output from claude --print (#711).
+    """Parse stream-json output from claude --print.
 
     Returns (text_content, total_tokens).  Falls back gracefully if the
     usage event is absent or malformed.
@@ -1063,34 +993,13 @@ class ChatHandler(BaseHTTPRequestHandler):
         except IOError as e:
             self.send_error_page(500, f"Error reading file: {e}")
 
-    def _send_rate_limit_response(self, retry_after, reason):
-        """Send a 429 response with Retry-After header and HTMX fragment (#711)."""
-        body = (
-            f'<div class="rate-limit-error">'
-            f"Rate limit exceeded: {reason}. "
-            f"Please try again in {retry_after} seconds."
-            f"</div>"
-        )
-        self.send_response(429)
-        self.send_header("Retry-After", str(retry_after))
-        self.send_header("Content-Type", "text/html; charset=utf-8")
-        self.send_header("Content-Length", str(len(body.encode("utf-8"))))
-        self.end_headers()
-        self.wfile.write(body.encode("utf-8"))
-
+
     def handle_chat(self, user):
         """
         Handle chat requests by spawning `claude --print` with the user message.
-        Enforces per-user rate limits and tracks token usage (#711).
         Streams tokens over WebSocket if connected.
         """
 
-        # Check rate limits before processing (#711)
-        allowed, retry_after, reason = _check_rate_limit(user)
-        if not allowed:
-            self._send_rate_limit_response(retry_after, reason)
-            return
-
         # Read request body
         content_length = int(self.headers.get("Content-Length", 0))
         if content_length == 0:
@@ -1127,9 +1036,6 @@ class ChatHandler(BaseHTTPRequestHandler):
         if not conv_id or not _validate_conversation_id(conv_id):
             conv_id = _generate_conversation_id()
 
-        # Record request for rate limiting (#711)
-        _record_request(user)
-
         try:
             # Save user message to history
             _write_message(user, conv_id, "user", message)
@@ -1194,14 +1100,6 @@ class ChatHandler(BaseHTTPRequestHandler):
             # Combine response parts
             response = "".join(response_parts)
 
-            # Track token usage - does not block *this* request (#711)
-            if total_tokens > 0:
-                _record_tokens(user, total_tokens)
-                print(
-                    f"Token usage: user={user} tokens={total_tokens}",
-                    file=sys.stderr,
-                )
-
             # Fall back to raw output if stream-json parsing yielded no text
             if not response:
                 response = proc.stdout.getvalue() if hasattr(proc.stdout, 'getvalue') else ""
@@ -1294,18 +1192,6 @@ class ChatHandler(BaseHTTPRequestHandler):
             self.send_error_page(401, "Unauthorized: no valid session")
             return
 
-        # Check rate limits before allowing WebSocket connection
-        allowed, retry_after, reason = _check_rate_limit(user)
-        if not allowed:
-            self.send_error_page(
-                429,
-                f"Rate limit exceeded: {reason}. Retry after {retry_after}s",
-            )
-            return
-
-        # Record request for rate limiting
-        _record_request(user)
-
         # Create message queue for this user
         _websocket_queues[user] = asyncio.Queue()
 
@@ -1421,12 +1307,6 @@ def main():
         print("forward_auth secret configured (#709)", file=sys.stderr)
     else:
         print("WARNING: FORWARD_AUTH_SECRET not set - verify endpoint unrestricted", file=sys.stderr)
-    print(
-        f"Rate limits (#711): {CHAT_MAX_REQUESTS_PER_HOUR}/hr, "
-        f"{CHAT_MAX_REQUESTS_PER_DAY}/day, "
-        f"{CHAT_MAX_TOKENS_PER_DAY} tokens/day",
-        file=sys.stderr,
-    )
     httpd.serve_forever()
 
 
diff --git a/lib/AGENTS.md b/lib/AGENTS.md
index ae56bbe..a3fce35 100644
--- a/lib/AGENTS.md
+++ b/lib/AGENTS.md
@@ -30,7 +30,7 @@ sourced as needed.
 | `lib/git-creds.sh` | Shared git credential helper configuration. `configure_git_creds([HOME_DIR] [RUN_AS_CMD])` — writes a static credential helper script and configures git globally to use password-based HTTP auth (Forgejo 11.x rejects API tokens for `git push`, #361). **Retry on cold boot (#741)**: resolves bot username from `FORGE_TOKEN` with 5 retries (exponential backoff 1-5s); fails loudly and returns 1 if Forgejo is unreachable — never falls back to a wrong hardcoded default (exports `BOT_USER` on success). `repair_baked_cred_urls([--as RUN_AS_CMD] DIR ...)` — rewrites any git remote URLs that have credentials baked in to use clean URLs instead; uses `safe.directory` bypass for root-owned repos (#671). Requires `FORGE_PASS`, `FORGE_URL`, `FORGE_TOKEN`. | entrypoints (agents, edge) |
 | `lib/ops-setup.sh` | `setup_ops_repo()` — creates ops repo on Forgejo if it doesn't exist, configures bot collaborators, clones/initializes ops repo locally, seeds directory structure (vault, knowledge, evidence, sprints). Evidence subdirectories seeded: engagement/, red-team/, holdout/, evolution/, user-test/. Also seeds sprints/ for architect output. Exports `_ACTUAL_OPS_SLUG`. `migrate_ops_repo(ops_root, [primary_branch])` — idempotent migration helper that seeds missing directories and .gitkeep files on existing ops repos (pre-#407 deployments). | bin/disinto (init) |
 | `lib/ci-setup.sh` | `_install_cron_impl()` — installs crontab entries for bare-metal deployments (compose mode uses polling loop instead). `_create_forgejo_oauth_app()` — generic helper to create an OAuth2 app on Forgejo (shared by Woodpecker and chat). `_create_woodpecker_oauth_impl()` — creates Woodpecker OAuth2 app (thin wrapper). `_create_chat_oauth_impl()` — creates disinto-chat OAuth2 app, writes `CHAT_OAUTH_CLIENT_ID`/`CHAT_OAUTH_CLIENT_SECRET` to `.env` (#708). `_generate_woodpecker_token_impl()` — auto-generates WOODPECKER_TOKEN via OAuth2 flow. `_activate_woodpecker_repo_impl()` — activates repo in Woodpecker. All gated by `_load_ci_context()` which validates required env vars. | bin/disinto (init) |
-| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (**duplicate service detection**: tracks service names during generation, aborts with `ERROR: Duplicate service name '$name' detected` on conflict; detection state is reset between calls so idempotent reinvocation is safe, #850) (uses `codeberg.org/forgejo/forgejo:11.0` tag; `CLAUDE_BIN_DIR` volume mount removed from agents/llama services — only `reproduce` and `edge` still use the host-mounted CLI (#992); adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility; Forgejo includes a healthcheck so dependent services use `condition: service_healthy` — fixes cold-start races, #665; adds `chat` service block with isolated `chat-config` named volume and `CHAT_HISTORY_DIR` bind-mount for per-user NDJSON history persistence (#710); injects `FORWARD_AUTH_SECRET` for Caddy↔chat defense-in-depth auth (#709); cost-cap env vars `CHAT_MAX_REQUESTS_PER_HOUR`, `CHAT_MAX_REQUESTS_PER_DAY`, `CHAT_MAX_TOKENS_PER_DAY` (#711); subdomain fallback comment for `EDGE_TUNNEL_FQDN_*` vars (#713); all `depends_on` now use `condition: service_healthy/started` instead of bare service names; all services now include `restart: unless-stopped` including the edge service — #768; agents service now uses `image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}` instead of `build:` (#429); `WOODPECKER_PLUGINS_PRIVILEGED` env var added to woodpecker service (#779); agents-llama conditional block gated on `ENABLE_LLAMA_AGENT=1` (#769); `agents-llama-all` compose service (profile `agents-llama-all`, all 7 roles: review,dev,gardener,architect,planner,predictor,supervisor) added by #801; agents service gains volume mounts for `./projects`, `./.env`, `./state`), `generate_caddyfile()` — Caddyfile (routes: `/forge/*` → forgejo:3000, `/woodpecker/*` → woodpecker:8000, `/staging/*` → staging:80; `/chat/login` and `/chat/oauth/callback` bypass `forward_auth` so 
unauthenticated users can reach the OAuth flow; `/chat/*` gated by `forward_auth` on `chat:8080/chat/auth/verify` which stamps `X-Forwarded-User` (#709); root `/` redirects to `/forge/`), `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
+| `lib/generators.sh` | Template generation for `disinto init`: `generate_compose()` — docker-compose.yml (**duplicate service detection**: tracks service names during generation, aborts with `ERROR: Duplicate service name '$name' detected` on conflict; detection state is reset between calls so idempotent reinvocation is safe, #850) (uses `codeberg.org/forgejo/forgejo:11.0` tag; `CLAUDE_BIN_DIR` volume mount removed from agents/llama services — only `reproduce` and `edge` still use the host-mounted CLI (#992); adds `security_opt: [apparmor:unconfined]` to all services for rootless container compatibility; Forgejo includes a healthcheck so dependent services use `condition: service_healthy` — fixes cold-start races, #665; adds `chat` service block with isolated `chat-config` named volume and `CHAT_HISTORY_DIR` bind-mount for per-user NDJSON history persistence (#710); injects `FORWARD_AUTH_SECRET` for Caddy↔chat defense-in-depth auth (#709); subdomain fallback comment for `EDGE_TUNNEL_FQDN_*` vars (#713); all `depends_on` now use `condition: service_healthy/started` instead of bare service names; all services now include `restart: unless-stopped` including the edge service — #768; agents service now uses `image: ghcr.io/disinto/agents:${DISINTO_IMAGE_TAG:-latest}` instead of `build:` (#429); `WOODPECKER_PLUGINS_PRIVILEGED` env var added to woodpecker service (#779); agents-llama conditional block gated on `ENABLE_LLAMA_AGENT=1` (#769); `agents-llama-all` compose service (profile `agents-llama-all`, all 7 roles: review,dev,gardener,architect,planner,predictor,supervisor) added by #801; agents service gains volume mounts for `./projects`, `./.env`, `./state`), `generate_caddyfile()` — Caddyfile (routes: `/forge/*` → forgejo:3000, `/woodpecker/*` → woodpecker:8000, `/staging/*` → staging:80; `/chat/login` and `/chat/oauth/callback` bypass `forward_auth` so unauthenticated users can reach the OAuth flow; `/chat/*` gated by `forward_auth` on `chat:8080/chat/auth/verify` 
which stamps `X-Forwarded-User` (#709); root `/` redirects to `/forge/`), `generate_staging_index()` — staging index, `generate_deploy_pipelines()` — Woodpecker deployment pipeline configs. Requires `FACTORY_ROOT`, `PROJECT_NAME`, `PRIMARY_BRANCH`. | bin/disinto (init) |
 | `lib/backup.sh` | Factory backup creation. `backup_create <outfile.tar.gz>` — exports factory state: fetches all issues (open+closed) from the project and ops repos via Forgejo API, bundles the ops repo as a git bundle, and writes a tarball. Requires `FORGE_URL`, `FORGE_TOKEN`, `FORGE_REPO`, `FORGE_OPS_REPO`, `OPS_REPO_ROOT`. Sourced by `bin/disinto backup create` (#1057). | bin/disinto (backup create) |
 | `lib/disinto/backup.sh` | Factory backup restore. `backup_import <infile.tar.gz>` — restores from a backup tarball: creates missing repos via Forgejo API, imports issues (idempotent — skips by number if present), unpacks ops repo git bundle. Idempotent: running twice produces same end state with no errors. Requires `FORGE_URL`, `FORGE_TOKEN`. Sourced by `bin/disinto backup import` (#1058). | bin/disinto (backup import) |
 | `lib/sprint-filer.sh` | Post-merge sub-issue filer for sprint PRs. Invoked by the `.woodpecker/ops-filer.yml` pipeline after a sprint PR merges to ops repo `main`. Parses `<!-- filer:begin --> ... <!-- filer:end -->` blocks from sprint PR bodies to extract sub-issue definitions, creates them on the project repo using `FORGE_FILER_TOKEN` (narrow-scope `filer-bot` identity with `issues:write` only), adds `in-progress` label to the parent vision issue, and handles vision lifecycle closure when all sub-issues are closed. Uses `filer_api_all()` for paginated fetches. Idempotent: uses `<!-- decomposed-from: #<vision>, sprint: <slug>, id: <id> -->` markers to skip already-filed issues. Requires `FORGE_FILER_TOKEN`, `FORGE_API`, `FORGE_API_BASE`, `FORGE_OPS_REPO`. | `.woodpecker/ops-filer.yml` (CI pipeline on ops repo) |
diff --git a/lib/generators.sh b/lib/generators.sh
index 581de8b..41e7e67 100644
--- a/lib/generators.sh
+++ b/lib/generators.sh
@@ -681,6 +681,55 @@ COMPOSEEOF
       - disinto-net
     command: ["echo", "staging slot — replace with project image"]
 
+  # Chat container — Claude chat UI backend (#705)
+  # Internal service only; edge proxy routes to chat:8080
+  # Sandbox hardened per #706 — no docker.sock, read-only rootfs, minimal caps
+  # Rate limiting removed (#1084)
+  chat:
+    build:
+      context: ./docker/chat
+      dockerfile: Dockerfile
+    container_name: disinto-chat
+    restart: unless-stopped
+    read_only: true
+    tmpfs:
+      - /tmp:size=64m
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+    pids_limit: 128
+    mem_limit: 512m
+    memswap_limit: 512m
+    volumes:
+      # Mount claude binary from host (agents no longer mount it; reproduce and edge still do, #992)
+      - ${CLAUDE_BIN_DIR}:/usr/local/bin/claude:ro
+      # Throwaway named volume for chat config (isolated from host ~/.claude)
+      - chat-config:/var/chat/config
+      # Chat history persistence: per-user NDJSON files on bind-mounted host volume
+      - ${CHAT_HISTORY_DIR:-./state/chat-history}:/var/lib/chat/history
+    environment:
+      CHAT_HOST: "0.0.0.0"
+      CHAT_PORT: "8080"
+      FORGE_URL: http://forgejo:3000
+      CHAT_OAUTH_CLIENT_ID: ${CHAT_OAUTH_CLIENT_ID:-}
+      CHAT_OAUTH_CLIENT_SECRET: ${CHAT_OAUTH_CLIENT_SECRET:-}
+      EDGE_TUNNEL_FQDN: ${EDGE_TUNNEL_FQDN:-}
+      EDGE_TUNNEL_FQDN_CHAT: ${EDGE_TUNNEL_FQDN_CHAT:-}
+      EDGE_ROUTING_MODE: ${EDGE_ROUTING_MODE:-subpath}
+      DISINTO_CHAT_ALLOWED_USERS: ${DISINTO_CHAT_ALLOWED_USERS:-}
+      # Shared secret for Caddy forward_auth verify endpoint (#709)
+      FORWARD_AUTH_SECRET: ${FORWARD_AUTH_SECRET:-}
+      # Per-user rate limiting / cost caps removed (#1084): CHAT_MAX_* env vars are no longer read
+    healthcheck:
+      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
+    networks:
+      - disinto-net
+
 volumes:
   forgejo-data:
   woodpecker-data:
diff --git a/nomad/jobs/chat.hcl b/nomad/jobs/chat.hcl
index ad18cec..95f86ab 100644
--- a/nomad/jobs/chat.hcl
+++ b/nomad/jobs/chat.hcl
@@ -119,9 +119,7 @@ job "chat" {
       # CHAT_OAUTH_CLIENT_ID, CHAT_OAUTH_CLIENT_SECRET, FORWARD_AUTH_SECRET
       # rendered from kv/disinto/shared/chat via template stanza.
       env {
-        FORGE_URL                      = "http://forgejo:3000"
-        CHAT_MAX_REQUESTS_PER_HOUR     = "60"
-        CHAT_MAX_REQUESTS_PER_DAY      = "1000"
+        FORGE_URL = "http://forgejo:3000"
       }
 
       # ── Vault-templated secrets (S5.2, issue #989) ─────────────────────────