diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py
index 5bc3b9d..f2fdde4 100644
--- a/backend/innercontext/llm.py
+++ b/backend/innercontext/llm.py
@@ -46,6 +46,13 @@ def call_gemini(
         with suppress(Exception):
             user_input = str(contents)
 
+    # Disable thinking unless the caller configured it — Gemini 2.5 Flash thinking
+    # tokens count toward max_output_tokens, leaving too little room for JSON output.
+    if config.thinking_config is None:
+        config = config.model_copy(
+            update={"thinking_config": genai_types.ThinkingConfig(thinking_budget=0)}
+        )
+
     start = time.monotonic()
     success, error_detail, response, finish_reason = True, None, None, None
     try: