diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py
index f2fdde4..bf7fc90 100644
--- a/backend/innercontext/llm.py
+++ b/backend/innercontext/llm.py
@@ -46,11 +46,16 @@ def call_gemini(
     with suppress(Exception):
         user_input = str(contents)
 
-    # Disable thinking by default — Gemini 2.5 Flash thinking tokens count toward
-    # max_output_tokens, leaving too little room for actual JSON output.
+    # Limit thinking by default — Gemini 3 Flash defaults to "high" thinking which
+    # consumes most of the token budget before generating actual output.
+    # Use "low" to reduce latency while keeping basic reasoning intact.
     if config.thinking_config is None:
         config = config.model_copy(
-            update={"thinking_config": genai_types.ThinkingConfig(thinking_budget=0)}
+            update={
+                "thinking_config": genai_types.ThinkingConfig(
+                    thinking_level=genai_types.ThinkingLevel.LOW
+                )
+            }
         )
 
     start = time.monotonic()
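
Since the new default only fills in a missing `thinking_config` (the `is None` check), a caller can still opt back into deeper reasoning by passing an explicit config. Below is a minimal sketch of that usage; the import path for `call_gemini`, its full parameter list, and the model id are assumptions, since only `contents` and `config` are visible in the hunk above. `GenerateContentConfig`, `ThinkingConfig`, and `ThinkingLevel` are from the `google-genai` SDK.

```python
from google.genai import types as genai_types

from innercontext.llm import call_gemini  # import path assumed from the diff header

# An explicit thinking_config bypasses the LOW default applied inside
# call_gemini, which only patches the config when thinking_config is None.
config = genai_types.GenerateContentConfig(
    max_output_tokens=2048,
    thinking_config=genai_types.ThinkingConfig(
        thinking_level=genai_types.ThinkingLevel.HIGH
    ),
)

response = call_gemini(
    model="gemini-3-flash-preview",  # hypothetical model id for illustration
    contents="Summarize the attached transcript as JSON.",
    config=config,
)
```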