diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py index 5bc3b9d..f2fdde4 100644 --- a/backend/innercontext/llm.py +++ b/backend/innercontext/llm.py @@ -46,6 +46,13 @@ def call_gemini( with suppress(Exception): user_input = str(contents) + # Disable thinking by default — Gemini 2.5 Flash thinking tokens count toward + # max_output_tokens, leaving too little room for actual JSON output. + if config.thinking_config is None: + config = config.model_copy( + update={"thinking_config": genai_types.ThinkingConfig(thinking_budget=0)} + ) + start = time.monotonic() success, error_detail, response, finish_reason = True, None, None, None try: