From 3fbf6d7041b7ac1fc19a63c329217a75c306870e Mon Sep 17 00:00:00 2001
From: Piotr Oleszczyk
Date: Sat, 28 Feb 2026 22:03:49 +0100
Subject: [PATCH] fix(backend): drop response_mime_type=application/json to avoid constrained decoding

Constrained decoding is ~10x slower and consumes hidden tokens for
constraint processing, causing truncation at ~1000 chars even with
8192 max_output_tokens. The system prompt already instructs the model
to output raw minified JSON; our NaN/markdown-fence sanitisation
handles edge cases.

Co-Authored-By: Claude Sonnet 4.6
---
 backend/innercontext/api/products.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py
index 6e237aa..dd3eeeb 100644
--- a/backend/innercontext/api/products.py
+++ b/backend/innercontext/api/products.py
@@ -359,8 +359,7 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
         contents=f"Extract product data from this text:\n\n{data.text}",
         config=genai_types.GenerateContentConfig(
             system_instruction=_product_parse_system_prompt(),
-            response_mime_type="application/json",
-            max_output_tokens=65536,
+            max_output_tokens=8192,
             temperature=0.0,
         ),
     )