From 3fbf6d7041b7ac1fc19a63c329217a75c306870e Mon Sep 17 00:00:00 2001
From: Piotr Oleszczyk <piotr@oleszczyk.eu>
Date: Sat, 28 Feb 2026 22:03:49 +0100
Subject: [PATCH] fix(backend): drop response_mime_type=application/json to
 avoid constrained decoding

Constrained decoding is ~10x slower and consumes hidden tokens for constraint
processing, causing truncation at ~1000 chars even with 8192 max_output_tokens.
The system prompt already instructs the model to output raw minified JSON; our
NaN/markdown-fence sanitisation handles edge cases. This change also lowers
max_output_tokens from 65536 to 8192.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 backend/innercontext/api/products.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py
index 6e237aa..dd3eeeb 100644
--- a/backend/innercontext/api/products.py
+++ b/backend/innercontext/api/products.py
@@ -359,8 +359,7 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
         contents=f"Extract product data from this text:\n\n{data.text}",
         config=genai_types.GenerateContentConfig(
             system_instruction=_product_parse_system_prompt(),
-            response_mime_type="application/json",
-            max_output_tokens=65536,
+            max_output_tokens=8192,
             temperature=0.0,
         ),
     )