From 710b53e4713839114a2fe205a1fc14a961be5a81 Mon Sep 17 00:00:00 2001
From: Piotr Oleszczyk <piotr@oleszczyk.eu>
Date: Fri, 6 Mar 2026 10:44:12 +0100
Subject: [PATCH] fix(api): resolve function tool UUID mismatch and MAX_TOKENS
 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two critical bugs identified from production logs:

1. UUID Mismatch Bug (0 products returned from function tools):
   - The prompt context shows 8-char short IDs, e.g. '63278801'
   - The function handler only matched full UUIDs: '63278801-xxxx-...'
   - The LLM requested the short IDs it saw, the handler found no
     match → 0 products

   Fix: Index products by BOTH full UUID and short ID (first 8 chars)
   in build_product_details_tool_handler, so either format is accepted.
   Deduplicate by full UUID so a product requested under both formats
   is returned only once; the response and the last_used_on lookup
   always use the full UUID.
   Maintains Phase 2 token optimization (no context changes).

2. MAX_TOKENS Error (response truncation):
   - max_output_tokens=4096 also counts thinking tokens (~3500)
   - Only ~500 tokens were left for the JSON response
   - The MEDIUM thinking level (Phase 2) consumed most of the budget

   Fix: Increase max_output_tokens from 4096 → 8192 across all
   creative endpoints (routines/suggest, routines/suggest-batch,
   products/suggest) and update the default in get_creative_config().

   Gives headroom: ~3500 thinking + ~4500 response = ~8000 total

From production logs (ai_call_logs):
- Log 71699654: Succeeded, but response_text was null (function call only)
- Log 2db37c0f: MAX_TOKENS failure; the tool returned 0 products

Both issues are now resolved.
---
 backend/innercontext/api/product_llm_tools.py | 22 ++++++++++++++++---
 backend/innercontext/api/products.py          |  4 ++--
 backend/innercontext/api/routines.py          |  4 ++--
 backend/innercontext/llm.py                   |  3 ++-
 4 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/backend/innercontext/api/product_llm_tools.py b/backend/innercontext/api/product_llm_tools.py
index fe65d5e..44f03aa 100644
--- a/backend/innercontext/api/product_llm_tools.py
+++ b/backend/innercontext/api/product_llm_tools.py
@@ -164,21 +164,37 @@ def build_product_details_tool_handler(
     *,
     last_used_on_by_product: dict[str, date] | None = None,
 ):
-    available_by_id = {str(p.id): p for p in products}
+    # Build index for both full UUIDs and short IDs (first 8 chars)
+    # LLM sees short IDs in context but may request either format
+    available_by_id = {}
+    for p in products:
+        full_id = str(p.id)
+        available_by_id[full_id] = p  # Full UUID
+        available_by_id[full_id[:8]] = p  # Short ID (8 chars)
+
     last_used_on_by_product = last_used_on_by_product or {}
 
     def _handler(args: dict[str, Any]) -> dict[str, object]:
         requested_ids = _extract_requested_product_ids(args)
         products_payload = []
+        seen_products = set()  # Avoid duplicates if LLM requests both short and full ID
+
         for pid in requested_ids:
             product = available_by_id.get(pid)
             if product is None:
                 continue
+
+            # Skip if we already added this product (by full UUID)
+            full_id = str(product.id)
+            if full_id in seen_products:
+                continue
+            seen_products.add(full_id)
+
             products_payload.append(
                 _map_product_details(
                     product,
-                    pid,
-                    last_used_on=last_used_on_by_product.get(pid),
+                    full_id,  # Always use full ID in response
+                    last_used_on=last_used_on_by_product.get(full_id),
                 )
             )
         return {"products": products_payload}
diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py
index fea60c1..d6bc82e 100644
--- a/backend/innercontext/api/products.py
+++ b/backend/innercontext/api/products.py
@@ -971,7 +971,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
     config = get_creative_config(
         system_instruction=_SHOPPING_SYSTEM_PROMPT,
         response_schema=_ShoppingSuggestionsOut,
-        max_output_tokens=4096,
+        max_output_tokens=8192,
     ).model_copy(
         update={
             "tools": [
@@ -1026,7 +1026,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
             config=get_creative_config(
                 system_instruction=_SHOPPING_SYSTEM_PROMPT,
                 response_schema=_ShoppingSuggestionsOut,
-                max_output_tokens=4096,
+                max_output_tokens=8192,
             ),
             user_input=conservative_prompt,
             tool_trace={
diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py
index 1436c4e..c97fc71 100644
--- a/backend/innercontext/api/routines.py
+++ b/backend/innercontext/api/routines.py
@@ -612,7 +612,7 @@ def suggest_routine(
     config = get_creative_config(
         system_instruction=_ROUTINES_SYSTEM_PROMPT,
         response_schema=_SuggestionOut,
-        max_output_tokens=4096,
+        max_output_tokens=8192,
     ).model_copy(
         update={
             "tools": [
@@ -668,7 +668,7 @@ def suggest_routine(
             config=get_creative_config(
                 system_instruction=_ROUTINES_SYSTEM_PROMPT,
                 response_schema=_SuggestionOut,
-                max_output_tokens=4096,
+                max_output_tokens=8192,
             ),
             user_input=conservative_prompt,
             tool_trace={
diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py
index d26be73..40635cf 100644
--- a/backend/innercontext/llm.py
+++ b/backend/innercontext/llm.py
@@ -34,11 +34,12 @@ def get_extraction_config(
 def get_creative_config(
     system_instruction: str,
     response_schema: Any,
-    max_output_tokens: int = 4096,
+    max_output_tokens: int = 8192,
 ) -> genai_types.GenerateContentConfig:
     """Config for creative tasks like recommendations (balanced creativity).
 
     Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability.
+    Increased default from 4096 to 8192 to accommodate thinking tokens (~3500) + response.
     """
     return genai_types.GenerateContentConfig(
         system_instruction=system_instruction,