From 710b53e4713839114a2fe205a1fc14a961be5a81 Mon Sep 17 00:00:00 2001
From: Piotr Oleszczyk
Date: Fri, 6 Mar 2026 10:44:12 +0100
Subject: [PATCH] fix(api): resolve function tool UUID mismatch and MAX_TOKENS errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two critical bugs identified from production logs:

1. UUID Mismatch Bug (0 products returned from function tools):
   - Context shows 8-char short IDs: '63278801'
   - Function handler expected full UUIDs: '63278801-xxxx-...'
   - LLM requested short IDs, handler couldn't match → 0 products

   Fix: Index products by BOTH full UUID and short ID (first 8 chars)
   in build_product_details_tool_handler. Accept either format.
   Added deduplication to handle duplicate requests.
   Maintains Phase 2 token optimization (no context changes).

2. MAX_TOKENS Error (response truncation):
   - max_output_tokens=4096 includes thinking tokens (~3500)
   - Only ~500 tokens left for JSON response
   - MEDIUM thinking level (Phase 2) consumed budget

   Fix: Increase max_output_tokens from 4096 → 8192 across all
   creative endpoints (routines/suggest, routines/suggest-batch,
   products/suggest). Updated default in get_creative_config().

   Gives headroom: ~3500 thinking + ~4500 response = ~8000 total

From production logs (ai_call_logs):
- Log 71699654: Success but response_text null (function call only)
- Log 2db37c0f: MAX_TOKENS failure, tool returned 0 products

Both issues now resolved.
--- backend/innercontext/api/product_llm_tools.py | 22 ++++++++++++++++--- backend/innercontext/api/products.py | 4 ++-- backend/innercontext/api/routines.py | 4 ++-- backend/innercontext/llm.py | 3 ++- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/backend/innercontext/api/product_llm_tools.py b/backend/innercontext/api/product_llm_tools.py index fe65d5e..44f03aa 100644 --- a/backend/innercontext/api/product_llm_tools.py +++ b/backend/innercontext/api/product_llm_tools.py @@ -164,21 +164,37 @@ def build_product_details_tool_handler( *, last_used_on_by_product: dict[str, date] | None = None, ): - available_by_id = {str(p.id): p for p in products} + # Build index for both full UUIDs and short IDs (first 8 chars) + # LLM sees short IDs in context but may request either format + available_by_id = {} + for p in products: + full_id = str(p.id) + available_by_id[full_id] = p # Full UUID + available_by_id[full_id[:8]] = p # Short ID (8 chars) + last_used_on_by_product = last_used_on_by_product or {} def _handler(args: dict[str, Any]) -> dict[str, object]: requested_ids = _extract_requested_product_ids(args) products_payload = [] + seen_products = set() # Avoid duplicates if LLM requests both short and full ID + for pid in requested_ids: product = available_by_id.get(pid) if product is None: continue + + # Skip if we already added this product (by full UUID) + full_id = str(product.id) + if full_id in seen_products: + continue + seen_products.add(full_id) + products_payload.append( _map_product_details( product, - pid, - last_used_on=last_used_on_by_product.get(pid), + full_id, # Always use full ID in response + last_used_on=last_used_on_by_product.get(full_id), ) ) return {"products": products_payload} diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py index fea60c1..d6bc82e 100644 --- a/backend/innercontext/api/products.py +++ b/backend/innercontext/api/products.py @@ -971,7 +971,7 @@ def suggest_shopping(session: 
Session = Depends(get_session)): config = get_creative_config( system_instruction=_SHOPPING_SYSTEM_PROMPT, response_schema=_ShoppingSuggestionsOut, - max_output_tokens=4096, + max_output_tokens=8192, ).model_copy( update={ "tools": [ @@ -1026,7 +1026,7 @@ def suggest_shopping(session: Session = Depends(get_session)): config=get_creative_config( system_instruction=_SHOPPING_SYSTEM_PROMPT, response_schema=_ShoppingSuggestionsOut, - max_output_tokens=4096, + max_output_tokens=8192, ), user_input=conservative_prompt, tool_trace={ diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py index 1436c4e..c97fc71 100644 --- a/backend/innercontext/api/routines.py +++ b/backend/innercontext/api/routines.py @@ -612,7 +612,7 @@ def suggest_routine( config = get_creative_config( system_instruction=_ROUTINES_SYSTEM_PROMPT, response_schema=_SuggestionOut, - max_output_tokens=4096, + max_output_tokens=8192, ).model_copy( update={ "tools": [ @@ -668,7 +668,7 @@ def suggest_routine( config=get_creative_config( system_instruction=_ROUTINES_SYSTEM_PROMPT, response_schema=_SuggestionOut, - max_output_tokens=4096, + max_output_tokens=8192, ), user_input=conservative_prompt, tool_trace={ diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py index d26be73..40635cf 100644 --- a/backend/innercontext/llm.py +++ b/backend/innercontext/llm.py @@ -34,11 +34,12 @@ def get_extraction_config( def get_creative_config( system_instruction: str, response_schema: Any, - max_output_tokens: int = 4096, + max_output_tokens: int = 8192, ) -> genai_types.GenerateContentConfig: """Config for creative tasks like recommendations (balanced creativity). Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability. + Increased default from 4096 to 8192 to accommodate thinking tokens (~3500) + response. """ return genai_types.GenerateContentConfig( system_instruction=system_instruction,