fix(api): resolve function tool UUID mismatch and MAX_TOKENS errors

Two critical bugs identified from production logs:

1. UUID Mismatch Bug (0 products returned from function tools):
   - Context shows 8-char short IDs: '63278801'
   - Function handler expected full UUIDs: '63278801-xxxx-...'
   - LLM requested short IDs, handler couldn't match → 0 products

   Fix: Index products by BOTH full UUID and short ID (first 8 chars)
   in build_product_details_tool_handler. Accept either format.
   Added deduplication to handle duplicate requests.
   Maintains Phase 2 token optimization (no context changes).

2. MAX_TOKENS Error (response truncation):
   - max_output_tokens=4096 includes thinking tokens (~3500)
   - Only ~500 tokens left for JSON response
   - MEDIUM thinking level (Phase 2) consumed budget

   Fix: Increase max_output_tokens from 4096 → 8192 across all
   creative endpoints (routines/suggest, routines/suggest-batch,
   products/suggest). Updated default in get_creative_config().

   Gives headroom: ~3500 thinking + ~4500 response = ~8000 total

From production logs (ai_call_logs):
- Log 71699654: Success but response_text null (function call only)
- Log 2db37c0f: MAX_TOKENS failure, tool returned 0 products

Both issues now resolved.
This commit is contained in:
Piotr Oleszczyk 2026-03-06 10:44:12 +01:00
parent 3ef1f249b6
commit 710b53e471
4 changed files with 25 additions and 8 deletions

View file

@@ -164,21 +164,37 @@ def build_product_details_tool_handler(
*, *,
last_used_on_by_product: dict[str, date] | None = None, last_used_on_by_product: dict[str, date] | None = None,
): ):
available_by_id = {str(p.id): p for p in products} # Build index for both full UUIDs and short IDs (first 8 chars)
# LLM sees short IDs in context but may request either format
available_by_id = {}
for p in products:
full_id = str(p.id)
available_by_id[full_id] = p # Full UUID
available_by_id[full_id[:8]] = p # Short ID (8 chars)
last_used_on_by_product = last_used_on_by_product or {} last_used_on_by_product = last_used_on_by_product or {}
def _handler(args: dict[str, Any]) -> dict[str, object]: def _handler(args: dict[str, Any]) -> dict[str, object]:
requested_ids = _extract_requested_product_ids(args) requested_ids = _extract_requested_product_ids(args)
products_payload = [] products_payload = []
seen_products = set() # Avoid duplicates if LLM requests both short and full ID
for pid in requested_ids: for pid in requested_ids:
product = available_by_id.get(pid) product = available_by_id.get(pid)
if product is None: if product is None:
continue continue
# Skip if we already added this product (by full UUID)
full_id = str(product.id)
if full_id in seen_products:
continue
seen_products.add(full_id)
products_payload.append( products_payload.append(
_map_product_details( _map_product_details(
product, product,
pid, full_id, # Always use full ID in response
last_used_on=last_used_on_by_product.get(pid), last_used_on=last_used_on_by_product.get(full_id),
) )
) )
return {"products": products_payload} return {"products": products_payload}

View file

@@ -971,7 +971,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
config = get_creative_config( config = get_creative_config(
system_instruction=_SHOPPING_SYSTEM_PROMPT, system_instruction=_SHOPPING_SYSTEM_PROMPT,
response_schema=_ShoppingSuggestionsOut, response_schema=_ShoppingSuggestionsOut,
max_output_tokens=4096, max_output_tokens=8192,
).model_copy( ).model_copy(
update={ update={
"tools": [ "tools": [
@@ -1026,7 +1026,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
config=get_creative_config( config=get_creative_config(
system_instruction=_SHOPPING_SYSTEM_PROMPT, system_instruction=_SHOPPING_SYSTEM_PROMPT,
response_schema=_ShoppingSuggestionsOut, response_schema=_ShoppingSuggestionsOut,
max_output_tokens=4096, max_output_tokens=8192,
), ),
user_input=conservative_prompt, user_input=conservative_prompt,
tool_trace={ tool_trace={

View file

@@ -612,7 +612,7 @@ def suggest_routine(
config = get_creative_config( config = get_creative_config(
system_instruction=_ROUTINES_SYSTEM_PROMPT, system_instruction=_ROUTINES_SYSTEM_PROMPT,
response_schema=_SuggestionOut, response_schema=_SuggestionOut,
max_output_tokens=4096, max_output_tokens=8192,
).model_copy( ).model_copy(
update={ update={
"tools": [ "tools": [
@@ -668,7 +668,7 @@ def suggest_routine(
config=get_creative_config( config=get_creative_config(
system_instruction=_ROUTINES_SYSTEM_PROMPT, system_instruction=_ROUTINES_SYSTEM_PROMPT,
response_schema=_SuggestionOut, response_schema=_SuggestionOut,
max_output_tokens=4096, max_output_tokens=8192,
), ),
user_input=conservative_prompt, user_input=conservative_prompt,
tool_trace={ tool_trace={

View file

@@ -34,11 +34,12 @@ def get_extraction_config(
def get_creative_config( def get_creative_config(
system_instruction: str, system_instruction: str,
response_schema: Any, response_schema: Any,
max_output_tokens: int = 4096, max_output_tokens: int = 8192,
) -> genai_types.GenerateContentConfig: ) -> genai_types.GenerateContentConfig:
"""Config for creative tasks like recommendations (balanced creativity). """Config for creative tasks like recommendations (balanced creativity).
Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability. Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability.
Increased default from 4096 to 8192 to accommodate thinking tokens (~3500) + response.
""" """
return genai_types.GenerateContentConfig( return genai_types.GenerateContentConfig(
system_instruction=system_instruction, system_instruction=system_instruction,