fix(api): resolve function tool UUID mismatch and MAX_TOKENS errors
Two critical bugs identified from production logs: 1. UUID mismatch bug (0 products returned from function tools): - Context shows 8-char short IDs: '63278801' - Function handler expected full UUIDs: '63278801-xxxx-...' - The LLM requested short IDs; the handler couldn't match them → 0 products. Fix: Index products by BOTH the full UUID and the short ID (first 8 chars) in build_product_details_tool_handler, accepting either format. Added deduplication to handle duplicate requests. Maintains the Phase 2 token optimization (no context changes). 2. MAX_TOKENS error (response truncation): - max_output_tokens=4096 includes thinking tokens (~3500) - Only ~500 tokens were left for the JSON response - The MEDIUM thinking level (Phase 2) consumed most of the budget. Fix: Increase max_output_tokens from 4096 → 8192 across all creative endpoints (routines/suggest, routines/suggest-batch, products/suggest) and update the default in get_creative_config(). This gives headroom: ~3500 thinking + ~4500 response ≈ 8000 total. From production logs (ai_call_logs): - Log 71699654: success, but response_text was null (function call only) - Log 2db37c0f: MAX_TOKENS failure; the tool returned 0 products. Both issues are now resolved.
This commit is contained in:
parent
3ef1f249b6
commit
710b53e471
4 changed files with 25 additions and 8 deletions
|
|
@ -164,21 +164,37 @@ def build_product_details_tool_handler(
|
|||
*,
|
||||
last_used_on_by_product: dict[str, date] | None = None,
|
||||
):
|
||||
available_by_id = {str(p.id): p for p in products}
|
||||
# Build index for both full UUIDs and short IDs (first 8 chars)
|
||||
# LLM sees short IDs in context but may request either format
|
||||
available_by_id = {}
|
||||
for p in products:
|
||||
full_id = str(p.id)
|
||||
available_by_id[full_id] = p # Full UUID
|
||||
available_by_id[full_id[:8]] = p # Short ID (8 chars)
|
||||
|
||||
last_used_on_by_product = last_used_on_by_product or {}
|
||||
|
||||
def _handler(args: dict[str, Any]) -> dict[str, object]:
|
||||
requested_ids = _extract_requested_product_ids(args)
|
||||
products_payload = []
|
||||
seen_products = set() # Avoid duplicates if LLM requests both short and full ID
|
||||
|
||||
for pid in requested_ids:
|
||||
product = available_by_id.get(pid)
|
||||
if product is None:
|
||||
continue
|
||||
|
||||
# Skip if we already added this product (by full UUID)
|
||||
full_id = str(product.id)
|
||||
if full_id in seen_products:
|
||||
continue
|
||||
seen_products.add(full_id)
|
||||
|
||||
products_payload.append(
|
||||
_map_product_details(
|
||||
product,
|
||||
pid,
|
||||
last_used_on=last_used_on_by_product.get(pid),
|
||||
full_id, # Always use full ID in response
|
||||
last_used_on=last_used_on_by_product.get(full_id),
|
||||
)
|
||||
)
|
||||
return {"products": products_payload}
|
||||
|
|
|
|||
|
|
@ -971,7 +971,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
|
|||
config = get_creative_config(
|
||||
system_instruction=_SHOPPING_SYSTEM_PROMPT,
|
||||
response_schema=_ShoppingSuggestionsOut,
|
||||
max_output_tokens=4096,
|
||||
max_output_tokens=8192,
|
||||
).model_copy(
|
||||
update={
|
||||
"tools": [
|
||||
|
|
@ -1026,7 +1026,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
|
|||
config=get_creative_config(
|
||||
system_instruction=_SHOPPING_SYSTEM_PROMPT,
|
||||
response_schema=_ShoppingSuggestionsOut,
|
||||
max_output_tokens=4096,
|
||||
max_output_tokens=8192,
|
||||
),
|
||||
user_input=conservative_prompt,
|
||||
tool_trace={
|
||||
|
|
|
|||
|
|
@ -612,7 +612,7 @@ def suggest_routine(
|
|||
config = get_creative_config(
|
||||
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
||||
response_schema=_SuggestionOut,
|
||||
max_output_tokens=4096,
|
||||
max_output_tokens=8192,
|
||||
).model_copy(
|
||||
update={
|
||||
"tools": [
|
||||
|
|
@ -668,7 +668,7 @@ def suggest_routine(
|
|||
config=get_creative_config(
|
||||
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
||||
response_schema=_SuggestionOut,
|
||||
max_output_tokens=4096,
|
||||
max_output_tokens=8192,
|
||||
),
|
||||
user_input=conservative_prompt,
|
||||
tool_trace={
|
||||
|
|
|
|||
|
|
@ -34,11 +34,12 @@ def get_extraction_config(
|
|||
def get_creative_config(
|
||||
system_instruction: str,
|
||||
response_schema: Any,
|
||||
max_output_tokens: int = 4096,
|
||||
max_output_tokens: int = 8192,
|
||||
) -> genai_types.GenerateContentConfig:
|
||||
"""Config for creative tasks like recommendations (balanced creativity).
|
||||
|
||||
Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability.
|
||||
Increased default from 4096 to 8192 to accommodate thinking tokens (~3500) + response.
|
||||
"""
|
||||
return genai_types.GenerateContentConfig(
|
||||
system_instruction=system_instruction,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue