fix(api): resolve function tool UUID mismatch and MAX_TOKENS errors
Two critical bugs identified from production logs:

1. UUID mismatch bug (0 products returned from function tools):
   - Context shows 8-char short IDs: '63278801'
   - Function handler expected full UUIDs: '63278801-xxxx-...'
   - LLM requested short IDs, handler couldn't match → 0 products

   Fix: index products by BOTH the full UUID and the short ID (first
   8 chars) in build_product_details_tool_handler, and accept either
   format. Added deduplication to handle duplicate requests. Maintains
   the Phase 2 token optimization (no context changes).

2. MAX_TOKENS error (response truncation):
   - max_output_tokens=4096 includes thinking tokens (~3500)
   - Only ~500 tokens left for the JSON response
   - MEDIUM thinking level (Phase 2) consumed the budget

   Fix: increase max_output_tokens from 4096 to 8192 across all
   creative endpoints (routines/suggest, routines/suggest-batch,
   products/suggest) and update the default in get_creative_config().
   This gives headroom: ~3500 thinking + ~4500 response = ~8000 total.

From production logs (ai_call_logs):
- Log 71699654: success, but response_text null (function call only)
- Log 2db37c0f: MAX_TOKENS failure; tool returned 0 products

Both issues are now resolved.
commit 710b53e471
parent 3ef1f249b6

4 changed files with 25 additions and 8 deletions
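
To make the first failure mode concrete, here is a minimal standalone repro of the mismatch (illustrative only, not project code; the UUID value is hypothetical):

import uuid

pid = uuid.uuid4()
full_id = str(pid)      # e.g. '63278801-9c2f-4e1a-b7d3-0a1b2c3d4e5f'
short_id = full_id[:8]  # e.g. '63278801' -- the form the LLM saw in context and sent back

index = {full_id: "product"}        # old behavior: index by full UUID only
assert index.get(short_id) is None  # short-ID lookup misses → 0 products returned
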
@@ -164,21 +164,37 @@ def build_product_details_tool_handler(
     *,
     last_used_on_by_product: dict[str, date] | None = None,
 ):
-    available_by_id = {str(p.id): p for p in products}
+    # Build index for both full UUIDs and short IDs (first 8 chars)
+    # LLM sees short IDs in context but may request either format
+    available_by_id = {}
+    for p in products:
+        full_id = str(p.id)
+        available_by_id[full_id] = p  # Full UUID
+        available_by_id[full_id[:8]] = p  # Short ID (8 chars)
+
     last_used_on_by_product = last_used_on_by_product or {}
 
     def _handler(args: dict[str, Any]) -> dict[str, object]:
         requested_ids = _extract_requested_product_ids(args)
         products_payload = []
+        seen_products = set()  # Avoid duplicates if LLM requests both short and full ID
+
         for pid in requested_ids:
             product = available_by_id.get(pid)
             if product is None:
                 continue
+
+            # Skip if we already added this product (by full UUID)
+            full_id = str(product.id)
+            if full_id in seen_products:
+                continue
+            seen_products.add(full_id)
+
             products_payload.append(
                 _map_product_details(
                     product,
-                    pid,
-                    last_used_on=last_used_on_by_product.get(pid),
+                    full_id,  # Always use full ID in response
+                    last_used_on=last_used_on_by_product.get(full_id),
                 )
             )
         return {"products": products_payload}
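
A standalone sketch of the dual-index lookup and deduplication this hunk introduces; Product here is a stand-in dataclass, not the project's model:

import uuid
from dataclasses import dataclass

@dataclass(frozen=True)
class Product:
    id: uuid.UUID
    name: str

products = [Product(uuid.uuid4(), "cleanser"), Product(uuid.uuid4(), "toner")]

# Index every product under both its full UUID and its 8-char prefix.
available_by_id: dict[str, Product] = {}
for p in products:
    full_id = str(p.id)
    available_by_id[full_id] = p
    available_by_id[full_id[:8]] = p

target = products[0]
full, short = str(target.id), str(target.id)[:8]
assert available_by_id[full] is available_by_id[short] is target

# Requesting both forms of the same product yields a single payload entry.
seen: set[str] = set()
payload: list[str] = []
for requested in [short, full]:
    product = available_by_id.get(requested)
    if product is None or str(product.id) in seen:
        continue
    seen.add(str(product.id))
    payload.append(str(product.id))  # responses always carry the full UUID
assert len(payload) == 1

One trade-off worth noting: two products whose UUIDs share the same first 8 hex characters would collide in this index (the later one wins). With 16^8 (~4.3 billion) possible prefixes that is unlikely at catalog scale, but it is the price of accepting short IDs.
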
@@ -971,7 +971,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
     config = get_creative_config(
         system_instruction=_SHOPPING_SYSTEM_PROMPT,
         response_schema=_ShoppingSuggestionsOut,
-        max_output_tokens=4096,
+        max_output_tokens=8192,
     ).model_copy(
         update={
             "tools": [
@@ -1026,7 +1026,7 @@ def suggest_shopping(session: Session = Depends(get_session)):
         config=get_creative_config(
             system_instruction=_SHOPPING_SYSTEM_PROMPT,
             response_schema=_ShoppingSuggestionsOut,
-            max_output_tokens=4096,
+            max_output_tokens=8192,
         ),
         user_input=conservative_prompt,
         tool_trace={
@@ -612,7 +612,7 @@ def suggest_routine(
     config = get_creative_config(
         system_instruction=_ROUTINES_SYSTEM_PROMPT,
         response_schema=_SuggestionOut,
-        max_output_tokens=4096,
+        max_output_tokens=8192,
     ).model_copy(
         update={
             "tools": [
@@ -668,7 +668,7 @@ def suggest_routine(
         config=get_creative_config(
             system_instruction=_ROUTINES_SYSTEM_PROMPT,
             response_schema=_SuggestionOut,
-            max_output_tokens=4096,
+            max_output_tokens=8192,
         ),
         user_input=conservative_prompt,
         tool_trace={
@@ -34,11 +34,12 @@ def get_extraction_config(
 def get_creative_config(
     system_instruction: str,
     response_schema: Any,
-    max_output_tokens: int = 4096,
+    max_output_tokens: int = 8192,
 ) -> genai_types.GenerateContentConfig:
     """Config for creative tasks like recommendations (balanced creativity).
 
     Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability.
+    Increased default from 4096 to 8192 to accommodate thinking tokens (~3500) + response.
     """
     return genai_types.GenerateContentConfig(
         system_instruction=system_instruction,
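
A hypothetical sanity check for the budget arithmetic behind this default change (names are illustrative, not part of the codebase):

THINKING_TOKENS_ESTIMATE = 3500  # approximate cost of MEDIUM thinking (Phase 2)
RESPONSE_TOKENS_ESTIMATE = 4500  # headroom wanted for the JSON response

def has_headroom(max_output_tokens: int) -> bool:
    # max_output_tokens covers thinking AND response tokens combined,
    # which is why 4096 left only ~500 tokens for the actual JSON.
    return max_output_tokens >= THINKING_TOKENS_ESTIMATE + RESPONSE_TOKENS_ESTIMATE

assert not has_headroom(4096)  # old default: truncated with MAX_TOKENS
assert has_headroom(8192)      # new default: ~3500 + ~4500 = ~8000 fits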