diff --git a/backend/innercontext/api/product_llm_tools.py b/backend/innercontext/api/product_llm_tools.py index fe65d5e..44f03aa 100644 --- a/backend/innercontext/api/product_llm_tools.py +++ b/backend/innercontext/api/product_llm_tools.py @@ -164,21 +164,37 @@ def build_product_details_tool_handler( *, last_used_on_by_product: dict[str, date] | None = None, ): - available_by_id = {str(p.id): p for p in products} + # Build index for both full UUIDs and short IDs (first 8 chars) + # LLM sees short IDs in context but may request either format + available_by_id = {} + for p in products: + full_id = str(p.id) + available_by_id[full_id] = p # Full UUID + available_by_id[full_id[:8]] = p # Short ID (8 chars) + last_used_on_by_product = last_used_on_by_product or {} def _handler(args: dict[str, Any]) -> dict[str, object]: requested_ids = _extract_requested_product_ids(args) products_payload = [] + seen_products = set() # Avoid duplicates if LLM requests both short and full ID + for pid in requested_ids: product = available_by_id.get(pid) if product is None: continue + + # Skip if we already added this product (by full UUID) + full_id = str(product.id) + if full_id in seen_products: + continue + seen_products.add(full_id) + products_payload.append( _map_product_details( product, - pid, - last_used_on=last_used_on_by_product.get(pid), + full_id, # Always use full ID in response + last_used_on=last_used_on_by_product.get(full_id), ) ) return {"products": products_payload} diff --git a/backend/innercontext/api/products.py b/backend/innercontext/api/products.py index fea60c1..d6bc82e 100644 --- a/backend/innercontext/api/products.py +++ b/backend/innercontext/api/products.py @@ -971,7 +971,7 @@ def suggest_shopping(session: Session = Depends(get_session)): config = get_creative_config( system_instruction=_SHOPPING_SYSTEM_PROMPT, response_schema=_ShoppingSuggestionsOut, - max_output_tokens=4096, + max_output_tokens=8192, ).model_copy( update={ "tools": [ @@ -1026,7 +1026,7 @@ def suggest_shopping(session: Session = Depends(get_session)): config=get_creative_config( system_instruction=_SHOPPING_SYSTEM_PROMPT, response_schema=_ShoppingSuggestionsOut, - max_output_tokens=4096, + max_output_tokens=8192, ), user_input=conservative_prompt, tool_trace={ diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py index 1436c4e..c97fc71 100644 --- a/backend/innercontext/api/routines.py +++ b/backend/innercontext/api/routines.py @@ -612,7 +612,7 @@ def suggest_routine( config = get_creative_config( system_instruction=_ROUTINES_SYSTEM_PROMPT, response_schema=_SuggestionOut, - max_output_tokens=4096, + max_output_tokens=8192, ).model_copy( update={ "tools": [ @@ -668,7 +668,7 @@ def suggest_routine( config=get_creative_config( system_instruction=_ROUTINES_SYSTEM_PROMPT, response_schema=_SuggestionOut, - max_output_tokens=4096, + max_output_tokens=8192, ), user_input=conservative_prompt, tool_trace={ diff --git a/backend/innercontext/llm.py b/backend/innercontext/llm.py index d26be73..40635cf 100644 --- a/backend/innercontext/llm.py +++ b/backend/innercontext/llm.py @@ -34,11 +34,12 @@ def get_extraction_config( def get_creative_config( system_instruction: str, response_schema: Any, - max_output_tokens: int = 4096, + max_output_tokens: int = 8192, ) -> genai_types.GenerateContentConfig: """Config for creative tasks like recommendations (balanced creativity). Phase 2: Uses MEDIUM thinking level to capture reasoning chain for observability. + Increased default from 4096 to 8192 to accommodate thinking tokens (~3500) + response. """ return genai_types.GenerateContentConfig( system_instruction=system_instruction,