# innercontext/backend/innercontext/api/product_llm_tools.py

from datetime import date
from typing import Any
from uuid import UUID

from google.genai import types as genai_types
from sqlmodel import Session, col, select

from innercontext.models import Product, Routine, RoutineStep


def _ev(v: object) -> str:
    """Render an enum-like value (or any other object) as a plain string.

    ``None`` becomes the empty string. An object exposing a string ``value``
    attribute (for example a string-valued enum member) is rendered as that
    attribute; everything else falls back to ``str(v)``.
    """
    if v is None:
        return ""
    inner = getattr(v, "value", None)
    return inner if isinstance(inner, str) else str(v)


def _extract_requested_product_ids(
    args: dict[str, object], max_ids: int = 8
) -> list[str]:
    """Pull a de-duplicated, capped list of product IDs out of tool-call args.

    Args:
        args: Raw argument mapping of the function call. Only the
            ``"product_ids"`` entry is read.
        max_ids: Maximum number of IDs to return. Zero or a negative value
            yields an empty list.

    Returns:
        The string entries of ``args["product_ids"]`` in their original
        order, without duplicates, truncated to ``max_ids`` items. Non-string
        entries are skipped. An empty list is returned when ``product_ids``
        is missing or is not a list.
    """
    raw_ids = args.get("product_ids")
    # The cap is checked *after* each append below, so a non-positive cap has
    # to be rejected up front or one ID would always slip through.
    if not isinstance(raw_ids, list) or max_ids <= 0:
        return []

    requested_ids: list[str] = []
    seen: set[str] = set()
    for raw_id in raw_ids:
        if not isinstance(raw_id, str) or raw_id in seen:
            continue
        seen.add(raw_id)
        requested_ids.append(raw_id)
        if len(requested_ids) >= max_ids:
            break
    return requested_ids


def _build_compact_actives_payload(product: Product) -> list[dict[str, object]]:
    """
    Build compact actives payload for function tool responses.

    Phase 2: Reduced from 24 actives to 5 for token efficiency. No ranking
    is applied here: the first five actives that have a non-blank name are
    kept, in the order they appear in ``product.actives``.

    Each active may be either a plain ``dict`` or an object with attributes;
    both shapes are read through the same code path so they serialize the
    same way (enum-like values are rendered via ``_ev``).

    Args:
        product: Product whose ``actives`` collection is summarized. A
            missing/empty collection yields an empty list.

    Returns:
        Up to five dicts, each with a ``"name"`` key and, when present on the
        active, ``"percent"``, ``"functions"`` (at most four, as strings) and
        ``"strength_level"`` (as a string).
    """
    max_actives = 5
    max_functions = 4

    def _field(active: object, key: str) -> object:
        # Actives arrive either as raw dicts or as attribute-bearing objects.
        if isinstance(active, dict):
            return active.get(key)
        return getattr(active, key, None)

    payload: list[dict[str, object]] = []
    for active in product.actives or []:
        name = str(_field(active, "name") or "").strip()
        if not name:
            continue

        item: dict[str, object] = {"name": name}
        percent = _field(active, "percent")
        if percent is not None:
            item["percent"] = percent
        functions = _field(active, "functions")
        if isinstance(functions, list):
            item["functions"] = [_ev(f) for f in functions[:max_functions]]
        strength_level = _field(active, "strength_level")
        if strength_level is not None:
            item["strength_level"] = _ev(strength_level)
        payload.append(item)

        # Stop as soon as the cap is reached instead of mapping every active
        # and slicing afterwards.
        if len(payload) >= max_actives:
            break
    return payload


def _map_product_details(
    product: Product,
    pid: str,
    *,
    last_used_on: date | None = None,
    include_inci: bool = False,
) -> dict[str, object]:
    """
    Assemble the per-product payload returned by the product-details tool.

    The payload combines attributes read directly from the product (name,
    brand, leave_on), fields taken from ``product.to_llm_context()``, and the
    compact actives list from ``_build_compact_actives_payload``.

    Phase 2: the INCI list is opt-in and left out by default to keep tool
    responses small (the original note cites roughly 12-15KB per product).

    Args:
        product: Product to map
        pid: Product ID string, echoed back under ``"id"``
        last_used_on: Last usage date, emitted in ISO format (``None`` if
            not provided)
        include_inci: Whether to append the INCI list, limited to 128
            entries of at most 120 characters each (default: False)

    Returns:
        Product details dict for clinical decisions
    """
    llm_ctx = product.to_llm_context()

    last_used_iso = None
    if last_used_on:
        last_used_iso = last_used_on.isoformat()

    details: dict[str, object] = {
        "id": pid,
        "name": product.name,
        "brand": product.brand,
        "category": llm_ctx.get("category"),
        "recommended_time": llm_ctx.get("recommended_time"),
        "leave_on": product.leave_on,
        "targets": llm_ctx.get("targets") or [],
        "effect_profile": llm_ctx.get("effect_profile") or {},
        "actives": _build_compact_actives_payload(product),
        "context_rules": llm_ctx.get("context_rules") or {},
        "safety": llm_ctx.get("safety") or {},
        "min_interval_hours": llm_ctx.get("min_interval_hours"),
        "max_frequency_per_week": llm_ctx.get("max_frequency_per_week"),
        "last_used_on": last_used_iso,
    }

    # INCI is only attached on explicit request.
    if not include_inci:
        return details

    ingredients = (product.inci or [])[:128]
    details["inci"] = [str(ingredient)[:120] for ingredient in ingredients]
    return details


def build_last_used_on_by_product(
    session: Session,
    product_ids: list[UUID],
) -> dict[str, date]:
    """Map each requested product to the date of its most recent routine.

    Selects routine steps whose ``product_id`` is in ``product_ids`` together
    with their routines, ordered by routine date descending, and keeps the
    first date encountered for every product.

    Args:
        session: Database session used to run the query.
        product_ids: Product IDs to look up. An empty list short-circuits
            without touching the database.

    Returns:
        Dict keyed by the stringified product ID. Products with no matching
        routine step are absent from the result.
    """
    if not product_ids:
        return {}

    newest_first = (
        select(RoutineStep, Routine)
        .join(Routine)
        .where(col(RoutineStep.product_id).in_(product_ids))
        .order_by(col(Routine.routine_date).desc())
    )

    latest_by_product: dict[str, date] = {}
    for routine_step, parent_routine in session.exec(newest_first).all():
        if routine_step.product_id is None:
            continue
        # Rows are newest-first, so only the first hit per product is kept.
        latest_by_product.setdefault(
            str(routine_step.product_id), parent_routine.routine_date
        )
    return latest_by_product


def build_product_details_tool_handler(
    products: list[Product],
    *,
    last_used_on_by_product: dict[str, date] | None = None,
):
    """Create the callable that answers product-details tool calls.

    Args:
        products: Products the handler is allowed to return, indexed
            internally by ``str(product.id)``.
        last_used_on_by_product: Optional mapping of stringified product ID
            to last usage date; treated as empty when omitted.

    Returns:
        A function taking the tool-call argument dict and returning
        ``{"products": [...]}``. Requested IDs are filtered through
        ``_extract_requested_product_ids``; IDs that do not match a known
        product are skipped without an error.
    """
    catalog = {str(candidate.id): candidate for candidate in products}
    usage_dates = last_used_on_by_product or {}

    def _handler(args: dict[str, Any]) -> dict[str, object]:
        details = [
            _map_product_details(
                match,
                requested_id,
                last_used_on=usage_dates.get(requested_id),
            )
            for requested_id in _extract_requested_product_ids(args)
            if (match := catalog.get(requested_id)) is not None
        ]
        return {"products": details}

    return _handler


# google-genai function declaration for the ``get_product_details`` tool.
# The tool takes a single required argument, ``product_ids`` (array of
# strings).
#
# The field list in the description mirrors the keys assembled by
# _map_product_details; keep the two in sync when either changes.
# NOTE(review): the "13 scores 0-5" detail for effect_profile cannot be
# checked from this module - confirm against the Product model.
# NOTE(review): _extract_requested_product_ids keeps at most 8 distinct IDs
# per call by default and drops the rest silently; the description below does
# not tell the model about that cap - consider mentioning it.
PRODUCT_DETAILS_FUNCTION_DECLARATION = genai_types.FunctionDeclaration(
    name="get_product_details",
    description=(
        "Use this to fetch clinical/safety data for products before making decisions. "
        "Call when you need to verify: ingredient conflicts, irritation risk, "
        "barrier compatibility, context rules, or usage frequency limits. "
        "Returns: id, name, brand, category, recommended_time, leave_on, targets, "
        "effect_profile (13 scores 0-5), actives (top 5 with functions), "
        "context_rules (safe_after_shaving, safe_with_compromised_barrier, etc.), "
        "safety flags, min_interval_hours, max_frequency_per_week, last_used_on. "
        "NOTE: Full INCI list omitted for efficiency - actives + effect_profile sufficient for safety."
    ),
    parameters=genai_types.Schema(
        type=genai_types.Type.OBJECT,
        properties={
            # IDs are matched as exact strings against str(product.id) in the
            # handler built by build_product_details_tool_handler.
            "product_ids": genai_types.Schema(
                type=genai_types.Type.ARRAY,
                items=genai_types.Schema(type=genai_types.Type.STRING),
                description="Product UUIDs from the provided product list. Batch multiple IDs in one call.",
            )
        },
        required=["product_ids"],
    ),
)