innercontext/backend/innercontext/models/product.py
Piotr Oleszczyk 5bb2ea5f08 feat(api): add short_id column for consistent LLM UUID handling
Resolves validation failures where LLM fabricated full UUIDs from 8-char
prefixes shown in context, causing 'unknown product_id' errors.

Root Cause Analysis:
- Context showed 8-char short IDs: '77cbf37c' (Phase 2 optimization)
- Function tool returned full UUIDs: '77cbf37c-3830-4927-...'
- LLM saw BOTH formats, got confused, invented UUIDs for final response
- Validators rejected fabricated UUIDs as unknown products

Solution: Consistent 8-char short_id across LLM boundary:
1. Database: New short_id column (8 chars, unique, indexed)
2. Context: Shows short_id (was: str(id)[:8])
3. Function tools: Return short_id (was: full UUID)
4. Translation layer: Expands short_id → UUID before validation
5. Database: Stores full UUIDs (no schema change for existing data)

Changes:
- Added products.short_id column with unique constraint + index
- Migration populates from UUID prefix, handles collisions via regeneration
- Product model auto-generates short_id for new products
- LLM contexts use product.short_id consistently
- Function tools return product.short_id
- Added _expand_product_id() translation layer in routines.py
- Integrated expansion in suggest_routine() and suggest_batch()
- Validators work with full UUIDs (no changes needed)

Benefits:
- LLM never sees full UUIDs, no format confusion
- Maintains Phase 2 token optimization (~85% reduction)
- O(1) indexed short_id lookups vs O(n) pattern matching
- Unique constraint prevents collisions at DB level
- Clean separation: 8-char for LLM, 36-char for application

From production error:
  Step 1: unknown product_id 77cbf37c-3830-4927-9669-07447206689d
  (LLM invented the last 28 characters)

Now resolved: LLM uses '77cbf37c' consistently, translation layer
expands to real UUID before validation.
2026-03-06 10:58:26 +01:00

392 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from datetime import date, datetime
from typing import Any, ClassVar, Optional, cast
from uuid import UUID, uuid4
from pydantic import field_validator, model_validator
from sqlalchemy import JSON, Column, DateTime
from sqlmodel import Field, Relationship, SQLModel
from .base import utc_now
from .domain import Domain
from .enums import (
AbsorptionSpeed,
DayTime,
IngredientFunction,
PriceTier,
ProductCategory,
SkinConcern,
SkinType,
StrengthLevel,
TextureType,
)
# ---------------------------------------------------------------------------
# Value objects
# ---------------------------------------------------------------------------
def _effect_score() -> Any:
    """Return the field spec shared by every effect score: default 0, range 0..5."""
    return Field(default=0, ge=0, le=5)


class ProductEffectProfile(SQLModel):
    """Integer effect/risk scores for a product.

    Every score defaults to 0 and is validated to lie within 0..5 inclusive.
    """

    hydration_immediate: int = _effect_score()
    hydration_long_term: int = _effect_score()
    barrier_repair_strength: int = _effect_score()
    soothing_strength: int = _effect_score()
    exfoliation_strength: int = _effect_score()
    retinoid_strength: int = _effect_score()
    irritation_risk: int = _effect_score()
    comedogenic_risk: int = _effect_score()
    barrier_disruption_risk: int = _effect_score()
    dryness_risk: int = _effect_score()
    brightening_strength: int = _effect_score()
    anti_acne_strength: int = _effect_score()
    anti_aging_strength: int = _effect_score()
class ActiveIngredient(SQLModel):
    """One active ingredient entry of a product.

    Only ``name`` is required; every other attribute is optional.
    """

    name: str
    # Validated to lie within 0..100 when provided.
    percent: float | None = Field(default=None, ge=0, le=100)
    # A fresh empty list is created per instance when omitted.
    functions: list[IngredientFunction] = Field(default_factory=list)
    strength_level: StrengthLevel | None = Field(default=None)
    irritation_potential: StrengthLevel | None = Field(default=None)
class ProductContext(SQLModel):
    """Optional boolean context flags for a product.

    Every flag defaults to ``None``.
    """

    safe_after_shaving: bool | None = Field(default=None)
    safe_after_acids: bool | None = Field(default=None)
    safe_after_retinoids: bool | None = Field(default=None)
    safe_with_compromised_barrier: bool | None = Field(default=None)
    low_uv_only: bool | None = Field(default=None)
# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------
def _ev(v: object) -> str:
    """Return ``v.value`` when ``v`` exposes a ``value`` attribute, else ``str(v)``.

    Lets callers treat Python enum members and plain values (e.g. strings
    loaded from DB JSON) uniformly.
    """
    missing = object()  # sentinel: distinguishes "no attribute" from a None value
    inner = getattr(v, "value", missing)
    if inner is missing:
        return str(v)
    return inner  # type: ignore[return-value]
# ---------------------------------------------------------------------------
# Base model (pure Python types, no sa_column, no id/timestamps)
# ---------------------------------------------------------------------------
class ProductBase(SQLModel):
    """Product fields shared by the table model and the response models.

    Declares plain Python/Pydantic types only: no ``sa_column`` overrides,
    no primary key and no timestamps.
    """

    # --- identity -----------------------------------------------------
    name: str
    brand: str
    line_name: str | None = Field(default=None, max_length=128)
    sku: str | None = Field(default=None, max_length=64)
    url: str | None = Field(default=None, max_length=512)
    barcode: str | None = Field(default=None, max_length=64)

    # --- classification -----------------------------------------------
    category: ProductCategory
    recommended_time: DayTime
    texture: TextureType | None = None
    absorption_speed: AbsorptionSpeed | None = None
    leave_on: bool

    # --- price and packaging (numeric values must be strictly positive) -
    price_amount: float | None = Field(default=None, gt=0)
    # Exactly three characters when provided.
    price_currency: str | None = Field(default=None, min_length=3, max_length=3)
    size_ml: float | None = Field(default=None, gt=0)
    full_weight_g: float | None = Field(default=None, gt=0)
    empty_weight_g: float | None = Field(default=None, gt=0)
    pao_months: int | None = Field(default=None, ge=1, le=60)

    # --- formulation and targeting ------------------------------------
    inci: list[str] = Field(default_factory=list)
    actives: list[ActiveIngredient] | None = None
    recommended_for: list[SkinType] = Field(default_factory=list)
    targets: list[SkinConcern] = Field(default_factory=list)

    # --- safety flags -------------------------------------------------
    fragrance_free: bool | None = None
    essential_oils_free: bool | None = None
    alcohol_denat_free: bool | None = None
    pregnancy_safe: bool | None = None

    # --- effects, pH and usage constraints ----------------------------
    product_effect_profile: ProductEffectProfile = Field(
        default_factory=ProductEffectProfile
    )
    ph_min: float | None = Field(default=None, ge=0, le=14)
    ph_max: float | None = Field(default=None, ge=0, le=14)
    context_rules: ProductContext | None = None
    min_interval_hours: int | None = Field(default=None, ge=0)
    max_frequency_per_week: int | None = Field(default=None, ge=1, le=14)

    # --- special product kinds ----------------------------------------
    is_medication: bool = Field(default=False)
    is_tool: bool = Field(default=False)
    needle_length_mm: float | None = Field(default=None, gt=0)

    # --- personal notes -----------------------------------------------
    personal_tolerance_notes: str | None = None
    personal_repurchase_intent: bool | None = None
# ---------------------------------------------------------------------------
# Table models
# ---------------------------------------------------------------------------
class Product(ProductBase, table=True):
    """Table model for the ``products`` table, plus LLM-context export.

    Extends :class:`ProductBase` with a UUID primary key, an 8-character
    ``short_id``, JSON-backed columns, pricing metadata, timestamps and the
    ``inventory`` relationship.
    """

    __tablename__ = "products"
    __domains__: ClassVar[frozenset[Domain]] = frozenset({Domain.SKINCARE})

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Filled in by validate_business_rules() when empty.
    # NOTE(review): SQLModel table models may skip validators on direct
    # construction - confirm short_id is populated on that path too.
    short_id: str = Field(
        max_length=8,
        unique=True,
        index=True,
        description="8-character short ID for LLM contexts (first 8 chars of UUID)",
    )

    # Re-declare the JSON-backed fields with an explicit sa_column
    # (needed only on the table model).
    inci: list[str] = Field(
        default_factory=list, sa_column=Column(JSON, nullable=False)
    )
    actives: list[ActiveIngredient] | None = Field(
        default=None, sa_column=Column(JSON, nullable=True)
    )
    recommended_for: list[SkinType] = Field(
        default_factory=list, sa_column=Column(JSON, nullable=False)
    )
    targets: list[SkinConcern] = Field(
        default_factory=list, sa_column=Column(JSON, nullable=False)
    )
    product_effect_profile: ProductEffectProfile = Field(
        default_factory=ProductEffectProfile,
        sa_column=Column(JSON, nullable=False),
    )
    context_rules: ProductContext | None = Field(
        default=None, sa_column=Column(JSON, nullable=True)
    )

    price_tier: PriceTier | None = Field(default=None, index=True)
    price_per_use_pln: float | None = Field(default=None)
    price_tier_source: str | None = Field(default=None, max_length=32)
    pricing_computed_at: datetime | None = Field(default=None)

    created_at: datetime = Field(default_factory=utc_now, nullable=False)
    updated_at: datetime = Field(
        default_factory=utc_now,
        sa_column=Column(
            DateTime(timezone=True),
            default=utc_now,
            onupdate=utc_now,
            nullable=False,
        ),
    )

    inventory: list["ProductInventory"] = Relationship(
        back_populates="product",
        sa_relationship_kwargs={"cascade": "all, delete-orphan"},
    )

    @field_validator("product_effect_profile", mode="before")
    @classmethod
    def coerce_effect_profile(cls, v: object) -> object:
        """Turn a raw dict into a ``ProductEffectProfile``; pass other values through."""
        if isinstance(v, dict):
            return ProductEffectProfile(**cast(dict[str, Any], v))
        return v

    @model_validator(mode="after")
    def validate_business_rules(self) -> "Product":
        """Enforce cross-field rules and normalise derived fields.

        Raises:
            ValueError: if ``ph_min`` exceeds ``ph_max``, if an SPF product is
                recommended for PM, or if an SPF product is not leave-on.

        Returns:
            The same instance, with ``price_currency`` upper-cased and
            ``short_id`` filled from the first 8 characters of ``id`` when it
            was missing or empty.
        """
        if (
            self.ph_min is not None
            and self.ph_max is not None
            and self.ph_min > self.ph_max
        ):
            raise ValueError("ph_min must be <= ph_max")

        if self.category == ProductCategory.SPF and self.recommended_time == DayTime.PM:
            raise ValueError("SPF cannot be recommended only for PM")

        if self.category == ProductCategory.SPF and not self.leave_on:
            raise ValueError("SPF products must be leave-on")

        if self.price_currency is not None:
            self.price_currency = self.price_currency.upper()

        # Auto-generate short_id from UUID if not set.
        # Migration handles existing products; this is for new products.
        if not getattr(self, "short_id", None):
            self.short_id = str(self.id)[:8]

        return self

    def to_llm_context(
        self,
        *,
        computed_price_tier: PriceTier | None = None,
        price_per_use_pln: float | None = None,
    ) -> dict:
        """Build a compact dict describing this product for an LLM prompt.

        Unset (``None``) and empty values are left out, so the result only
        carries information that is actually known.

        Args:
            computed_price_tier: Emitted as ``price_tier`` when given; the
                instance's own ``price_tier`` attribute is not consulted.
            price_per_use_pln: Emitted as ``price_per_use_pln`` rounded to
                4 decimals when given.

        Returns:
            A dict that always contains ``id``, ``name``, ``brand``,
            ``category``, ``recommended_time`` and ``leave_on``, plus whichever
            optional keys have values.
        """
        ctx: dict = {
            "id": str(self.id),
            "name": self.name,
            "brand": self.brand,
            "category": _ev(self.category),
            "recommended_time": _ev(self.recommended_time),
            "leave_on": self.leave_on,
        }

        for field in ("line_name", "url"):
            val = getattr(self, field)
            if val is not None:
                ctx[field] = val

        if self.texture is not None:
            ctx["texture"] = _ev(self.texture)
        if self.absorption_speed is not None:
            ctx["absorption_speed"] = _ev(self.absorption_speed)
        if self.price_amount is not None:
            ctx["price_amount"] = self.price_amount
        if self.price_currency is not None:
            ctx["price_currency"] = self.price_currency
        if computed_price_tier is not None:
            ctx["price_tier"] = _ev(computed_price_tier)
        if price_per_use_pln is not None:
            ctx["price_per_use_pln"] = round(price_per_use_pln, 4)
        if self.size_ml is not None:
            ctx["size_ml"] = self.size_ml
        if self.pao_months is not None:
            ctx["pao_months"] = self.pao_months

        if self.inci:
            ctx["inci"] = self.inci
        if self.recommended_for:
            ctx["recommended_for"] = [_ev(s) for s in self.recommended_for]
        if self.targets:
            ctx["targets"] = [_ev(s) for s in self.targets]

        if self.actives:
            actives_ctx = []
            for a in self.actives:
                if isinstance(a, dict):
                    # Already a plain dict (e.g. loaded from the JSON column).
                    actives_ctx.append(a)
                else:
                    a_dict: dict = {"name": a.name}
                    if a.percent is not None:
                        a_dict["percent"] = a.percent
                    if a.functions:
                        a_dict["functions"] = [_ev(f) for f in a.functions]
                    if a.strength_level is not None:
                        a_dict["strength_level"] = a.strength_level.name.lower()
                    actives_ctx.append(a_dict)
            ctx["actives"] = actives_ctx

        # pH: a single value when both bounds coincide, a "min-max" range when
        # both differ, otherwise whichever single bound is known.
        ph_low, ph_high = self.ph_min, self.ph_max
        if ph_low is not None and ph_high is not None:
            if ph_low == ph_high:
                ctx["ph"] = ph_low
            else:
                # The separator is required; without it 5.5 and 6.0 would
                # render as the unreadable "5.56.0".
                ctx["ph_range"] = f"{ph_low}-{ph_high}"
        elif ph_low is not None:
            ctx["ph_min"] = ph_low
        elif ph_high is not None:
            ctx["ph_max"] = ph_high

        # Only scores of 2 or more are reported.
        ep = self.product_effect_profile
        scores = ep if isinstance(ep, dict) else ep.model_dump()
        notable = {k: v for k, v in scores.items() if v >= 2}
        if notable:
            ctx["effect_profile"] = notable

        if self.context_rules is not None:
            cr = self.context_rules
            raw_rules = cr if isinstance(cr, dict) else cr.model_dump()
            rules = {k: v for k, v in raw_rules.items() if v is not None}
            if rules:
                ctx["context_rules"] = rules

        if self.min_interval_hours is not None:
            ctx["min_interval_hours"] = self.min_interval_hours
        if self.max_frequency_per_week is not None:
            ctx["max_frequency_per_week"] = self.max_frequency_per_week

        safety = {}
        for flag in (
            "fragrance_free",
            "essential_oils_free",
            "alcohol_denat_free",
            "pregnancy_safe",
        ):
            val = getattr(self, flag)
            if val is not None:
                safety[flag] = val
        if safety:
            ctx["safety"] = safety

        if self.is_medication:
            ctx["is_medication"] = True
        if self.is_tool:
            ctx["is_tool"] = True
        if self.needle_length_mm is not None:
            ctx["needle_length_mm"] = self.needle_length_mm

        if self.personal_tolerance_notes:
            ctx["personal_tolerance_notes"] = self.personal_tolerance_notes
        if self.personal_repurchase_intent is not None:
            ctx["personal_repurchase_intent"] = self.personal_repurchase_intent

        # Best-effort enrichment: days since the most recently opened inventory
        # item. Any failure here is deliberately ignored so the rest of the
        # context is still returned.
        # NOTE(review): presumably guards against the relationship not being
        # loadable - confirm, and consider narrowing the exception type.
        try:
            opened_items = [
                inv for inv in (self.inventory or []) if inv.is_opened and inv.opened_at
            ]
            if opened_items:
                most_recent = max(opened_items, key=lambda x: x.opened_at)
                ctx["days_since_opened"] = (date.today() - most_recent.opened_at).days
        except Exception:
            pass

        return ctx
class ProductInventory(SQLModel, table=True):
    """Table model for ``product_inventory``: one inventory entry of a product.

    Linked to its product through ``product_id`` (foreign key to
    ``products.id``, ``ondelete="CASCADE"``).
    """

    __tablename__ = "product_inventory"
    __domains__: ClassVar[frozenset[Domain]] = frozenset({Domain.SKINCARE})

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    product_id: UUID = Field(foreign_key="products.id", index=True, ondelete="CASCADE")

    # Lifecycle of the entry.
    is_opened: bool = Field(default=False)
    opened_at: date | None = Field(default=None)
    finished_at: date | None = Field(default=None)
    expiry_date: date | None = Field(default=None)

    # Weight tracking; the weight must be strictly positive when provided.
    current_weight_g: float | None = Field(default=None, gt=0)
    last_weighed_at: date | None = Field(default=None)

    notes: str | None = None
    created_at: datetime = Field(default_factory=utc_now, nullable=False)

    # Back-reference paired with Product.inventory.
    product: Optional["Product"] = Relationship(back_populates="inventory")
# ---------------------------------------------------------------------------
# Public response models
# ---------------------------------------------------------------------------
class ProductPublic(ProductBase):
    """Response model: the shared product fields plus id, timestamps and pricing."""

    id: UUID
    created_at: datetime
    updated_at: datetime

    # Pricing metadata; each defaults to None.
    price_tier: PriceTier | None = Field(default=None)
    price_per_use_pln: float | None = Field(default=None)
    price_tier_source: str | None = Field(default=None)
class ProductWithInventory(ProductPublic):
    """Response model: ``ProductPublic`` plus the product's inventory entries."""

    # Defaults to an empty list when no inventory is supplied.
    inventory: list[ProductInventory] = []