diff --git a/backend/.coverage b/backend/.coverage
new file mode 100644
index 0000000..da6d65f
Binary files /dev/null and b/backend/.coverage differ
diff --git a/backend/alembic/versions/27b2c306b0c6_add_short_id_column_to_products.py b/backend/alembic/versions/27b2c306b0c6_add_short_id_column_to_products.py
new file mode 100644
index 0000000..8f4cde4
--- /dev/null
+++ b/backend/alembic/versions/27b2c306b0c6_add_short_id_column_to_products.py
@@ -0,0 +1,83 @@
+"""add short_id column to products
+
+Revision ID: 27b2c306b0c6
+Revises: 2697b4f1972d
+Create Date: 2026-03-06 10:54:13.308340
+
+"""
+
+import secrets
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "27b2c306b0c6"
+down_revision: Union[str, Sequence[str], None] = "2697b4f1972d"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema.
+
+    Add short_id column (8 hex chars derived from the UUID) for LLM token
+    optimization. Handles collisions by regenerating conflicting short_ids.
+    """
+    # Step 1: Add column (nullable initially)
+    op.add_column("products", sa.Column("short_id", sa.String(8), nullable=True))
+
+    # Step 2: Populate from existing UUIDs with collision detection
+    connection = op.get_bind()
+
+    # Get all products
+    result = connection.execute(sa.text("SELECT id FROM products"))
+    product_ids = [str(row[0]) for row in result]
+
+    # Track used short_ids to detect collisions
+    used_short_ids: set[str] = set()
+
+    for product_id in product_ids:
+        short_id = product_id[:8]
+
+        # Handle collision: try the next 8 hex digits, then fall back to random
+        if short_id in used_short_ids:
+            # Strip hyphens first: slicing the canonical UUID string directly
+            # would yield values like "1234-567" instead of 8 hex characters.
+            hex_digits = product_id.replace("-", "")
+            alternative = hex_digits[8:16]
+            if len(alternative) == 8 and alternative not in used_short_ids:
+                short_id = alternative
+            else:
+                # Generate random 8-char hex until an unused value is found
+                short_id = secrets.token_hex(4)
+                while short_id in used_short_ids:
+                    short_id = secrets.token_hex(4)
+
+            print(f"COLLISION RESOLVED: UUID {product_id} → short_id {short_id}")
+
+        used_short_ids.add(short_id)
+
+        # Update product with short_id
+        connection.execute(
+            sa.text("UPDATE products SET short_id = :short_id WHERE id = :id"),
+            {"short_id": short_id, "id": product_id},
+        )
+
+    # Step 3: Add NOT NULL constraint
+    op.alter_column("products", "short_id", nullable=False)
+
+    # Step 4: Add unique constraint
+    op.create_unique_constraint("uq_products_short_id", "products", ["short_id"])
+
+    # Step 5: Add index for fast lookups
+    op.create_index("idx_products_short_id", "products", ["short_id"])
+
+
+def downgrade() -> None:
+    """Downgrade schema: drop the short_id index, unique constraint and column."""
+    op.drop_index("idx_products_short_id", table_name="products")
+    op.drop_constraint("uq_products_short_id", "products", type_="unique")
+    op.drop_column("products", "short_id")
diff --git a/backend/innercontext/api/llm_context.py b/backend/innercontext/api/llm_context.py
index 6b9b19e..6ffc68e 100644
--- a/backend/innercontext/api/llm_context.py
+++ b/backend/innercontext/api/llm_context.py
@@ -117,7 +117,7 @@ def build_product_context_summary(product: Product, has_inventory: bool = False)
     safety_str = f" safety={{{','.join(safety_flags)}}}" if safety_flags else ""
 
     return (
-        f"{status} {str(product.id)[:8]} | {product.brand} {product.name} "
+        f"{status} {product.short_id} | {product.brand} {product.name} "
         f"({product.category}){effects_str}{safety_str}"
     )
 
diff --git a/backend/innercontext/api/product_llm_tools.py b/backend/innercontext/api/product_llm_tools.py
index 44f03aa..e152fd6 100644
--- a/backend/innercontext/api/product_llm_tools.py
+++ b/backend/innercontext/api/product_llm_tools.py
@@ -96,9 +96,12 @@ def _map_product_details(
     The 128-ingredient INCI list was consuming ~15KB per product.
     For safety/clinical decisions, actives + effect_profile are sufficient.
 
+    Uses short_id (8 chars) for LLM consistency - translation layer expands
+    to full UUID before validation/database storage.
+
     Args:
         product: Product to map
-        pid: Product ID string
+        pid: Product short_id (8 characters, e.g., "77cbf37c")
         last_used_on: Last usage date
         include_inci: Whether to include full INCI list (default: False)
 
@@ -193,7 +196,7 @@ def build_product_details_tool_handler(
             products_payload.append(
                 _map_product_details(
                     product,
-                    full_id,  # Always use full ID in response
+                    product.short_id,  # Return short_id for LLM consistency
                     last_used_on=last_used_on_by_product.get(full_id),
                 )
             )
diff --git a/backend/innercontext/api/routines.py b/backend/innercontext/api/routines.py
index c97fc71..7134beb 100644
--- a/backend/innercontext/api/routines.py
+++ b/backend/innercontext/api/routines.py
@@ -400,6 +400,48 @@ def _get_products_with_inventory(
     return set(inventory_rows)
 
 
+def _expand_product_id(session: Session, short_or_full_id: str) -> UUID | None:
+    """
+    Expand 8-char short_id to full UUID, or validate full UUID.
+
+    Translation layer between LLM world (8-char short_ids) and application world
+    (36-char UUIDs). LLM sees/uses short_ids for token optimization, but
+    validators and database use full UUIDs.
+
+    Args:
+        session: Database session
+        short_or_full_id: Either short_id ("77cbf37c") or full UUID
+
+    Returns:
+        Full UUID if product exists, None otherwise
+    """
+    # LLM output is untrusted: a non-string value would make len() raise
+    if not isinstance(short_or_full_id, str):
+        return None
+
+    # Tolerate stray whitespace around the ID echoed back by the LLM
+    candidate = short_or_full_id.strip()
+
+    # Already a full UUID?
+    if len(candidate) == 36:
+        try:
+            uuid_obj = UUID(candidate)
+        except ValueError:
+            return None
+        # Verify it exists
+        return uuid_obj if session.get(Product, uuid_obj) else None
+
+    # Short ID (8 chars) - indexed lookup
+    if len(candidate) == 8:
+        product = session.exec(
+            select(Product).where(Product.short_id == candidate)
+        ).first()
+        return product.id if product else None
+
+    # Invalid length
+    return None
+
+
 def _build_objectives_context(include_minoxidil_beard: bool) -> str:
     if include_minoxidil_beard:
         return (
@@ -686,17 +728,26 @@ def suggest_routine(
     except json.JSONDecodeError as e:
         raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}")
 
-    steps = [
-        SuggestedStep(
-            product_id=UUID(s["product_id"]) if s.get("product_id") else None,
-            action_type=s.get("action_type") or None,
-            action_notes=s.get("action_notes"),
-            region=s.get("region"),
-            why_this_step=s.get("why_this_step"),
-            optional=s.get("optional"),
+    # Translation layer: Expand short_ids (8 chars) to full UUIDs (36 chars)
+    steps = []
+    for s in parsed.get("steps", []):
+        product_id_str = s.get("product_id")
+        product_id_uuid = None
+
+        if product_id_str:
+            # Expand short_id or validate full UUID
+            product_id_uuid = _expand_product_id(session, product_id_str)
+
+        steps.append(
+            SuggestedStep(
+                product_id=product_id_uuid,
+                action_type=s.get("action_type") or None,
+                action_notes=s.get("action_notes"),
+                region=s.get("region"),
+                why_this_step=s.get("why_this_step"),
+                optional=s.get("optional"),
+            )
         )
-        for s in parsed.get("steps", [])
-    ]
 
     summary_raw = parsed.get("summary") or {}
     confidence_raw = summary_raw.get("confidence", 0)
@@ -854,11 +905,19 @@ def suggest_batch(
         raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}")
 
     def _parse_steps(raw_steps: list) -> list[SuggestedStep]:
+        """Parse steps and expand short_ids to full UUIDs."""
         result = []
         for s in raw_steps:
+            product_id_str = s.get("product_id")
+            product_id_uuid = None
+
+            if product_id_str:
+                # Translation layer: expand short_id to full UUID
+                product_id_uuid = _expand_product_id(session, product_id_str)
+
             result.append(
                 SuggestedStep(
-                    product_id=UUID(s["product_id"]) if s.get("product_id") else None,
+                    product_id=product_id_uuid,
                     action_type=s.get("action_type") or None,
                     action_notes=s.get("action_notes"),
                     region=s.get("region"),
diff --git a/backend/innercontext/models/product.py b/backend/innercontext/models/product.py
index c6c4a81..db4ed15 100644
--- a/backend/innercontext/models/product.py
+++ b/backend/innercontext/models/product.py
@@ -142,6 +142,12 @@ class Product(ProductBase, table=True):
     __domains__: ClassVar[frozenset[Domain]] = frozenset({Domain.SKINCARE})
 
     id: UUID = Field(default_factory=uuid4, primary_key=True)
+    short_id: str = Field(
+        max_length=8,
+        unique=True,
+        index=True,
+        description="8-character short ID for LLM contexts (first 8 chars of UUID)",
+    )
 
     # Override 9 JSON fields with sa_column (only in table model)
     inci: list[str] = Field(
@@ -214,6 +220,11 @@ class Product(ProductBase, table=True):
         if self.price_currency is not None:
             self.price_currency = self.price_currency.upper()
 
+        # Auto-generate short_id from UUID if not set
+        # Migration handles existing products; this is for new products
+        if not getattr(self, "short_id", None):
+            self.short_id = str(self.id)[:8]
+
         return self
 
     def to_llm_context(
diff --git a/backend/jobs/2026-03-02__17-12-31/job.log b/backend/jobs/2026-03-02__17-12-31/job.log
new file mode 100644
index 0000000..e69de29
diff --git a/backend/pgloader.config b/backend/pgloader.config
new file mode 100644
index 0000000..877cd1b
--- /dev/null
+++ b/backend/pgloader.config
@@ -0,0 +1,13 @@
+-- Password deliberately omitted from the URI: export PGPASSWORD before running.
+LOAD DATABASE
+     FROM postgresql://innercontext_user@192.168.101.83/innercontext
+     INTO sqlite:///Users/piotr/dev/innercontext/backend/innercontext.db
+
+ WITH include drop,
+      create tables,
+      create indexes,
+      reset sequences
+
+  SET work_mem to '16MB',
+      maintenance_work_mem to '512 MB';
+