feat(api): add short_id column for consistent LLM UUID handling
Resolves validation failures where the LLM fabricated full UUIDs from 8-char prefixes shown in context, causing 'unknown product_id' errors.

Root Cause Analysis:
- Context showed 8-char short IDs: '77cbf37c' (Phase 2 optimization)
- Function tool returned full UUIDs: '77cbf37c-3830-4927-...'
- LLM saw BOTH formats, got confused, invented UUIDs for final response
- Validators rejected fabricated UUIDs as unknown products

Solution: Consistent 8-char short_id across LLM boundary:
1. Database: New short_id column (8 chars, unique, indexed)
2. Context: Shows short_id (was: str(id)[:8])
3. Function tools: Return short_id (was: full UUID)
4. Translation layer: Expands short_id → UUID before validation
5. Database: Stores full UUIDs (existing columns and data are unchanged; short_id is purely additive)

Changes:
- Added products.short_id column with unique constraint + index
- Migration populates from UUID prefix, handles collisions via regeneration
- Product model auto-generates short_id for new products
- LLM contexts use product.short_id consistently
- Function tools return product.short_id
- Added _expand_product_id() translation layer in routines.py
- Integrated expansion in suggest_routine() and suggest_batch()
- Validators work with full UUIDs (no changes needed)

Benefits:
✅ LLM never sees full UUIDs, no format confusion
✅ Maintains Phase 2 token optimization (~85% reduction)
✅ Indexed short_id lookups (O(log n) with the default B-tree index) vs O(n) pattern matching
✅ Unique constraint prevents collisions at DB level
✅ Clean separation: 8-char for LLM, 36-char for application

From production error:
Step 1: unknown product_id 77cbf37c-3830-4927-9669-07447206689d
(LLM invented the last 28 characters)

Now resolved: LLM uses '77cbf37c' consistently, translation layer expands to real UUID before validation.
This commit is contained in:
parent
710b53e471
commit
5bb2ea5f08
8 changed files with 176 additions and 14 deletions
|
|
@ -0,0 +1,83 @@
|
|||
"""add short_id column to products
|
||||
|
||||
Revision ID: 27b2c306b0c6
|
||||
Revises: 2697b4f1972d
|
||||
Create Date: 2026-03-06 10:54:13.308340
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# Revision identifiers, used by Alembic.
# The two IDs below repeat the "Revision ID" / "Revises" fields of the module docstring.
|
||||
# Identifier of this migration.
revision: str = "27b2c306b0c6"
|
||||
# Identifier of the migration this one revises (its parent revision).
down_revision: Union[str, Sequence[str], None] = "2697b4f1972d"
|
||||
# No branch labels are attached to this revision.
branch_labels: Union[str, Sequence[str], None] = None
|
||||
# No dependencies on other revisions are declared.
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def _choose_short_id(product_id: str, taken: set) -> str:
    """Pick an 8-character short_id for ``product_id`` that is not in ``taken``.

    Candidates are tried in this order:

    1. The first 8 characters of the ID string (the normal case).
    2. The next 8 hex digits of the ID. Hyphens are stripped before slicing,
       so the result is pure hex; slicing the raw UUID string at ``[9:17]``
       would yield something like ``"3830-492"``.
    3. Random 8-character hex strings, until an unused one is found.

    Args:
        product_id: String form of the product's primary key.
        taken: short_ids already handed out during this migration run.
            Not modified here; the caller records the returned value.

    Returns:
        A short_id that is not present in ``taken``.
    """
    import secrets  # local import: only needed while this migration runs

    preferred = product_id[:8]
    if preferred not in taken:
        return preferred

    hex_digits = product_id.replace("-", "")
    fallback = hex_digits[8:16]
    # Length guard: an ID with fewer than 16 hex digits has no full fallback.
    if len(fallback) == 8 and fallback not in taken:
        return fallback

    while True:
        candidate = secrets.token_hex(4)  # 4 random bytes -> 8 hex chars
        if candidate not in taken:
            return candidate


def upgrade() -> None:
    """Upgrade schema.

    Add the ``products.short_id`` column (8 characters, normally the UUID
    prefix) used for LLM token optimization, and backfill it for every
    existing product.

    Collisions between prefixes are resolved by ``_choose_short_id`` and
    reported on stdout. The column is made NOT NULL and unique only after
    the backfill, so existing rows never violate the constraints.
    """
    # Step 1: Add column (nullable initially, so existing rows are accepted)
    op.add_column("products", sa.Column("short_id", sa.String(8), nullable=True))

    # Step 2: Populate from existing UUIDs with collision detection
    connection = op.get_bind()
    rows = connection.execute(sa.text("SELECT id FROM products"))
    # Materialize the IDs before issuing UPDATEs on the same connection.
    product_ids = [str(row[0]) for row in rows]

    # Loop-invariant statement, built once.
    update_stmt = sa.text("UPDATE products SET short_id = :short_id WHERE id = :id")

    # Track used short_ids to detect collisions
    used_short_ids = set()

    for product_id in product_ids:
        short_id = _choose_short_id(product_id, used_short_ids)

        # Anything other than the plain prefix means a collision was resolved.
        if short_id != product_id[:8]:
            print(f"COLLISION RESOLVED: UUID {product_id} → short_id {short_id}")

        used_short_ids.add(short_id)
        connection.execute(update_stmt, {"short_id": short_id, "id": product_id})

    # Step 3: Add NOT NULL constraint.
    # existing_type is passed because some backends (MySQL, SQL Server) need
    # the current column type to emit a nullability change.
    op.alter_column(
        "products",
        "short_id",
        existing_type=sa.String(8),
        nullable=False,
    )

    # Step 4: Add unique constraint
    op.create_unique_constraint("uq_products_short_id", "products", ["short_id"])

    # Step 5: Add index for fast lookups.
    # NOTE(review): on most backends the unique constraint above is already
    # backed by an index, so this one may be redundant. It is kept because
    # downgrade() drops it by name.
    op.create_index("idx_products_short_id", "products", ["short_id"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema.

    Undo ``upgrade()`` in reverse order: drop the lookup index, then the
    unique constraint, and finally the ``short_id`` column itself.
    """
    table = "products"

    op.drop_index("idx_products_short_id", table_name=table)
    op.drop_constraint("uq_products_short_id", table, type_="unique")
    op.drop_column(table, "short_id")
|
||||
Loading…
Add table
Add a link
Reference in a new issue