refactor(llm): optimize Gemini config profiles for extraction and creativity
Introduces `get_extraction_config` and `get_creative_config` to standardize Gemini API calls. * Defines explicit config profiles with appropriate `temperature` and `thinking_level` for Gemini 3 Flash. * Extraction tasks use minimal thinking and temp=0.0 to reduce latency and token usage. * Creative tasks use low thinking, temp=0.4, and top_p=0.8 to balance naturalness and safety. * Applies these helpers across products, routines, and skincare endpoints. * Also updates default model to `gemini-3-flash-preview`.
This commit is contained in:
parent
78df7322a9
commit
ba1f10d99f
5 changed files with 72 additions and 33 deletions
|
|
@@ -4,14 +4,13 @@ from typing import Optional
|
||||||
from uuid import UUID, uuid4
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
from google.genai import types as genai_types
|
|
||||||
from pydantic import BaseModel as PydanticBase
|
from pydantic import BaseModel as PydanticBase
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from sqlmodel import Session, SQLModel, col, select
|
from sqlmodel import Session, SQLModel, col, select
|
||||||
|
|
||||||
from db import get_session
|
from db import get_session
|
||||||
from innercontext.api.utils import get_or_404
|
from innercontext.api.utils import get_or_404
|
||||||
from innercontext.llm import call_gemini
|
from innercontext.llm import call_gemini, get_creative_config, get_extraction_config
|
||||||
from innercontext.models import (
|
from innercontext.models import (
|
||||||
Product,
|
Product,
|
||||||
ProductBase,
|
ProductBase,
|
||||||
|
|
@@ -422,12 +421,10 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
|
||||||
response = call_gemini(
|
response = call_gemini(
|
||||||
endpoint="products/parse-text",
|
endpoint="products/parse-text",
|
||||||
contents=f"Extract product data from this text:\n\n{data.text}",
|
contents=f"Extract product data from this text:\n\n{data.text}",
|
||||||
config=genai_types.GenerateContentConfig(
|
config=get_extraction_config(
|
||||||
system_instruction=_product_parse_system_prompt(),
|
system_instruction=_product_parse_system_prompt(),
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=ProductParseLLMResponse,
|
response_schema=ProductParseLLMResponse,
|
||||||
max_output_tokens=16384,
|
max_output_tokens=16384,
|
||||||
temperature=0.0,
|
|
||||||
),
|
),
|
||||||
user_input=data.text,
|
user_input=data.text,
|
||||||
)
|
)
|
||||||
|
|
@@ -637,12 +634,10 @@ def suggest_shopping(session: Session = Depends(get_session)):
|
||||||
response = call_gemini(
|
response = call_gemini(
|
||||||
endpoint="products/suggest",
|
endpoint="products/suggest",
|
||||||
contents=prompt,
|
contents=prompt,
|
||||||
config=genai_types.GenerateContentConfig(
|
config=get_creative_config(
|
||||||
system_instruction=_SHOPPING_SYSTEM_PROMPT,
|
system_instruction=_SHOPPING_SYSTEM_PROMPT,
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=_ShoppingSuggestionsOut,
|
response_schema=_ShoppingSuggestionsOut,
|
||||||
max_output_tokens=4096,
|
max_output_tokens=4096,
|
||||||
temperature=0.4,
|
|
||||||
),
|
),
|
||||||
user_input=prompt,
|
user_input=prompt,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@@ -4,13 +4,12 @@ from typing import Optional
|
||||||
from uuid import UUID, uuid4
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from google.genai import types as genai_types
|
|
||||||
from pydantic import BaseModel as PydanticBase
|
from pydantic import BaseModel as PydanticBase
|
||||||
from sqlmodel import Session, SQLModel, col, select
|
from sqlmodel import Session, SQLModel, col, select
|
||||||
|
|
||||||
from db import get_session
|
from db import get_session
|
||||||
from innercontext.api.utils import get_or_404
|
from innercontext.api.utils import get_or_404
|
||||||
from innercontext.llm import call_gemini
|
from innercontext.llm import call_gemini, get_creative_config
|
||||||
from innercontext.models import (
|
from innercontext.models import (
|
||||||
GroomingSchedule,
|
GroomingSchedule,
|
||||||
Product,
|
Product,
|
||||||
|
|
@@ -522,12 +521,10 @@ def suggest_routine(
|
||||||
response = call_gemini(
|
response = call_gemini(
|
||||||
endpoint="routines/suggest",
|
endpoint="routines/suggest",
|
||||||
contents=prompt,
|
contents=prompt,
|
||||||
config=genai_types.GenerateContentConfig(
|
config=get_creative_config(
|
||||||
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=_SuggestionOut,
|
response_schema=_SuggestionOut,
|
||||||
max_output_tokens=4096,
|
max_output_tokens=4096,
|
||||||
temperature=0.4,
|
|
||||||
),
|
),
|
||||||
user_input=prompt,
|
user_input=prompt,
|
||||||
)
|
)
|
||||||
|
|
@@ -600,12 +597,10 @@ def suggest_batch(
|
||||||
response = call_gemini(
|
response = call_gemini(
|
||||||
endpoint="routines/suggest-batch",
|
endpoint="routines/suggest-batch",
|
||||||
contents=prompt,
|
contents=prompt,
|
||||||
config=genai_types.GenerateContentConfig(
|
config=get_creative_config(
|
||||||
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=_BatchOut,
|
response_schema=_BatchOut,
|
||||||
max_output_tokens=8192,
|
max_output_tokens=8192,
|
||||||
temperature=0.4,
|
|
||||||
),
|
),
|
||||||
user_input=prompt,
|
user_input=prompt,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@@ -11,7 +11,7 @@ from sqlmodel import Session, SQLModel, select
|
||||||
|
|
||||||
from db import get_session
|
from db import get_session
|
||||||
from innercontext.api.utils import get_or_404
|
from innercontext.api.utils import get_or_404
|
||||||
from innercontext.llm import call_gemini
|
from innercontext.llm import call_gemini, get_extraction_config
|
||||||
from innercontext.models import (
|
from innercontext.models import (
|
||||||
SkinConditionSnapshot,
|
SkinConditionSnapshot,
|
||||||
SkinConditionSnapshotBase,
|
SkinConditionSnapshotBase,
|
||||||
|
|
@@ -171,12 +171,10 @@ async def analyze_skin_photos(
|
||||||
response = call_gemini(
|
response = call_gemini(
|
||||||
endpoint="skincare/analyze-photos",
|
endpoint="skincare/analyze-photos",
|
||||||
contents=parts,
|
contents=parts,
|
||||||
config=genai_types.GenerateContentConfig(
|
config=get_extraction_config(
|
||||||
system_instruction=_skin_photo_system_prompt(),
|
system_instruction=_skin_photo_system_prompt(),
|
||||||
response_mime_type="application/json",
|
|
||||||
response_schema=_SkinAnalysisOut,
|
response_schema=_SkinAnalysisOut,
|
||||||
max_output_tokens=2048,
|
max_output_tokens=2048,
|
||||||
temperature=0.0,
|
|
||||||
),
|
),
|
||||||
user_input=image_summary,
|
user_input=image_summary,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@@ -3,12 +3,50 @@
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types as genai_types
|
from google.genai import types as genai_types
|
||||||
|
|
||||||
_DEFAULT_MODEL = "gemini-flash-latest"
|
_DEFAULT_MODEL = "gemini-3-flash-preview"
|
||||||
|
|
||||||
|
|
||||||
|
def get_extraction_config(
|
||||||
|
system_instruction: str,
|
||||||
|
response_schema: Any,
|
||||||
|
max_output_tokens: int = 8192,
|
||||||
|
) -> genai_types.GenerateContentConfig:
|
||||||
|
"""Config for strict data extraction (deterministic, minimal thinking)."""
|
||||||
|
return genai_types.GenerateContentConfig(
|
||||||
|
system_instruction=system_instruction,
|
||||||
|
response_mime_type="application/json",
|
||||||
|
response_schema=response_schema,
|
||||||
|
max_output_tokens=max_output_tokens,
|
||||||
|
temperature=0.0,
|
||||||
|
thinking_config=genai_types.ThinkingConfig(
|
||||||
|
thinking_level=genai_types.ThinkingLevel.MINIMAL
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_creative_config(
|
||||||
|
system_instruction: str,
|
||||||
|
response_schema: Any,
|
||||||
|
max_output_tokens: int = 4096,
|
||||||
|
) -> genai_types.GenerateContentConfig:
|
||||||
|
"""Config for creative tasks like recommendations (balanced creativity)."""
|
||||||
|
return genai_types.GenerateContentConfig(
|
||||||
|
system_instruction=system_instruction,
|
||||||
|
response_mime_type="application/json",
|
||||||
|
response_schema=response_schema,
|
||||||
|
max_output_tokens=max_output_tokens,
|
||||||
|
temperature=0.4,
|
||||||
|
top_p=0.8,
|
||||||
|
thinking_config=genai_types.ThinkingConfig(
|
||||||
|
thinking_level=genai_types.ThinkingLevel.LOW
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_gemini_client() -> tuple[genai.Client, str]:
|
def get_gemini_client() -> tuple[genai.Client, str]:
|
||||||
|
|
@@ -46,18 +84,6 @@ def call_gemini(
|
||||||
with suppress(Exception):
|
with suppress(Exception):
|
||||||
user_input = str(contents)
|
user_input = str(contents)
|
||||||
|
|
||||||
# Limit thinking by default — Gemini 3 Flash defaults to "high" thinking which
|
|
||||||
# consumes most of the token budget before generating actual output.
|
|
||||||
# Use "low" to reduce latency while keeping basic reasoning intact.
|
|
||||||
if config.thinking_config is None:
|
|
||||||
config = config.model_copy(
|
|
||||||
update={
|
|
||||||
"thinking_config": genai_types.ThinkingConfig(
|
|
||||||
thinking_level=genai_types.ThinkingLevel.LOW
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
success, error_detail, response, finish_reason = True, None, None, None
|
success, error_detail, response, finish_reason = True, None, None, None
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
25
backend/test_query.py
Normal file
25
backend/test_query.py
Normal file
|
|
@@ -0,0 +1,25 @@
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
|
from sqlmodel import select
|
||||||
|
|
||||||
|
from db import get_session
|
||||||
|
from innercontext.models import Routine, RoutineStep
|
||||||
|
|
||||||
|
|
||||||
|
def run():
|
||||||
|
session = next(get_session())
|
||||||
|
ref_date = date.today()
|
||||||
|
cutoff = ref_date - timedelta(days=7)
|
||||||
|
|
||||||
|
recent_usage = session.exec(
|
||||||
|
select(RoutineStep.product_id)
|
||||||
|
.join(Routine, Routine.id == RoutineStep.routine_id)
|
||||||
|
.where(Routine.routine_date >= cutoff)
|
||||||
|
.where(Routine.routine_date <= ref_date)
|
||||||
|
).all()
|
||||||
|
|
||||||
|
print("Found:", len(recent_usage))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue