refactor(llm): optimize Gemini config profiles for extraction and creativity

Introduces `get_extraction_config` and `get_creative_config` to standardize Gemini API calls.

* Defines explicit config profiles with appropriate `temperature` and `thinking_level` for Gemini 3 Flash.
* Extraction tasks use minimal thinking and temp=0.0 to reduce latency and token usage.
* Creative tasks use low thinking, temp=0.4, and top_p=0.8 to balance naturalness and safety.
* Applies these helpers across products, routines, and skincare endpoints.
* Also updates default model to `gemini-3-flash-preview`.
This commit is contained in:
Piotr Oleszczyk 2026-03-03 21:24:23 +01:00
parent 78df7322a9
commit ba1f10d99f
5 changed files with 72 additions and 33 deletions

View file

@ -4,14 +4,13 @@ from typing import Optional
from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException, Query
from google.genai import types as genai_types
from pydantic import BaseModel as PydanticBase
from pydantic import ValidationError
from sqlmodel import Session, SQLModel, col, select
from db import get_session
from innercontext.api.utils import get_or_404
from innercontext.llm import call_gemini
from innercontext.llm import call_gemini, get_creative_config, get_extraction_config
from innercontext.models import (
Product,
ProductBase,
@ -422,12 +421,10 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
response = call_gemini(
endpoint="products/parse-text",
contents=f"Extract product data from this text:\n\n{data.text}",
config=genai_types.GenerateContentConfig(
config=get_extraction_config(
system_instruction=_product_parse_system_prompt(),
response_mime_type="application/json",
response_schema=ProductParseLLMResponse,
max_output_tokens=16384,
temperature=0.0,
),
user_input=data.text,
)
@ -637,12 +634,10 @@ def suggest_shopping(session: Session = Depends(get_session)):
response = call_gemini(
endpoint="products/suggest",
contents=prompt,
config=genai_types.GenerateContentConfig(
config=get_creative_config(
system_instruction=_SHOPPING_SYSTEM_PROMPT,
response_mime_type="application/json",
response_schema=_ShoppingSuggestionsOut,
max_output_tokens=4096,
temperature=0.4,
),
user_input=prompt,
)

View file

@ -4,13 +4,12 @@ from typing import Optional
from uuid import UUID, uuid4
from fastapi import APIRouter, Depends, HTTPException
from google.genai import types as genai_types
from pydantic import BaseModel as PydanticBase
from sqlmodel import Session, SQLModel, col, select
from db import get_session
from innercontext.api.utils import get_or_404
from innercontext.llm import call_gemini
from innercontext.llm import call_gemini, get_creative_config
from innercontext.models import (
GroomingSchedule,
Product,
@ -522,12 +521,10 @@ def suggest_routine(
response = call_gemini(
endpoint="routines/suggest",
contents=prompt,
config=genai_types.GenerateContentConfig(
config=get_creative_config(
system_instruction=_ROUTINES_SYSTEM_PROMPT,
response_mime_type="application/json",
response_schema=_SuggestionOut,
max_output_tokens=4096,
temperature=0.4,
),
user_input=prompt,
)
@ -600,12 +597,10 @@ def suggest_batch(
response = call_gemini(
endpoint="routines/suggest-batch",
contents=prompt,
config=genai_types.GenerateContentConfig(
config=get_creative_config(
system_instruction=_ROUTINES_SYSTEM_PROMPT,
response_mime_type="application/json",
response_schema=_BatchOut,
max_output_tokens=8192,
temperature=0.4,
),
user_input=prompt,
)

View file

@ -11,7 +11,7 @@ from sqlmodel import Session, SQLModel, select
from db import get_session
from innercontext.api.utils import get_or_404
from innercontext.llm import call_gemini
from innercontext.llm import call_gemini, get_extraction_config
from innercontext.models import (
SkinConditionSnapshot,
SkinConditionSnapshotBase,
@ -171,12 +171,10 @@ async def analyze_skin_photos(
response = call_gemini(
endpoint="skincare/analyze-photos",
contents=parts,
config=genai_types.GenerateContentConfig(
config=get_extraction_config(
system_instruction=_skin_photo_system_prompt(),
response_mime_type="application/json",
response_schema=_SkinAnalysisOut,
max_output_tokens=2048,
temperature=0.0,
),
user_input=image_summary,
)

View file

@ -3,12 +3,50 @@
import os
import time
from contextlib import suppress
from typing import Any
from fastapi import HTTPException
from google import genai
from google.genai import types as genai_types
_DEFAULT_MODEL = "gemini-flash-latest"
_DEFAULT_MODEL = "gemini-3-flash-preview"
def get_extraction_config(
    system_instruction: str,
    response_schema: Any,
    max_output_tokens: int = 8192,
) -> genai_types.GenerateContentConfig:
    """Build a Gemini config for deterministic structured extraction.

    Pins temperature to 0.0 and requests MINIMAL thinking so the token
    budget goes to the JSON payload rather than reasoning traces, and
    forces JSON output validated against *response_schema*.
    """
    # MINIMAL thinking: extraction tasks need no extended reasoning.
    minimal_thinking = genai_types.ThinkingConfig(
        thinking_level=genai_types.ThinkingLevel.MINIMAL
    )
    return genai_types.GenerateContentConfig(
        system_instruction=system_instruction,
        response_mime_type="application/json",
        response_schema=response_schema,
        max_output_tokens=max_output_tokens,
        temperature=0.0,
        thinking_config=minimal_thinking,
    )
def get_creative_config(
    system_instruction: str,
    response_schema: Any,
    max_output_tokens: int = 4096,
) -> genai_types.GenerateContentConfig:
    """Build a Gemini config for suggestion/recommendation tasks.

    temperature=0.4 combined with top_p=0.8 allows some variety in the
    output while staying bounded; LOW thinking keeps latency and token
    usage down. Output is JSON validated against *response_schema*.
    """
    settings: dict[str, Any] = {
        "system_instruction": system_instruction,
        "response_mime_type": "application/json",
        "response_schema": response_schema,
        "max_output_tokens": max_output_tokens,
        "temperature": 0.4,
        "top_p": 0.8,
        # LOW (not MINIMAL): suggestions benefit from a little reasoning.
        "thinking_config": genai_types.ThinkingConfig(
            thinking_level=genai_types.ThinkingLevel.LOW
        ),
    }
    return genai_types.GenerateContentConfig(**settings)
def get_gemini_client() -> tuple[genai.Client, str]:
@ -46,18 +84,6 @@ def call_gemini(
with suppress(Exception):
user_input = str(contents)
# Limit thinking by default — Gemini 3 Flash defaults to "high" thinking which
# consumes most of the token budget before generating actual output.
# Use "low" to reduce latency while keeping basic reasoning intact.
if config.thinking_config is None:
config = config.model_copy(
update={
"thinking_config": genai_types.ThinkingConfig(
thinking_level=genai_types.ThinkingLevel.LOW
)
}
)
start = time.monotonic()
success, error_detail, response, finish_reason = True, None, None, None
try:

25
backend/test_query.py Normal file
View file

@ -0,0 +1,25 @@
from datetime import date, timedelta
from sqlmodel import select
from db import get_session
from innercontext.models import Routine, RoutineStep
def run() -> None:
    """Print the count of routine-step product usages in the last 7 days.

    Ad-hoc sanity script for the recent-usage query — presumably mirrors
    the query used by the suggestion endpoints (verify against callers).
    """
    # get_session() is a generator-style dependency: keep a handle to the
    # generator so its teardown runs (releasing the DB session) even if
    # the query raises. The original `next(get_session())` leaked it.
    session_gen = get_session()
    session = next(session_gen)
    try:
        ref_date = date.today()
        cutoff = ref_date - timedelta(days=7)
        # Product ids referenced by any routine dated in [cutoff, ref_date].
        recent_usage = session.exec(
            select(RoutineStep.product_id)
            .join(Routine, Routine.id == RoutineStep.routine_id)
            .where(Routine.routine_date >= cutoff)
            .where(Routine.routine_date <= ref_date)
        ).all()
        print("Found:", len(recent_usage))
    finally:
        # Resume/close the generator so its cleanup code executes.
        session_gen.close()


if __name__ == "__main__":
    run()