refactor(llm): optimize Gemini config profiles for extraction and creativity
Introduces `get_extraction_config` and `get_creative_config` to standardize Gemini API calls. * Defines explicit config profiles with appropriate `temperature` and `thinking_level` for Gemini 3 Flash. * Extraction tasks use minimal thinking and temp=0.0 to reduce latency and token usage. * Creative tasks use low thinking, temp=0.4, and top_p=0.8 to balance naturalness and safety. * Applies these helpers across products, routines, and skincare endpoints. * Also updates default model to `gemini-3-flash-preview`.
This commit is contained in:
parent
78df7322a9
commit
ba1f10d99f
5 changed files with 72 additions and 33 deletions
|
|
@ -4,14 +4,13 @@ from typing import Optional
|
|||
from uuid import UUID, uuid4
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from google.genai import types as genai_types
|
||||
from pydantic import BaseModel as PydanticBase
|
||||
from pydantic import ValidationError
|
||||
from sqlmodel import Session, SQLModel, col, select
|
||||
|
||||
from db import get_session
|
||||
from innercontext.api.utils import get_or_404
|
||||
from innercontext.llm import call_gemini
|
||||
from innercontext.llm import call_gemini, get_creative_config, get_extraction_config
|
||||
from innercontext.models import (
|
||||
Product,
|
||||
ProductBase,
|
||||
|
|
@ -422,12 +421,10 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
|
|||
response = call_gemini(
|
||||
endpoint="products/parse-text",
|
||||
contents=f"Extract product data from this text:\n\n{data.text}",
|
||||
config=genai_types.GenerateContentConfig(
|
||||
config=get_extraction_config(
|
||||
system_instruction=_product_parse_system_prompt(),
|
||||
response_mime_type="application/json",
|
||||
response_schema=ProductParseLLMResponse,
|
||||
max_output_tokens=16384,
|
||||
temperature=0.0,
|
||||
),
|
||||
user_input=data.text,
|
||||
)
|
||||
|
|
@ -637,12 +634,10 @@ def suggest_shopping(session: Session = Depends(get_session)):
|
|||
response = call_gemini(
|
||||
endpoint="products/suggest",
|
||||
contents=prompt,
|
||||
config=genai_types.GenerateContentConfig(
|
||||
config=get_creative_config(
|
||||
system_instruction=_SHOPPING_SYSTEM_PROMPT,
|
||||
response_mime_type="application/json",
|
||||
response_schema=_ShoppingSuggestionsOut,
|
||||
max_output_tokens=4096,
|
||||
temperature=0.4,
|
||||
),
|
||||
user_input=prompt,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -4,13 +4,12 @@ from typing import Optional
|
|||
from uuid import UUID, uuid4
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from google.genai import types as genai_types
|
||||
from pydantic import BaseModel as PydanticBase
|
||||
from sqlmodel import Session, SQLModel, col, select
|
||||
|
||||
from db import get_session
|
||||
from innercontext.api.utils import get_or_404
|
||||
from innercontext.llm import call_gemini
|
||||
from innercontext.llm import call_gemini, get_creative_config
|
||||
from innercontext.models import (
|
||||
GroomingSchedule,
|
||||
Product,
|
||||
|
|
@ -522,12 +521,10 @@ def suggest_routine(
|
|||
response = call_gemini(
|
||||
endpoint="routines/suggest",
|
||||
contents=prompt,
|
||||
config=genai_types.GenerateContentConfig(
|
||||
config=get_creative_config(
|
||||
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
||||
response_mime_type="application/json",
|
||||
response_schema=_SuggestionOut,
|
||||
max_output_tokens=4096,
|
||||
temperature=0.4,
|
||||
),
|
||||
user_input=prompt,
|
||||
)
|
||||
|
|
@ -600,12 +597,10 @@ def suggest_batch(
|
|||
response = call_gemini(
|
||||
endpoint="routines/suggest-batch",
|
||||
contents=prompt,
|
||||
config=genai_types.GenerateContentConfig(
|
||||
config=get_creative_config(
|
||||
system_instruction=_ROUTINES_SYSTEM_PROMPT,
|
||||
response_mime_type="application/json",
|
||||
response_schema=_BatchOut,
|
||||
max_output_tokens=8192,
|
||||
temperature=0.4,
|
||||
),
|
||||
user_input=prompt,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from sqlmodel import Session, SQLModel, select
|
|||
|
||||
from db import get_session
|
||||
from innercontext.api.utils import get_or_404
|
||||
from innercontext.llm import call_gemini
|
||||
from innercontext.llm import call_gemini, get_extraction_config
|
||||
from innercontext.models import (
|
||||
SkinConditionSnapshot,
|
||||
SkinConditionSnapshotBase,
|
||||
|
|
@ -171,12 +171,10 @@ async def analyze_skin_photos(
|
|||
response = call_gemini(
|
||||
endpoint="skincare/analyze-photos",
|
||||
contents=parts,
|
||||
config=genai_types.GenerateContentConfig(
|
||||
config=get_extraction_config(
|
||||
system_instruction=_skin_photo_system_prompt(),
|
||||
response_mime_type="application/json",
|
||||
response_schema=_SkinAnalysisOut,
|
||||
max_output_tokens=2048,
|
||||
temperature=0.0,
|
||||
),
|
||||
user_input=image_summary,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,12 +3,50 @@
|
|||
import os
|
||||
import time
|
||||
from contextlib import suppress
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException
|
||||
from google import genai
|
||||
from google.genai import types as genai_types
|
||||
|
||||
_DEFAULT_MODEL = "gemini-flash-latest"
|
||||
_DEFAULT_MODEL = "gemini-3-flash-preview"
|
||||
|
||||
|
||||
def get_extraction_config(
    system_instruction: str,
    response_schema: Any,
    max_output_tokens: int = 8192,
) -> genai_types.GenerateContentConfig:
    """Build a GenerateContentConfig profile for strict data extraction.

    The profile pins temperature to 0.0 for deterministic output and caps
    reasoning at MINIMAL thinking, so structured JSON parsing spends its
    token budget on the answer rather than on chain-of-thought.

    Args:
        system_instruction: System prompt steering the extraction task.
        response_schema: Schema object the JSON response must conform to.
        max_output_tokens: Output token ceiling (default 8192).

    Returns:
        A ready-to-use ``genai_types.GenerateContentConfig``.
    """
    minimal_thinking = genai_types.ThinkingConfig(
        thinking_level=genai_types.ThinkingLevel.MINIMAL
    )
    return genai_types.GenerateContentConfig(
        system_instruction=system_instruction,
        response_mime_type="application/json",
        response_schema=response_schema,
        max_output_tokens=max_output_tokens,
        temperature=0.0,
        thinking_config=minimal_thinking,
    )
|
||||
|
||||
|
||||
def get_creative_config(
    system_instruction: str,
    response_schema: Any,
    max_output_tokens: int = 4096,
) -> genai_types.GenerateContentConfig:
    """Build a GenerateContentConfig profile for creative generation.

    Balances naturalness against safety for recommendation-style tasks:
    temperature 0.4 with nucleus sampling (top_p 0.8), and LOW thinking to
    keep latency and token spend down while retaining basic reasoning.

    Args:
        system_instruction: System prompt steering the creative task.
        response_schema: Schema object the JSON response must conform to.
        max_output_tokens: Output token ceiling (default 4096).

    Returns:
        A ready-to-use ``genai_types.GenerateContentConfig``.
    """
    low_thinking = genai_types.ThinkingConfig(
        thinking_level=genai_types.ThinkingLevel.LOW
    )
    return genai_types.GenerateContentConfig(
        system_instruction=system_instruction,
        response_mime_type="application/json",
        response_schema=response_schema,
        max_output_tokens=max_output_tokens,
        temperature=0.4,
        top_p=0.8,
        thinking_config=low_thinking,
    )
|
||||
|
||||
|
||||
def get_gemini_client() -> tuple[genai.Client, str]:
|
||||
|
|
@ -46,18 +84,6 @@ def call_gemini(
|
|||
with suppress(Exception):
|
||||
user_input = str(contents)
|
||||
|
||||
# Limit thinking by default — Gemini 3 Flash defaults to "high" thinking which
|
||||
# consumes most of the token budget before generating actual output.
|
||||
# Use "low" to reduce latency while keeping basic reasoning intact.
|
||||
if config.thinking_config is None:
|
||||
config = config.model_copy(
|
||||
update={
|
||||
"thinking_config": genai_types.ThinkingConfig(
|
||||
thinking_level=genai_types.ThinkingLevel.LOW
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
start = time.monotonic()
|
||||
success, error_detail, response, finish_reason = True, None, None, None
|
||||
try:
|
||||
|
|
|
|||
25
backend/test_query.py
Normal file
25
backend/test_query.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from datetime import date, timedelta
|
||||
|
||||
from sqlmodel import select
|
||||
|
||||
from db import get_session
|
||||
from innercontext.models import Routine, RoutineStep
|
||||
|
||||
|
||||
def run():
    """Print how many RoutineStep product usages fall within the last 7 days.

    Ad-hoc diagnostic: selects ``RoutineStep.product_id`` joined to
    ``Routine``, filtered to ``routine_date`` in ``[today - 7d, today]``,
    and prints the row count.
    """
    # get_session() is a generator-style dependency (FastAPI convention).
    # The original `next(get_session())` never closed the generator, so any
    # teardown after its `yield` (e.g. session cleanup) would never run.
    # Drive it manually and close it in a finally block.
    session_gen = get_session()
    session = next(session_gen)
    try:
        ref_date = date.today()
        cutoff = ref_date - timedelta(days=7)

        recent_usage = session.exec(
            select(RoutineStep.product_id)
            .join(Routine, Routine.id == RoutineStep.routine_id)
            .where(Routine.routine_date >= cutoff)
            .where(Routine.routine_date <= ref_date)
        ).all()

        print("Found:", len(recent_usage))
    finally:
        # Closing the generator raises GeneratorExit at the yield point,
        # executing the dependency's cleanup code exactly once.
        session_gen.close()


if __name__ == "__main__":
    run()
|
||||
Loading…
Add table
Add a link
Reference in a new issue