fix(backend): sanitize NaN/Infinity/undefined in Gemini JSON response

Models sometimes emit JS-style literals that are not valid JSON for unknown numeric fields.
Replace NaN, Infinity, and undefined values with null before parsing.
Also add error logging to capture the raw response on parse failure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Piotr Oleszczyk 2026-02-28 21:46:47 +01:00
parent 54903a3bed
commit 3e85858d41

View file

@ -1,8 +1,12 @@
import json
import logging
import re
from datetime import date
from typing import Optional
from uuid import UUID, uuid4
log = logging.getLogger(__name__)
from fastapi import APIRouter, Depends, HTTPException, Query
from google.genai import types as genai_types
from pydantic import ValidationError
@ -369,9 +373,14 @@ def parse_product_text(data: ProductParseRequest) -> ProductParseResponse:
end = raw.rfind("}")
if start != -1 and end != -1:
raw = raw[start : end + 1]
# Replace JS-style non-JSON literals that some models emit
raw = re.sub(r":\s*NaN\b", ": null", raw)
raw = re.sub(r":\s*Infinity\b", ": null", raw)
raw = re.sub(r":\s*undefined\b", ": null", raw)
try:
parsed = json.loads(raw)
except (json.JSONDecodeError, Exception) as e:
except json.JSONDecodeError as e:
log.error("Gemini parse-text raw response (failed):\n%s", raw)
raise HTTPException(status_code=502, detail=f"LLM returned invalid JSON: {e}")
try:
return ProductParseResponse.model_validate(parsed)