diff --git a/src/ptscrapper/scheduler.py b/src/ptscrapper/scheduler.py index bb5656e..9ff59bf 100644 --- a/src/ptscrapper/scheduler.py +++ b/src/ptscrapper/scheduler.py @@ -158,15 +158,21 @@ async def fetch_and_store_course(course_id: int, ts: datetime, client=None): recs = await fetch_course_posts(client, [course_id]) # now do your I/O; catch+log only errors here for rec in recs: - rows = [ - { - "fetch_date": ts, - "course_id": rec["c"], - "stop_id": stop["s"], - "dep_time": stop["t"], - } - for stop in rec.get("r", []) - ] + rows = [] + for seq, stop in enumerate(rec.get("r", []), start=1): + # stop["t"] is like "1900-01-01 05:53:00" + # parse out the time portion + t = datetime.strptime(stop["t"], "%Y-%m-%d %H:%M:%S").time() + rows.append( + { + "fetch_date": ts, # UTC fetch timestamp + "course_id": rec["c"], + "stop_seq": seq, # preserve order! + "stop_id": stop["s"], + "dep_time": t, # real datetime.time + "dep_zone": "Europe/Warsaw", + } + ) table = pa.Table.from_pylist(rows) write_course_posts(table, rec["c"], ts) record_course_fetched(rec["c"], ts)