From 6891e3b206c6331318e88f9bb72cae7db7466f5d Mon Sep 17 00:00:00 2001 From: Piotr Oleszczyk Date: Mon, 12 May 2025 14:36:37 +0200 Subject: [PATCH] process times in courses properly --- src/ptscrapper/scheduler.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/ptscrapper/scheduler.py b/src/ptscrapper/scheduler.py index bb5656e..9ff59bf 100644 --- a/src/ptscrapper/scheduler.py +++ b/src/ptscrapper/scheduler.py @@ -158,15 +158,21 @@ async def fetch_and_store_course(course_id: int, ts: datetime, client=None): recs = await fetch_course_posts(client, [course_id]) # now do your I/O; catch+log only errors here for rec in recs: - rows = [ - { - "fetch_date": ts, - "course_id": rec["c"], - "stop_id": stop["s"], - "dep_time": stop["t"], - } - for stop in rec.get("r", []) - ] + rows = [] + for seq, stop in enumerate(rec.get("r", []), start=1): + # stop["t"] is like "1900-01-01 05:53:00" + # parse out the time portion + t = datetime.strptime(stop["t"], "%Y-%m-%d %H:%M:%S").time() + rows.append( + { + "fetch_date": ts, # UTC fetch timestamp + "course_id": rec["c"], + "stop_seq": seq, # preserve order! + "stop_id": stop["s"], + "dep_time": t, # real datetime.time + "dep_zone": "Europe/Warsaw", + } + ) table = pa.Table.from_pylist(rows) write_course_posts(table, rec["c"], ts) record_course_fetched(rec["c"], ts)