From 42dc14021fdd41f0968d15879ffef492b637f6c4 Mon Sep 17 00:00:00 2001
From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com>
Date: Mon, 6 Jan 2025 19:42:49 +0100
Subject: [PATCH] =?UTF-8?q?etl=5FProperty=5Fcapacities=5Fweekdays.py=20ein?=
 =?UTF-8?q?gef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Abfragemöglichkeit für die Wochentage eingefügt
---
 etl/src/api/main.py                              |  6 ++++++
 etl/src/data/etl_property_capacities_weekdays.py | 34 ++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 etl/src/data/etl_property_capacities_weekdays.py

diff --git a/etl/src/api/main.py b/etl/src/api/main.py
index 614676c..1133bec 100644
--- a/etl/src/api/main.py
+++ b/etl/src/api/main.py
@@ -2,6 +2,7 @@ import data
 import polars as pl
 from data import etl_property_capacities as etl_pc
 from data import etl_property_capacities_monthly as etl_pcm
+from data import etl_property_capacities_weekdays as etl_pcw
 from data import etl_region_capacities as etl_rc
 from data import etl_region_capacities_comparison as etl_rcc
 from fastapi import FastAPI, Response
@@ -48,6 +49,11 @@ def property_capacities_data(id: int, scrapeDate: str):
     capacities = etl_pcm.property_capacities_monthly(id, scrapeDate)
     return capacities
 
+@app.get("/property/{id}/capacities/weekdays/{scrapeDate}")
+def property_capacities_weekdays_data(id: int, scrapeDate: str):
+    capacities = etl_pcw.property_capacities_weekdays(id, scrapeDate)
+    return capacities
+
 @app.get("/property/{id}/base")
 def property_base_data(id: int):
     return d.property_base_data(id).pl().to_dicts()
diff --git a/etl/src/data/etl_property_capacities_weekdays.py b/etl/src/data/etl_property_capacities_weekdays.py
new file mode 100644
index 0000000..96e821d
--- /dev/null
+++ b/etl/src/data/etl_property_capacities_weekdays.py
@@ -0,0 +1,34 @@
+from io import StringIO
+
+import polars as pl
+
+import data
+
+d = data.load()
+
+
+def property_capacities_weekdays(id: int, scrapeDate: str):
+    """Return the average capacity per weekday (1=Mon .. 7=Sun) for a property extraction."""
+    extractions = d.extractions_propId_scrapeDate(id, scrapeDate).pl()
+    df_calendar = pl.DataFrame()
+    numWeeks = 0
+
+    # NOTE(review): df_calendar is rebuilt on every iteration, so only the
+    # last extraction row is aggregated below — confirm this is intended.
+    for row in extractions.rows(named=True):
+        scrapeDate = row['created_at']
+        df_calendar = pl.read_json(StringIO(row['calendar']))
+        columnTitles = df_calendar.columns
+        df_calendar = df_calendar.transpose()
+        df_calendar = df_calendar.with_columns(pl.Series(name="dates", values=columnTitles))
+        df_calendar = df_calendar.with_columns((pl.col("dates").str.to_date()))
+        numWeeks = round((df_calendar.get_column("dates").max() - df_calendar.get_column("dates").min()).days / 7, 0)
+        df_calendar = df_calendar.with_columns(pl.col("dates").dt.weekday().alias("weekday"))
+        df_calendar = df_calendar.drop("dates")
+
+    # Sum per weekday, then sort: group_by output order is not deterministic.
+    df_calendar = df_calendar.group_by("weekday").agg(pl.col("column_0").sum()).sort("weekday")
+    # Bug fix: average into "column_0" instead of overwriting the weekday labels.
+    df_calendar = df_calendar.with_columns((pl.col("column_0") / numWeeks).alias("column_0"))
+    result = {"scraping-date": scrapeDate, "weekday": df_calendar['weekday'].to_list(), 'capacities': df_calendar['column_0'].to_list()}
+    return result