diff --git a/etl/src/api/main.py b/etl/src/api/main.py
index cc26adf..614676c 100644
--- a/etl/src/api/main.py
+++ b/etl/src/api/main.py
@@ -1,6 +1,7 @@
 import data
 import polars as pl
 from data import etl_property_capacities as etl_pc
+from data import etl_property_capacities_monthly as etl_pcm
 from data import etl_region_capacities as etl_rc
 from data import etl_region_capacities_comparison as etl_rcc
 from fastapi import FastAPI, Response
@@ -42,6 +43,17 @@ def property_capacities_data(id: int):
     capacities = etl_pc.property_capacities(id)
     return capacities
 
+@app.get("/property/{id}/capacities/monthly/{scrapeDate}")
+def property_capacities_monthly_data(id: int, scrapeDate: str):
+    """Return the monthly capacity summary of one property.
+
+    Delegates to ``etl_pcm.property_capacities_monthly`` with the path
+    parameters unchanged.
+    """
+    # Named differently from ``property_capacities_data`` above so that the
+    # earlier handler's module-level name is not rebound.
+    return etl_pcm.property_capacities_monthly(id, scrapeDate)
+
 @app.get("/property/{id}/base")
 def property_base_data(id: int):
     return d.property_base_data(id).pl().to_dicts()
diff --git a/etl/src/data/database.py b/etl/src/data/database.py
index 8cabbca..7aa3e0b 100644
--- a/etl/src/data/database.py
+++ b/etl/src/data/database.py
@@ -334,6 +334,40 @@ class Database:
             created_at
         """)
 
+    def extractions_propId_scrapeDate(self, property_id: int, scrape_date: str):
+        """Return the first calendar extraction of a property since a date.
+
+        The result has the columns ``calendar`` (the JSON found at
+        ``$.content.days`` of the extraction body) and ``created_at``. It
+        holds at most one row: the earliest extraction of ``property_id``
+        whose ``created_at`` is not before ``scrape_date``.
+
+        Raises:
+            ValueError: if ``scrape_date`` is not an ISO date (YYYY-MM-DD).
+        """
+        from datetime import date
+
+        # Both values are interpolated into the SQL text, so round-trip
+        # them through int/date first: only an integer literal and a
+        # canonical YYYY-MM-DD string can reach the query.
+        property_id = int(property_id)
+        scrape_day = date.fromisoformat(scrape_date).isoformat()
+        return self.connection.sql(f"""
+            SELECT
+                JSON_EXTRACT(body, '$.content.days') as calendar,
+                created_at
+            FROM
+                consultancy_d.extractions
+            WHERE
+                type == 'calendar' AND
+                property_id = {property_id} AND
+                calendar NOT NULL AND
+                created_at >= '{scrape_day}'
+            ORDER BY
+                created_at
+            LIMIT 1
+        """)
+
     # Anzahl der extrahierten properties pro Exktraktionsvorgang
     def properties_per_extraction(self, property_id):
         return self.connection.sql("""
diff --git a/etl/src/data/etl_property_capacities_monthly.py b/etl/src/data/etl_property_capacities_monthly.py
new file mode 100644
index 0000000..1fa78d6
--- /dev/null
+++ b/etl/src/data/etl_property_capacities_monthly.py
@@ -0,0 +1,48 @@
+from io import StringIO
+
+import polars as pl
+
+import data
+
+d = data.load()
+
+
+def property_capacities_monthly(id: int, scrapeDate: str):
+    """Sum the calendar values of one extraction per month.
+
+    Loads the extraction that ``d.extractions_propId_scrapeDate`` returns
+    for the property ``id`` and ``scrapeDate`` and totals its per-day
+    calendar values by month.
+
+    Returns a dict with the keys ``scraping-date`` (``created_at`` of the
+    extraction used), ``months`` (labels such as ``"Apr 2024"``) and
+    ``capacities`` (one total per label). If no extraction matches, the
+    requested ``scrapeDate`` is echoed back and both lists are empty.
+    """
+    extractions = d.extractions_propId_scrapeDate(id, scrapeDate).pl()
+    if extractions.is_empty():
+        # Nothing was scraped for this property/date: answer with an empty
+        # summary instead of failing on the missing calendar columns.
+        return {"scraping-date": scrapeDate, "months": [], "capacities": []}
+
+    # The query is limited to a single row, so the first row is the result.
+    extraction = extractions.row(0, named=True)
+
+    # read_json yields one column per calendar date, so transposing gives
+    # one row per day whose value lands in "column_0".
+    calendar = pl.read_json(StringIO(extraction["calendar"]))
+    days = calendar.columns
+    monthly = (
+        calendar.transpose()
+        .with_columns(pl.Series(name="dates", values=days))
+        .with_columns(pl.col("dates").str.to_date().dt.strftime("%b %Y"))
+        # maintain_order keeps the months in the order their days appear in
+        # the calendar; a plain group_by returns groups in arbitrary order.
+        .group_by("dates", maintain_order=True)
+        .agg(pl.col("column_0").sum())
+    )
+    return {
+        "scraping-date": extraction["created_at"],
+        "months": monthly["dates"].to_list(),
+        "capacities": monthly["column_0"].to_list(),
+    }