From d9cae3d0ab80959e61b6a14e607a21eaa1aebfc5 Mon Sep 17 00:00:00 2001
From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com>
Date: Sun, 5 Jan 2025 21:23:10 +0100
Subject: [PATCH] Issue 3 nearly finished

https://gitea.fhgr.ch/stoffelmauro/ConsultancyProject_2_ETL/issues/3
The issue is mostly done, but there is still the problem that the
ScrapeDate is interpreted as an integer instead of a date in
database.py. For now it is therefore implemented as a constant.
---
 etl/src/api/main.py                           |  6 +++++
 etl/src/data/database.py                      | 17 +++++++++++++
 .../data/etl_property_capacities_monthly.py   | 24 +++++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 etl/src/data/etl_property_capacities_monthly.py

diff --git a/etl/src/api/main.py b/etl/src/api/main.py
index cc26adf..614676c 100644
--- a/etl/src/api/main.py
+++ b/etl/src/api/main.py
@@ -1,6 +1,7 @@
 import data
 import polars as pl
 from data import etl_property_capacities as etl_pc
+from data import etl_property_capacities_monthly as etl_pcm
 from data import etl_region_capacities as etl_rc
 from data import etl_region_capacities_comparison as etl_rcc
 from fastapi import FastAPI, Response
@@ -42,6 +43,11 @@ def property_capacities_data(id: int):
     capacities = etl_pc.property_capacities(id)
     return capacities
 
+@app.get("/property/{id}/capacities/monthly/{scrapeDate}")
+def property_capacities_monthly_data(id: int, scrapeDate: str):
+    capacities = etl_pcm.property_capacities_monthly(id, scrapeDate)
+    return capacities
+
 @app.get("/property/{id}/base")
 def property_base_data(id: int):
     return d.property_base_data(id).pl().to_dicts()
diff --git a/etl/src/data/database.py b/etl/src/data/database.py
index 8cabbca..7aa3e0b 100644
--- a/etl/src/data/database.py
+++ b/etl/src/data/database.py
@@ -334,6 +334,23 @@ class Database:
             created_at
             """)
 
+    def extractions_propId_scrapeDate(self, property_id: int, scrape_date: str):
+        return self.connection.sql(f"""
+            SELECT
+                JSON_EXTRACT(body, '$.content.days') as calendar,
+                created_at
+            FROM
+                consultancy_d.extractions
+            WHERE
+                type == 'calendar' AND
+                property_id = {property_id} AND
+                calendar IS NOT NULL AND
+                created_at >= '2024-04-16'
+            ORDER BY
+                created_at
+            LIMIT 1
+            """)
+
     # Anzahl der extrahierten properties pro Exktraktionsvorgang
     def properties_per_extraction(self, property_id):
         return self.connection.sql("""
diff --git a/etl/src/data/etl_property_capacities_monthly.py b/etl/src/data/etl_property_capacities_monthly.py
new file mode 100644
index 0000000..1fa78d6
--- /dev/null
+++ b/etl/src/data/etl_property_capacities_monthly.py
@@ -0,0 +1,24 @@
+from io import StringIO
+
+import polars as pl
+
+import data
+
+d = data.load()
+
+def property_capacities_monthly(id: int, scrapeDate: str):
+    extractions = d.extractions_propId_scrapeDate(id, scrapeDate).pl()
+    df_calendar = pl.DataFrame()
+
+    for row in extractions.rows(named=True):
+        scrapeDate = row['created_at']
+        df_calendar = pl.read_json(StringIO(row['calendar']))
+        columnTitles = df_calendar.columns
+        df_calendar = df_calendar.transpose()
+        df_calendar = df_calendar.with_columns(pl.Series(name="dates", values=columnTitles))
+        df_calendar = df_calendar.with_columns(pl.col("dates").str.to_date())
+        df_calendar = df_calendar.with_columns(pl.col("dates").dt.strftime("%b") + " " + pl.col("dates").dt.strftime("%Y"))
+
+    df_calendar = df_calendar.group_by("dates", maintain_order=True).agg(pl.col("column_0").sum())
+    result = {"scraping-date": scrapeDate, "months": df_calendar["dates"].to_list(), "capacities": df_calendar["column_0"].to_list()}
+    return result
\ No newline at end of file
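
On the ScrapeDate problem described in the commit message: the scrape date is
read as an integer in database.py, which is why the query above is pinned to
the constant '2024-04-16' and the scrape_date argument of
extractions_propId_scrapeDate() goes unused. One plausible cause is that an
unquoted f-string interpolation such as created_at >= 2024-04-16 gets parsed
as integer arithmetic rather than as a date literal. Below is a minimal,
hypothetical sketch of how the value could be bound as a real DATE via a
DuckDB prepared statement; the standalone function name and the connection
argument are illustrative assumptions, not code from this repository.

import datetime

import duckdb


def extractions_by_prop_and_scrape_date(con: duckdb.DuckDBPyConnection, property_id: int, scrape_date: str):
    """Sketch: same query as in the patch, but with bound parameters."""
    # Fail early on malformed input such as "2024-4-16x".
    scrape_dt = datetime.date.fromisoformat(scrape_date)
    # '?' placeholders let DuckDB bind property_id as an INTEGER and scrape_dt
    # as a DATE, instead of splicing raw text into the SQL string.
    return con.execute(
        """
        SELECT
            JSON_EXTRACT(body, '$.content.days') AS calendar,
            created_at
        FROM
            consultancy_d.extractions
        WHERE
            type = 'calendar' AND
            property_id = ? AND
            JSON_EXTRACT(body, '$.content.days') IS NOT NULL AND
            created_at >= ?
        ORDER BY
            created_at
        LIMIT 1
        """,
        [property_id, scrape_dt],
    ).pl()  # fetch the result as a polars DataFrame, as the ETL code expects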
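
For reference, a hypothetical call against the new endpoint once the API is
running (host, port and the property id 123 are assumptions; the response keys
mirror the dict built in etl_property_capacities_monthly.py). Note that the
scrapeDate path parameter is currently ignored by the SQL, as described in the
commit message.

import requests

BASE_URL = "http://127.0.0.1:8000"  # assumed local dev server

resp = requests.get(f"{BASE_URL}/property/123/capacities/monthly/2024-04-16")
resp.raise_for_status()

payload = resp.json()
# Expected shape: {"scraping-date": ..., "months": [...], "capacities": [...]}
print(payload["scraping-date"])
print(list(zip(payload["months"], payload["capacities"])))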