diff --git a/etl/src/api/main.py b/etl/src/api/main.py
index 2d177a3..4f99f50 100644
--- a/etl/src/api/main.py
+++ b/etl/src/api/main.py
@@ -5,12 +5,12 @@ from data import etl_property_capacities_monthly as etl_pcm
 from data import etl_property_capacities_weekdays as etl_pcw
 from data import etl_property_neighbours as etl_pn
 from data import etl_region_capacities as etl_rc
+from data import etl_region_capacities_monthly as etl_rcm
 from data import etl_region_capacities_weekdays as etl_rcw
 from data import etl_region_movAverage as etl_rmA
 from data import etl_region_properties_capacities as etl_rpc
 from data import etl_region_capacities_comparison as etl_rcc
-from data import etl_region_movAverage as etl_rmA
-from data import etl_region_properties_capacities as etl_rpc
+
 from fastapi import FastAPI, Response
 
 d = data.load()
@@ -79,6 +79,12 @@ def region_capacities_data(id: int):
     capacities = etl_rc.region_capacities(id)
     return capacities
 
+@app.get("/region/{id}/capacities/monthly/{scrapeDate}")
+def region_capacities_data(id: int, scrapeDate: str):
+    """Return the monthly mean capacities of a region for one scrape date."""
+    capacities = etl_rcm.region_capacities_monthly(id, scrapeDate)
+    return capacities
+
 @app.get("/region/{id}/capacities/weekdays/{scrapeDate}")
 def region_capacities_data(id: int, scrapeDate: str):
     capacities = etl_rcw.region_capacities_weekdays(id, scrapeDate)
@@ -98,7 +104,3 @@ def region_capacities_data(id: int, startDate: str):
 def region_base_data(id: int):
     return d.region_base_data(id).pl().to_dicts()
 
-@app.get("/region/{id}/properties/capacities")
-def region_property_capacities_data(id: int):
-    capacities = etl_rpc.region_properties_capacities(id)
-    return capacities
diff --git a/etl/src/data/etl_region_capacities_monthly.py b/etl/src/data/etl_region_capacities_monthly.py
new file mode 100644
index 0000000..01c1d36
--- /dev/null
+++ b/etl/src/data/etl_region_capacities_monthly.py
@@ -0,0 +1,94 @@
+from datetime import datetime, timedelta
+from io import StringIO
+
+import polars as pl
+
+import data
+
+d = data.load()
+
+
+def _monthly_sums(calendar_body: str, column_name: str) -> pl.DataFrame:
+    """Sum one calendar JSON per month.
+
+    Returns a frame with the month label in ``date_short`` and the monthly
+    sum in ``column_name``, sorted chronologically.
+    """
+    calendar = pl.read_json(StringIO(calendar_body))
+    day_labels = calendar.columns
+    return (
+        # transpose() yields one row per JSON key with the value in
+        # "column_0"; the keys themselves are re-attached as "dates".
+        calendar.transpose()
+        .with_columns(pl.Series(name="dates", values=day_labels))
+        .with_columns(pl.col("dates").str.to_date())
+        .with_columns(
+            pl.col("dates").dt.strftime("%b %Y").alias('date_short'),
+            # "YYYY MM" sorts chronologically, unlike the month label.
+            pl.col("dates").dt.strftime("%Y %m").alias('dates'),
+        )
+        .group_by(['dates', 'date_short'])
+        .agg(pl.col("column_0").sum())
+        .sort('dates')
+        .drop('dates')
+        .rename({'column_0': column_name})
+    )
+
+
+def region_capacities_monthly(id: int, scrapeDate_start: str):
+    """Average the monthly calendar sums of a region's scrapes.
+
+    Each extraction returned by the data layer for the one-day window
+    starting at ``scrapeDate_start`` contributes one column: its
+    ``calendarBody`` JSON summed per calendar month. The columns are
+    inner-joined on the month label and averaged row-wise.
+
+    NOTE(review): ``calendarBody`` is assumed to map ``YYYY-MM-DD`` keys
+    to one numeric value each -- confirm against the scraper output.
+
+    Args:
+        id: Region identifier handed to the data layer.
+        scrapeDate_start: Start of the search window as ``YYYY-MM-DD``.
+
+    Returns:
+        A dict with ``"scraping-date"`` (``created_at`` of the last
+        extraction seen, ``None`` if there was none), ``"months"``
+        (labels such as ``"Jan 2024"``) and ``"capacities"`` (the mean
+        per month). Both lists are empty when no extraction carries a
+        calendar.
+
+    Raises:
+        ValueError: If ``scrapeDate_start`` is not ``YYYY-MM-DD``.
+    """
+    window_start = datetime.strptime(scrapeDate_start, '%Y-%m-%d')
+    window_end = window_start + timedelta(days=1)
+
+    extractions = d.singleScrape_of_region_scrapDate(id, window_start, window_end).pl()
+
+    # Pre-bind so an empty window no longer ends in UnboundLocalError.
+    scrapeDate = None
+    outDf = None
+    counter = 0
+    for row in extractions.rows(named=True):
+        scrapeDate = row['created_at']
+        if not row['calendarBody']:
+            continue
+        counter += 1
+        df_calendar = _monthly_sums(row['calendarBody'], str(counter))
+        if outDf is None:
+            outDf = df_calendar
+        else:
+            # Inner join: only months present in every calendar survive.
+            outDf = outDf.join(df_calendar, on='date_short')
+
+    if outDf is None:
+        return {"scraping-date": scrapeDate, "months": [], 'capacities': []}
+
+    # Average only the numeric per-scrape columns; the string label column
+    # must not be fed to mean_horizontal().
+    means = outDf.drop('date_short').mean_horizontal()
+    return {
+        "scraping-date": scrapeDate,
+        "months": outDf['date_short'].to_list(),
+        'capacities': means.to_list(),
+    }