From 2560d43c3ff7cb4cc29d2113ac4ed20cc03bb909 Mon Sep 17 00:00:00 2001 From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com> Date: Fri, 17 Jan 2025 17:51:27 +0100 Subject: [PATCH] fixed calculation of monthly data, closes #18 --- etl/src/data/etl_property_capacities_monthly.py | 9 ++++++--- etl/src/data/etl_region_capacities_monthly.py | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/etl/src/data/etl_property_capacities_monthly.py b/etl/src/data/etl_property_capacities_monthly.py index c6a6d1b..d7027a7 100644 --- a/etl/src/data/etl_property_capacities_monthly.py +++ b/etl/src/data/etl_property_capacities_monthly.py @@ -24,12 +24,15 @@ def property_capacities_monthly(id: int, scrapeDate: str): df_calendar = df_calendar.transpose() df_calendar = df_calendar.with_columns(pl.Series(name="dates", values=columnTitles)) df_calendar = df_calendar.with_columns((pl.col("dates").str.to_date())) + df_calendar = df_calendar.with_columns((pl.col("dates").dt.month_end().dt.day().alias('numDays'))) df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%b") + " " + (pl.col("dates").dt.strftime("%Y"))).alias('date_short')) df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%Y") + " " + (pl.col("dates").dt.strftime("%m"))).alias('dates')) - df_calendar = df_calendar.group_by(['dates', 'date_short']).agg(pl.col("column_0").sum()) + + + df_calendar = df_calendar.group_by(['dates', 'date_short', 'numDays']).agg(pl.col("column_0").sum()) + df_calendar = df_calendar.with_columns((pl.col("column_0") / pl.col("numDays") / 2 * 100).alias("column_0")) df_calendar = df_calendar.sort('dates') - df_calendar = df_calendar.drop('dates') result = {"scraping-date": scrapeDate, "months": df_calendar['date_short'].to_list(), 'capacities': df_calendar['column_0'].to_list()} etl_cache.saveObj(file, result) - return result + return result \ No newline at end of file diff --git a/etl/src/data/etl_region_capacities_monthly.py b/etl/src/data/etl_region_capacities_monthly.py index ba77c83..5aeab6a 100644 --- a/etl/src/data/etl_region_capacities_monthly.py +++ b/etl/src/data/etl_region_capacities_monthly.py @@ -41,11 +41,14 @@ def region_capacities_monthly(id: int, scrapeDate_start: str): df_calendar = df_calendar.transpose() df_calendar = df_calendar.with_columns(pl.Series(name="dates", values=columnTitles)) df_calendar = df_calendar.with_columns((pl.col("dates").str.to_date())) + df_calendar = df_calendar.with_columns((pl.col("dates").dt.month_end().dt.day().alias('numDays'))) df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%b") + " " + (pl.col("dates").dt.strftime("%Y"))).alias('date_short')) df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%Y") + " " + (pl.col("dates").dt.strftime("%m"))).alias('dates')) - df_calendar = df_calendar.group_by(['dates', 'date_short']).agg(pl.col("column_0").sum()) + df_calendar = df_calendar.group_by(['dates', 'date_short','numDays']).agg(pl.col("column_0").sum()) + df_calendar = df_calendar.with_columns((pl.col("column_0") / pl.col("numDays") / 2 * 100).alias("column_0")) df_calendar = df_calendar.sort('dates') df_calendar = df_calendar.drop('dates') + df_calendar = df_calendar.drop('numDays') df_calendar = df_calendar.rename({'column_0': str(counter)})