diff --git a/etl/src/data/etl_property_capacities_monthly.py b/etl/src/data/etl_property_capacities_monthly.py index 1fa78d6..a582af7 100644 --- a/etl/src/data/etl_property_capacities_monthly.py +++ b/etl/src/data/etl_property_capacities_monthly.py @@ -17,8 +17,11 @@ def property_capacities_monthly(id: int, scrapeDate: str): df_calendar = df_calendar.transpose() df_calendar = df_calendar.with_columns(pl.Series(name="dates", values=columnTitles)) df_calendar = df_calendar.with_columns((pl.col("dates").str.to_date())) - df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%b") + " " + (pl.col("dates").dt.strftime("%Y")))) + df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%b") + " " + (pl.col("dates").dt.strftime("%Y"))).alias('date_short')) + df_calendar = df_calendar.with_columns((pl.col("dates").dt.strftime("%Y") + " " + (pl.col("dates").dt.strftime("%m"))).alias('dates')) - df_calendar = df_calendar.group_by("dates").agg(pl.col("column_0").sum()) - result = {"scraping-date": scrapeDate, "months": df_calendar['dates'].to_list(), 'capacities': df_calendar['column_0'].to_list()} + df_calendar = df_calendar.group_by(['dates', 'date_short']).agg(pl.col("column_0").sum()) + df_calendar = df_calendar.sort('dates') + df_calendar = df_calendar.drop('dates') + result = {"scraping-date": scrapeDate, "months": df_calendar['date_short'].to_list(), 'capacities': df_calendar['column_0'].to_list()} return result \ No newline at end of file