from datetime import datetime, timedelta
from io import StringIO

import polars as pl

import data
from data import etl_cache

d = data.load()


def _monthly_capacity(calendar_body: str, column_name: str) -> pl.DataFrame:
    """Aggregate one scraped calendar into a per-month capacity percentage.

    ``calendar_body`` is a JSON document that ``pl.read_json`` parses into a
    single-row frame whose column names are date strings.
    NOTE(review): the ``/ 2 * 100`` scaling suggests each daily value lies in
    0..2 -- confirm against the scraper.

    Returns a frame sorted by month with the columns ``month_key``
    ("YYYY MM", sortable), ``date_short`` ("Mon YYYY") and ``column_name``
    (sum of the month's daily values / days in month / 2 * 100).
    """
    calendar = pl.read_json(StringIO(calendar_body))
    day_labels = calendar.columns

    # One row per day: the values end up in "column_0", the day in "dates".
    calendar = calendar.transpose()
    calendar = calendar.with_columns(pl.Series(name="dates", values=day_labels))
    calendar = calendar.with_columns(pl.col("dates").str.to_date())

    # All three expressions read the parsed date column of the input frame.
    calendar = calendar.with_columns(
        pl.col("dates").dt.month_end().dt.day().alias("numDays"),
        (
            pl.col("dates").dt.strftime("%b")
            + " "
            + pl.col("dates").dt.strftime("%Y")
        ).alias("date_short"),
        (
            pl.col("dates").dt.strftime("%Y")
            + " "
            + pl.col("dates").dt.strftime("%m")
        ).alias("month_key"),
    )

    return (
        calendar.group_by(["month_key", "date_short", "numDays"])
        .agg(pl.col("column_0").sum())
        .with_columns(
            (pl.col("column_0") / pl.col("numDays") / 2 * 100).alias(column_name)
        )
        .sort("month_key")
        .select("month_key", "date_short", column_name)
    )


def region_capacities_monthly(id: int, scrapeDate_start: str):
    """Return the mean monthly capacity over all scrapes of a one-day window.

    Args:
        id: Region id passed to the region query; ``-1`` selects the global
            query instead.
        scrapeDate_start: Start of the scrape search window. A string is
            parsed with ``datetime.fromisoformat`` (e.g. ``"2024-01-31"``);
            a date/datetime object is used as is. The window ends one day
            later.

    Returns:
        A dict with the keys ``"date"`` (``created_at`` of the last row
        returned by the query), ``"months"`` (labels such as ``"Jan 2024"``
        in chronological order) and ``"capacities"`` (per month, the mean of
        the per-scrape percentages). Only months present in every scrape are
        reported (inner join). When no row carries a ``calendarBody`` the
        lists are empty and the result is not cached.

    Raises:
        ValueError: if ``scrapeDate_start`` is a string that is not in ISO
            format.
    """
    # The cache key keeps using the raw argument so existing cache files
    # remain valid.
    file = f"etl_region_capacities_monthly_{id}_{scrapeDate_start}.obj"
    obj = etl_cache.openObj(file)
    if obj:
        return obj

    # A str cannot be added to a timedelta, so parse it first.
    if isinstance(scrapeDate_start, str):
        window_start = datetime.fromisoformat(scrapeDate_start)
    else:
        window_start = scrapeDate_start
    window_end = window_start + timedelta(days=1)

    # Get Data
    if id == -1:
        extractions = d.singleScrape_of_global_scrapDate(
            window_start, window_end
        ).pl()
    else:
        extractions = d.singleScrape_of_region_scrapDate(
            id, window_start, window_end
        ).pl()

    scrape_date = None
    monthly = None
    counter = 0
    for row in extractions.rows(named=True):
        scrape_date = row['created_at']
        if not row['calendarBody']:
            continue
        # The running counter gives every scrape a unique column name.
        counter += 1
        per_scrape = _monthly_capacity(row['calendarBody'], str(counter))
        if monthly is None:
            monthly = per_scrape
        else:
            monthly = monthly.join(per_scrape, on=['month_key', 'date_short'])

    if monthly is None:
        # Nothing to aggregate; skip the cache so a later call can pick up
        # data that arrives afterwards.
        return {"date": scrape_date, "months": [], "capacities": []}

    # Joins need not preserve row order, so restore chronological order once.
    monthly = monthly.sort('month_key')

    # Calculate horizontal mean over the numeric per-scrape columns only.
    means = monthly.drop(['month_key', 'date_short']).mean_horizontal()

    result = {
        "date": scrape_date,
        "months": monthly['date_short'].to_list(),
        'capacities': means.to_list(),
    }
    etl_cache.saveObj(file, result)
    return result