from io import StringIO

import polars as pl

import data

d = data.load()


def region_capacities(id: int) -> dict:
    # Fetch the raw extractions; id == -1 selects the global data set,
    # any other value selects a single region.
    if id == -1:
        extractions = d.capacity_global().pl()
    else:
        extractions = d.capacity_of_region(id).pl()

    # The property IDs are not needed for the aggregation below.
    extractions = extractions.drop("property_id")

    # Collect one row per extraction: the scrape date, the horizontal sum
    # of the calendar grid, and the calendar width (number of days).
    grid_data = pl.DataFrame(
        schema=[
            ("scrape_date", pl.String),
            ("sum_hor", pl.Int64),
            ("calendar_width", pl.Int64),
        ]
    )
    for row in extractions.rows(named=True):
        # Use 0 for the sum and width if the calendar is null
        if row["calendarBody"]:
            cal_df = pl.read_json(StringIO(row["calendarBody"]))
            sum_hor = cal_df.sum_horizontal()[0]
            calendar_width = cal_df.width
        else:
            sum_hor = 0
            calendar_width = 0
        grid_data = grid_data.vstack(
            pl.DataFrame(
                {
                    "scrape_date": row["ScrapeDate"],
                    "sum_hor": sum_hor,
                    "calendar_width": calendar_width,
                }
            )
        )

    # Create aggregates per scrape date: row count, summed grid values,
    # and the maximum calendar width.
    df_count = grid_data.group_by("scrape_date").agg(pl.col("sum_hor").count())
    df_sum = grid_data.group_by("scrape_date").agg(pl.col("sum_hor").sum())
    df_num_days = grid_data.group_by("scrape_date").agg(pl.col("calendar_width").max())

    # Join the aggregates; the duplicate "sum_hor" column from df_count
    # is suffixed "_right" by polars.
    df = df_sum.join(df_count, on="scrape_date").join(df_num_days, on="scrape_date")

    # Calculate the normed capacity for each scrape date: the average grid
    # sum per extraction, divided by the number of half-day slots
    # (two per calendar day), expressed as a percentage.
    df = df.with_columns(
        (
            pl.col("sum_hor")
            / pl.col("sum_hor_right")
            / (pl.col("calendar_width") * 2)
            * 100
        ).alias("capacity")
    )

    # Parse the scrape dates and sort chronologically.
    df = df.cast({"scrape_date": pl.Date}).sort("scrape_date")

    return {
        "capacities": df["capacity"].to_list(),
        "dates": df["scrape_date"].to_list(),
    }
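

# A minimal usage sketch, assuming the `data` module and its loader are
# available as above; not part of the module's public behavior.
if __name__ == "__main__":
    # -1 requests the global capacities; any other value is a region id.
    result = region_capacities(-1)
    for scrape_date, capacity in zip(result["dates"], result["capacities"]):
        print(f"{scrape_date}: {capacity:.1f}%")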