import data
import polars as pl
from io import StringIO
import numpy as np

# Module-level data handle; loaded once at import time.
d = data.load()


def region_capacities(id: int):
    """Build a capacity heat-map grid for one region.

    For every (property, scrape-date) pair in the region, sums the
    property's calendar availability and normalizes all sums to a
    0-100 percentage of the maximum observed value.

    NOTE: the parameter is named ``id`` (shadows the builtin) — kept
    for backward compatibility with keyword callers.

    Args:
        id: Region identifier passed to ``d.capacity_of_region``.

    Returns:
        dict with keys:
            'scrapeDates':  sorted list of scrape dates (datetime.date)
            'property_ids': sorted list of property ids (int)
            'values':       list of [prop_index, date_index, percent]
                            triples, percent being a float in [0, 100]
                            (0 everywhere when no data / all sums are 0).
    """
    # Get data for the region.
    df = d.capacity_of_region(id).pl()

    # Cast property IDs to ints so they sort numerically, not lexically.
    df = df.cast({"property_id": int})

    # Unique, sorted axis values for the two grid dimensions.
    list_of_dates = df.get_column("ScrapeDate").unique().sort()
    list_of_property_ids = df.get_column("property_id").unique().sort()

    # Index frames: joining them back onto df assigns each row its
    # (date_index, prop_index) grid coordinates.
    dates_df = pl.DataFrame(list_of_dates).with_row_index("date_index")
    prop_id_df = pl.DataFrame(list_of_property_ids).with_row_index("prop_index")
    df = df.join(dates_df, on="ScrapeDate")
    df = df.join(prop_id_df, on="property_id")

    # Keep only what the grid needs; ScrapeDate / property_id are now
    # encoded in the index columns.
    df = df[["calendarBody", "date_index", "prop_index"]]

    # Sum each calendar; a null/empty calendarBody counts as 0.
    grid_data = []
    for row in df.rows(named=True):
        if row["calendarBody"]:
            cal_df = pl.read_json(StringIO(row["calendarBody"]))
            sum_hor = cal_df.sum_horizontal()[0]
        else:
            sum_hor = 0
        grid_data.append([row["prop_index"], row["date_index"], sum_hor])

    # Normalize sums to percent of the maximum.
    # FIX: the previous implementation assigned float percentages back
    # into an integer numpy array, silently truncating every value; it
    # also crashed (np.max of empty) when the region had no rows and
    # divided by zero when all sums were 0. Compute in plain floats and
    # guard both cases.
    max_value = max((entry[2] for entry in grid_data), default=0)
    if max_value > 0:
        grid_data = [
            [prop_idx, date_idx, (value * 100) / max_value]
            for prop_idx, date_idx, value in grid_data
        ]
    else:
        grid_data = [
            [prop_idx, date_idx, 0.0]
            for prop_idx, date_idx, value in grid_data
        ]

    # Convert axis values to plain Python types for JSON serialization.
    list_of_dates = list_of_dates.cast(pl.Date).to_list()
    list_of_property_ids = list_of_property_ids.to_list()

    out_dict = {
        "scrapeDates": list_of_dates,
        "property_ids": list_of_property_ids,
        "values": grid_data,
    }
    return out_dict