from io import StringIO

import polars as pl

import data
from data import etl_cache

d = data.load()


def region_properties_capacities(id: int):
    """Return the capacity grid (dates x properties) for a region; id == -1 means global."""
    # Return the cached result if one already exists
    file = f"etl_region_properties_capacities_{id}.obj"
    obj = etl_cache.openObj(file)
    if obj:
        return obj

    # Get data: the whole dataset for id == -1, otherwise a single region
    if id == -1:
        df = d.capacity_global().pl()
    else:
        df = d.capacity_of_region(id).pl()

    # Turn property IDs into ints for sorting
    df = df.cast({"property_id": int})

    # Get unique dates and property IDs and sort them
    listOfDates = df.get_column("ScrapeDate").unique().sort()
    listOfPropertyIDs = df.get_column("property_id").unique().sort()

    # Create DataFrames from the lists to merge later
    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")

    # Merge the DataFrames to generate indices
    df = df.join(datesDF, on="ScrapeDate")
    df = df.join(propIdDF, on="property_id")

    # Calculate grid values: one row per (scrape date, property) holding the
    # horizontal sum of that property's calendar
    gridData = pl.DataFrame(
        schema=[("scrape_date", pl.String), ("property_id", pl.String), ("sum_hor", pl.Int64)]
    )
    for row in df.rows(named=True):
        # Use 0 as the sum if the calendar is null
        if row["calendarBody"]:
            calDF = pl.read_json(StringIO(row["calendarBody"]))
            sum_hor = calDF.sum_horizontal()[0]
        else:
            sum_hor = 0
        gridData = gridData.vstack(
            pl.DataFrame(
                {
                    "scrape_date": row["ScrapeDate"],
                    "property_id": str(row["property_id"]),
                    "sum_hor": sum_hor,
                }
            )
        )

    # Get the overall maximum sum and express each cell as a percentage of it
    maxValue = gridData["sum_hor"].max()

    values = []
    for row in gridData.rows(named=True):
        capacity = (row["sum_hor"] * 100) / maxValue
        values.append(
            {
                "date": row["scrape_date"],
                "property_id": row["property_id"],
                "capacity": capacity,
            }
        )

    # Cast the axis labels to plain Python lists (dates as pl.Date, IDs as strings)
    listOfDates = listOfDates.cast(pl.Date).to_list()
    listOfPropertyIDs = listOfPropertyIDs.cast(pl.String).to_list()

    # Create the JSON-serialisable output, cache it and return it
    outDict = {
        "dates": listOfDates,
        "property_ids": listOfPropertyIDs,
        "values": values,
    }

    etl_cache.saveObj(file, outDict)
    return outDict
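

# Minimal usage sketch (assumption: an ad-hoc run for inspection; in the original
# project this function is presumably called from elsewhere, e.g. an API layer).
# Passing -1 requests the global capacity grid, any other value a single region.
if __name__ == "__main__":
    result = region_properties_capacities(-1)
    print(result["dates"][:5])          # first few scrape dates (datetime.date)
    print(result["property_ids"][:5])   # first few property IDs (strings)
    print(result["values"][:5])         # first few grid cells with capacity in percent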