ConsultancyProject_2_ETL/etl/src/data/etl_region_properties_capacities.py
from io import StringIO

import polars as pl

import data
from data import etl_cache

# Data-access layer for this ETL project, loaded once at import time
d = data.load()


def region_properties_capacities(id: int):
    """Return the capacity grid (dates x properties) for one region, or globally for id == -1."""
    file = f"etl_region_properties_capacities_{id}.obj"
    # Serve a previously computed result from the cache if present
    obj = etl_cache.openObj(file)
    if obj:
        return obj
    # Get data: id == -1 selects the global capacity view
    if id == -1:
        df = d.capacity_global().pl()
    else:
        df = d.capacity_of_region(id).pl()
    # Turn property IDs into ints so they sort numerically
    df = df.cast({"property_id": int})
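    # Note (assuming a recent polars version): a plain Python `int` in the
    # cast mapping is shorthand for pl.Int64, i.e. the line above should be
    # equivalent to df.cast({"property_id": pl.Int64}).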
    # Collect the unique dates and property IDs, sorted, as the grid axes
    listOfDates = df.get_column("ScrapeDate").unique().sort()
    listOfPropertyIDs = df.get_column("property_id").unique().sort()
    # Wrap the axes in DataFrames so they can be joined back as indices
    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
    # Join the index columns onto the observations
    df = df.join(datesDF, on="ScrapeDate")
    df = df.join(propIdDF, on="property_id")
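    # Conceptually, each observation row now carries its grid coordinates
    # (illustrative values, not real data):
    #   ScrapeDate  | property_id | calendarBody | date_index | prop_index
    #   2024-01-01  | 42          | '{...}'      | 0          | 7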
    # Compute one capacity sum per (scrape date, property) observation; rows
    # are collected in a list and turned into a DataFrame once, which avoids
    # re-allocating via vstack() on every iteration
    gridRows = []
    for row in df.rows(named=True):
        # A missing/null calendar counts as a capacity sum of 0
        if row["calendarBody"]:
            calDF = pl.read_json(StringIO(row["calendarBody"]))
            sum_hor = calDF.sum_horizontal()[0]
        else:
            sum_hor = 0
        gridRows.append({"scrape_date": row["ScrapeDate"], "property_id": str(row["property_id"]), "sum_hor": sum_hor})
    gridData = pl.DataFrame(gridRows, schema=[("scrape_date", pl.String), ("property_id", pl.String), ("sum_hor", pl.Int64)])
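    # Illustrative payload (an assumption about the scraper's format, not
    # taken from this file): `calendarBody` is a JSON object of numeric
    # availability flags, e.g. '{"2024-01-01": 1, "2024-01-02": 0}', which
    # read_json() parses into a one-row frame whose horizontal sum is the
    # number of available days in that snapshot.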
    # Scale every sum against the overall maximum, as a percentage
    maxValue = gridData["sum_hor"].max()
    values = []
    for row in gridData.rows(named=True):
        # Guard: an empty or all-zero grid would otherwise divide by zero
        capacity = (row["sum_hor"] * 100) / maxValue if maxValue else 0
        values.append({"date": row["scrape_date"], "property_id": row["property_id"], "capacity": capacity})
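    # Worked example with illustrative numbers: for sums [30, 60, 120] the
    # maximum is 120, so the capacities become [25.0, 50.0, 100.0] percent.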
    # Cast the axes to plain Python values for the output dict
    listOfDates = listOfDates.cast(pl.Date).to_list()
    listOfPropertyIDs = listOfPropertyIDs.cast(pl.String).to_list()
    # Assemble the result and cache it for subsequent calls
    outDict = {"dates": listOfDates, "property_ids": listOfPropertyIDs, "values": values}
    etl_cache.saveObj(file, outDict)
    return outDict
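

# Minimal usage sketch (hypothetical; assumes the project's data layer and
# etl_cache are configured and the backing store is reachable):
if __name__ == "__main__":
    out = region_properties_capacities(-1)  # -1 selects the global view
    print(len(out["dates"]), "dates x", len(out["property_ids"]), "properties")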