# Recovered from the deletion diff of
# etl/src/data/etl_region_capacities_comparison.py (diff markers stripped).
from io import StringIO

import numpy as np
import polars as pl

import data

d = data.load()


def _calendar_total(calendar_body):
    """Return the horizontal sum of one calendar JSON payload, or 0 if absent."""
    if not calendar_body:
        return 0
    calendar = pl.read_json(StringIO(calendar_body))
    return calendar.sum_horizontal()[0]


def _scale_to_percent(totals: list) -> list:
    """Scale integer-cast totals so that the largest one maps to 100.

    Returns an empty list for empty input and all zeros when the largest
    total is 0, instead of indexing an empty array or dividing by zero.
    """
    if not totals:
        return []
    values = np.asarray(totals).astype(int)
    peak = values.max()
    if peak == 0:
        # Dividing by a zero maximum would emit NaN/inf, which is not valid
        # JSON; a region without any capacity is reported as all zeros.
        return [0.0] * len(totals)
    return (values * 100 / peak).tolist()


def region_capacities_comparison(id_1: int, id_2: int):
    """Build the capacity grids of two regions for side-by-side comparison.

    The rows come from ``d.capacity_comparison_of_region(id_1, id_2)`` and
    are split by ``seed_id``. For every row the ``calendarBody`` JSON is
    summed horizontally (0 when it is empty or null) and the sums of a
    region are scaled relative to that region's largest sum.

    Args:
        id_1: ``seed_id`` of the first region.
        id_2: ``seed_id`` of the second region.

    Returns:
        ``{'region1': grid, 'region2': grid}`` where each grid is a dict with
        ``dates`` (sorted unique ``ScrapeDate`` values cast to dates),
        ``property_ids`` (sorted unique property ids) and ``values`` (one
        ``[ScrapeDate, date_index, percentage]`` entry per row). A region
        without rows yields empty lists.
    """
    fulldf = d.capacity_comparison_of_region(id_1, id_2).pl()
    # Integer ids are needed for numeric sorting and for the seed filter.
    fulldf = fulldf.cast({"property_id": int})
    fulldf = fulldf.cast({"seed_id": int})

    outDictList = []
    for seed_id in (id_1, id_2):
        df = fulldf.filter(pl.col("seed_id") == seed_id)

        # Sorted uniques define the axes of the grid.
        listOfDates = df.get_column("ScrapeDate").unique().sort()
        listOfPropertyIDs = df.get_column("property_id").unique().sort()

        # One-column frames with a running index, joined back to attach the
        # axis position of every row.
        datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
        propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
        df = df.join(datesDF, on='ScrapeDate')
        df = df.join(propIdDF, on='property_id')
        # Keep only the columns needed for the grid (property_id is dropped).
        df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]

        rows = df.rows(named=True)
        totals = [_calendar_total(row['calendarBody']) for row in rows]
        # Only the numeric column goes through NumPy: an array mixing the
        # scrape date with numbers is coerced to one common dtype (strings
        # when ScrapeDate is textual), which would stringify and truncate
        # the percentages written back into it.
        percentages = _scale_to_percent(totals)
        gridData = [
            [row['ScrapeDate'], row['date_index'], percentage]
            for row, percentage in zip(rows, percentages)
        ]

        tempDict = {
            'dates': listOfDates.cast(pl.Date).to_list(),
            'property_ids': listOfPropertyIDs.to_list(),
            'values': gridData,
        }
        outDictList.append(tempDict)

    outDict = {'region1': outDictList[0], 'region2': outDictList[1],}
    return outDict