Unbenutzter ETL-Prozess gelöscht

2025-01-19 13:14:35 +01:00 · 2025-01-19 13:14:35 +01:00 · e758e064d0
commit e758e064d0
parent e67636dbd6
1 changed file with 0 additions and 66 deletions
--- a/etl/src/data/etl_region_capacities_comparison.py
+++ b/etl/src/data/etl_region_capacities_comparison.py
@@ -1,66 +0,0 @@
-from io import StringIO
-
-import numpy as np
-import polars as pl
-
-import data
-
-d = data.load()
-
-def region_capacities_comparison(id_1: int, id_2: int):
-    fulldf = d.capacity_comparison_of_region(id_1, id_2).pl()
-    # turn PropertyIDs and seedIDs to ints for sorting and filtering
-    fulldf = fulldf.cast({"property_id": int})
-    fulldf = fulldf.cast({"seed_id": int})
-    df_region1 = fulldf.filter(pl.col("seed_id") == id_1)
-    df_region2 = fulldf.filter(pl.col("seed_id") == id_2)
-    df_list = [df_region1, df_region2]
-    outDictList = []
-
-    for df in df_list:
-        # Get uniques for dates and propIDs and sort them
-        listOfDates = df.get_column("ScrapeDate").unique().sort()
-        listOfPropertyIDs = df.get_column("property_id").unique().sort()
-
-        # Create DFs from lists to merge later
-        datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
-        propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
-
-        # Merge Dataframe to generate indices
-        df = df.join(datesDF, on='ScrapeDate')
-        df = df.join(propIdDF, on='property_id')
-        # Drop now useless columns ScrapeDate and property_id
-        df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]
-        # Calculate grid values
-        gridData = []
-        for row in df.rows(named=True):
-            # Return 0 for sum if calendar is null
-            if row['calendarBody']:
-                calDF = pl.read_json(StringIO(row['calendarBody']))
-                sum_hor = calDF.sum_horizontal()[0]
-            else:
-                sum_hor = 0
-            # With Index
-            # gridData.append([row['prop_index'], row['date_index'], sum_hor])
-            # With ScrapeDate
-            gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
-
-        gridData = np.array(gridData)
-        # get all values to calculate Max
-        allValues = gridData[:, 2].astype(int)
-        maxValue = np.max(allValues)
-        gridData[:, 2] = (allValues*100)/maxValue
-
-        # Return back to list
-        gridData = gridData.tolist()
-
-        # Cast listOfDates to datetime
-        listOfDates = listOfDates.cast(pl.Date).to_list()
-        listOfPropertyIDs = listOfPropertyIDs.to_list()
-
-        # Create JSON
-        tempDict = {'dates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
-        outDictList.append(tempDict)
-
-    outDict = {'region1': outDictList[0], 'region2': outDictList[1],}
-    return outDict