Unbenutzter ETL-Prozess gelöscht
parent
e67636dbd6
commit
e758e064d0
|
@ -1,66 +0,0 @@
|
|||
from io import StringIO
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
import data
|
||||
|
||||
# Module-level handle returned by the project's `data.load()`; created once at
# import time and queried by region_capacities_comparison() below.
d = data.load()
|
||||
|
||||
def _calendar_total(calendar_body):
    """Return the horizontal sum of the first row of a calendar JSON string.

    A null or empty calendar counts as 0.
    """
    if not calendar_body:
        return 0
    return pl.read_json(StringIO(calendar_body)).sum_horizontal()[0]


def _region_grid(df):
    """Build the ``dates`` / ``property_ids`` / ``values`` payload for one region.

    Each entry of ``values`` is ``[ScrapeDate, date_index, percent]`` where
    ``percent`` is the row's calendar sum scaled to 0-100 relative to the
    largest sum in the region.
    """
    # Sorted unique dates and property ids define the grid axes.
    dates = df.get_column("ScrapeDate").unique().sort()
    property_ids = df.get_column("property_id").unique().sort()

    # Turn the axes into lookup frames so a join can attach positional indices.
    dates_df = pl.DataFrame(dates).with_row_index("date_index")
    prop_df = pl.DataFrame(property_ids).with_row_index("prop_index")

    # Both joins are kept (even though prop_index is not emitted) so the set
    # of rows that reaches the grid is the same as before.
    df = df.join(dates_df, on='ScrapeDate')
    df = df.join(prop_df, on='property_id')
    df = df.select(['ScrapeDate', 'calendarBody', 'date_index', 'prop_index'])

    cells = [
        (row['ScrapeDate'], row['date_index'], int(_calendar_total(row['calendarBody'])))
        for row in df.rows(named=True)
    ]

    # Scale in plain Python instead of round-tripping through a mixed-type
    # NumPy array: that array is coerced to a string dtype when ScrapeDate is
    # a string, which turned every numeric cell into a string.
    # `default=0` covers a region without rows; a peak of 0 (all calendars
    # empty) yields 0.0 instead of dividing by zero.
    peak = max((total for _, _, total in cells), default=0)
    values = [
        [scrape_date, date_index, (total * 100) / peak if peak else 0.0]
        for scrape_date, date_index, total in cells
    ]

    return {
        'dates': dates.cast(pl.Date).to_list(),
        'property_ids': property_ids.to_list(),
        'values': values,
    }


def region_capacities_comparison(id_1: int, id_2: int):
    """Compare the capacity grids of two regions.

    Args:
        id_1: Seed id of the first region.
        id_2: Seed id of the second region.

    Returns:
        A dict ``{'region1': ..., 'region2': ...}``. Each value is a dict with
        the keys ``'dates'`` (sorted unique scrape dates), ``'property_ids'``
        (sorted unique property ids) and ``'values'`` (a list of
        ``[ScrapeDate, date_index, percent_of_region_max]`` rows).
    """
    fulldf = d.capacity_comparison_of_region(id_1, id_2).pl()
    # Cast the id columns to ints so they sort and filter numerically.
    fulldf = fulldf.cast({"property_id": int})
    fulldf = fulldf.cast({"seed_id": int})

    return {
        'region1': _region_grid(fulldf.filter(pl.col("seed_id") == id_1)),
        'region2': _region_grid(fulldf.filter(pl.col("seed_id") == id_2)),
    }
|
Loading…
Reference in New Issue