Gitea Issue 2 resolved

#2

etl_region_capacities.py: neues Output Format = [datum, prop_id, capacity]
main
mmaurostoffel 2025-01-05 15:51:19 +01:00
parent 281d9d3f5a
commit 8fcaf2a6f7
1 changed file with 13 additions and 7 deletions

View File

@@ -23,10 +23,8 @@ def region_capacities(id: int):
# Merge Dataframe to generate indices
df = df.join(datesDF, on='ScrapeDate')
df = df.join(propIdDF, on='property_id')
# Drop now useless columns ScrapeDate and property_id
df = df[['calendarBody', 'date_index', 'prop_index']]
df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]
# Calculate grid values
gridData = []
for row in df.rows(named=True):
@@ -36,13 +34,18 @@ def region_capacities(id: int):
sum_hor = calDF.sum_horizontal()[0]
else:
sum_hor = 0
gridData.append([row['prop_index'], row['date_index'], sum_hor])
gridData = np.array(gridData)
# With Index
# gridData.append([row['prop_index'], row['date_index'], sum_hor])
# With ScrapeDate
gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
gridData = np.array(gridData)
# get all values to calculate Max
allValues = gridData[:, 2]
allValues = gridData[:, 2].astype(int)
print(allValues)
maxValue = np.max(allValues)
gridData[:, 2] = (gridData[:, 2]*100)/maxValue
print(maxValue)
gridData[:, 2] = (allValues*100)/maxValue
# Return back to list
gridData = gridData.tolist()
@@ -55,3 +58,6 @@ def region_capacities(id: int):
outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
return outDict
out = region_capacities(1)
print(out)