diff --git a/etl/src/data/etl_region_capacities.py b/etl/src/data/etl_region_capacities.py index f4534cc..01759f9 100644 --- a/etl/src/data/etl_region_capacities.py +++ b/etl/src/data/etl_region_capacities.py @@ -23,10 +23,8 @@ def region_capacities(id: int): # Merge Dataframe to generate indices df = df.join(datesDF, on='ScrapeDate') df = df.join(propIdDF, on='property_id') - # Drop now useless columns ScrapeDate and property_id - df = df[['calendarBody', 'date_index', 'prop_index']] - + df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']] # Calculate grid values gridData = [] for row in df.rows(named=True): @@ -36,13 +34,18 @@ def region_capacities(id: int): sum_hor = calDF.sum_horizontal()[0] else: sum_hor = 0 - gridData.append([row['prop_index'], row['date_index'], sum_hor]) - gridData = np.array(gridData) + # With Index + # gridData.append([row['prop_index'], row['date_index'], sum_hor]) + # With ScrapeDate + gridData.append([row['ScrapeDate'], row['date_index'], sum_hor]) + gridData = np.array(gridData) # get all values to calculate Max - allValues = gridData[:, 2] + allValues = gridData[:, 2].astype(int) + print(allValues) maxValue = np.max(allValues) - gridData[:, 2] = (gridData[:, 2]*100)/maxValue + print(maxValue) + gridData[:, 2] = (allValues*100)/maxValue # Return back to list gridData = gridData.tolist() @@ -55,3 +58,6 @@ def region_capacities(id: int): outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData} return outDict + +out = region_capacities(1) +print(out) \ No newline at end of file