closes #7: etl_region_capacities erstellt
Hinweis: Wie im Issue beschrieben, wurde etl_region_capacities in etl_region_properties_capacities umbenannt; die Endpoints wurden ebenfalls angepasst. Hinweis: Die Abfrage der globalen Daten ist implementiert und funktioniert, braucht aber recht lange.
main
parent
774e30c945
commit
67382003ca
|
@ -5,6 +5,7 @@ from data import etl_property_capacities_monthly as etl_pcm
|
|||
from data import etl_property_capacities_weekdays as etl_pcw
|
||||
from data import etl_property_neighbours as etl_pn
|
||||
from data import etl_region_capacities as etl_rc
|
||||
from data import etl_region_properties_capacities as etl_rpc
|
||||
from data import etl_region_capacities_comparison as etl_rcc
|
||||
from fastapi import FastAPI, Response
|
||||
|
||||
|
@ -64,6 +65,11 @@ def property_capacities_data(id: int, scrapeDate: str):
|
|||
def property_base_data(id: int):
|
||||
return d.property_base_data(id).pl().to_dicts()
|
||||
|
||||
@app.get("/region/{id}/properties/capacities")
def region_property_capacities_data(id: int):
    # Thin HTTP wrapper: hand the region id straight to the ETL routine and
    # return whatever it produces as the response body.
    return etl_rpc.region_properties_capacities(id)
|
||||
|
||||
@app.get("/region/{id}/capacities")
|
||||
def region_capacities_data(id: int):
|
||||
capacities = etl_rc.region_capacities(id)
|
||||
|
|
|
@ -1,61 +1,53 @@
|
|||
import data
|
||||
import polars as pl
|
||||
from io import StringIO
|
||||
import numpy as np
|
||||
from datetime import date
|
||||
|
||||
import polars as pl
|
||||
|
||||
import data
|
||||
|
||||
d = data.load()
|
||||
|
||||
def region_capacities(id: int):
|
||||
|
||||
# Get Data
|
||||
if id == -1:
|
||||
df = d.capacity_global().pl()
|
||||
extractions = d.capacity_global().pl()
|
||||
else:
|
||||
df = d.capacity_of_region(id).pl()
|
||||
extractions = d.capacity_of_region(id).pl()
|
||||
# turn PropertyIDs to ints for sorting
|
||||
df = df.cast({"property_id": int})
|
||||
extractions = extractions.cast({"property_id": int})
|
||||
|
||||
# Get uniques for dates and propIDs and sort them
|
||||
listOfDates = df.get_column("ScrapeDate").unique().sort()
|
||||
listOfPropertyIDs = df.get_column("property_id").unique().sort()
|
||||
extractions.drop('property_id')
|
||||
df_dates = pl.DataFrame()
|
||||
|
||||
# Create DFs from lists to merge later
|
||||
datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
|
||||
propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
|
||||
|
||||
# Merge Dataframe to generate indices
|
||||
df = df.join(datesDF, on='ScrapeDate')
|
||||
df = df.join(propIdDF, on='property_id')
|
||||
# Drop now useless columns ScrapeDate and property_id
|
||||
df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]
|
||||
# Calculate grid values
|
||||
# Get Data from JSON
|
||||
gridData = []
|
||||
for row in df.rows(named=True):
|
||||
dayCounts = []
|
||||
for row in extractions.rows(named=True):
|
||||
# Return 0 for sum if calendar is null
|
||||
if row['calendarBody']:
|
||||
calDF = pl.read_json(StringIO(row['calendarBody']))
|
||||
sum_hor = calDF.sum_horizontal()[0]
|
||||
else:
|
||||
sum_hor = 0
|
||||
# With Index
|
||||
# gridData.append([row['prop_index'], row['date_index'], sum_hor])
|
||||
# With ScrapeDate
|
||||
gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
|
||||
gridData.append([row['ScrapeDate'], sum_hor, calDF.width])
|
||||
|
||||
gridData = np.array(gridData)
|
||||
# get all values to calculate Max
|
||||
allValues = gridData[:, 2].astype(int)
|
||||
maxValue = np.max(allValues)
|
||||
gridData[:, 2] = (allValues*100)/maxValue
|
||||
# Create Aggregates of values
|
||||
df = pl.DataFrame(gridData)
|
||||
df_count = df.group_by("column_0").agg(pl.col("column_1").count())
|
||||
df_sum = df.group_by("column_0").agg(pl.col("column_1").sum())
|
||||
df_numDays = df.group_by("column_0").agg(pl.col("column_2").max())
|
||||
|
||||
# Return back to list
|
||||
gridData = gridData.tolist()
|
||||
# Join and rename DF's
|
||||
df = df_sum.join(df_count, on= 'column_0').join(df_numDays, on= 'column_0')
|
||||
df = df.rename({"column_0": "ScrapeDate", "column_1": "Sum", "column_1_right": "num_properties", "column_2": "max_value", })
|
||||
|
||||
# Cast listOfDates to datetime
|
||||
listOfDates = listOfDates.cast(pl.Date).to_list()
|
||||
listOfPropertyIDs = listOfPropertyIDs.to_list()
|
||||
# Calculate normed capacities for each scrapeDate
|
||||
df = df.with_columns((pl.col("Sum") / pl.col("num_properties") / (pl.col("max_value")*2) * 100).alias("capacity"))
|
||||
|
||||
# Create JSON
|
||||
outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
|
||||
# Sort the date column
|
||||
df = df.cast({"ScrapeDate": date})
|
||||
df = df.sort('ScrapeDate')
|
||||
|
||||
return outDict
|
||||
result = {"capacities": df['capacity'].to_list(), "dates": df['ScrapeDate'].to_list()}
|
||||
return result
|
|
@ -0,0 +1,61 @@
|
|||
import data
|
||||
import polars as pl
|
||||
from io import StringIO
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Module-level handle returned by data.load(); created once when this module
# is imported and used by the function(s) below to run their queries.
d = data.load()
|
||||
|
||||
def region_properties_capacities(id: int):
    """Build the capacity grid (scrape date x property) for a region.

    Args:
        id: Region id passed to ``d.capacity_of_region``. The value ``-1``
            selects the global data set via ``d.capacity_global`` instead.

    Returns:
        A dict with three keys:

        * ``'scrapeDates'``: the sorted unique ``ScrapeDate`` values, cast to
          ``pl.Date`` and converted to a list.
        * ``'property_ids'``: the sorted unique ``property_id`` values (as
          ints) converted to a list.
        * ``'values'``: one ``[ScrapeDate, date_index, value]`` entry per row,
          where ``value`` is the horizontal sum of the row's parsed
          ``calendarBody`` scaled to percent of the largest sum. The list is
          empty when the query returned no rows, and every ``value`` is 0
          when the largest sum is 0.
    """
    # -1 is the sentinel for "all regions"; any other id selects one region.
    if id == -1:
        df = d.capacity_global().pl()
    else:
        df = d.capacity_of_region(id).pl()
    # Cast property ids to int so that they sort numerically.
    df = df.cast({"property_id": int})

    # Sorted unique dates / property ids; the row position becomes the index.
    listOfDates = df.get_column("ScrapeDate").unique().sort()
    listOfPropertyIDs = df.get_column("property_id").unique().sort()

    # One-column frames with a running index, joined back onto the data.
    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
    df = df.join(datesDF, on='ScrapeDate')
    df = df.join(propIdDF, on='property_id')

    # Keep only what the grid needs (property_id is replaced by prop_index).
    df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]

    # One grid entry per row: the summed calendar values of that scrape.
    gridData = []
    for row in df.iter_rows(named=True):
        if row['calendarBody']:
            calDF = pl.read_json(StringIO(row['calendarBody']))
            sum_hor = calDF.sum_horizontal()[0]
        else:
            # A missing/empty calendar counts as a sum of 0.
            sum_hor = 0
        gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])

    # Without rows np.array([]) would be 1-D and the column slice below would
    # raise, so normalisation is skipped and 'values' stays an empty list.
    if gridData:
        # NOTE(review): if ScrapeDate is a str, np.array coerces the whole
        # grid to strings, so the numbers in 'values' come back as strings.
        # Kept as-is for backward compatibility; confirm with the consumer.
        gridData = np.array(gridData)
        allValues = gridData[:, 2].astype(int)
        maxValue = np.max(allValues)
        if maxValue != 0:
            # Scale every sum to percent of the largest sum.
            gridData[:, 2] = (allValues * 100) / maxValue
        else:
            # Avoid 0/0 -> NaN when the largest sum is 0.
            gridData[:, 2] = np.zeros(len(allValues))
        gridData = gridData.tolist()

    listOfDates = listOfDates.cast(pl.Date).to_list()
    listOfPropertyIDs = listOfPropertyIDs.to_list()

    outDict = {
        'scrapeDates': listOfDates,
        'property_ids': listOfPropertyIDs,
        'values': gridData,
    }
    return outDict
|
Loading…
Reference in New Issue