closes #7: created etl_region_capacities

As described in the issue, etl_region_capacities was adapted into etl_region_properties_capacities, and the endpoints were adjusted as well. The query for the global data is implemented and works, but takes quite a long time.
parent 774e30c945
commit 67382003ca
branch main
@@ -5,6 +5,7 @@ from data import etl_property_capacities_monthly as etl_pcm
 from data import etl_property_capacities_weekdays as etl_pcw
 from data import etl_property_neighbours as etl_pn
 from data import etl_region_capacities as etl_rc
+from data import etl_region_properties_capacities as etl_rpc
 from data import etl_region_capacities_comparison as etl_rcc
 from fastapi import FastAPI, Response
 
@@ -64,6 +65,11 @@ def property_capacities_data(id: int, scrapeDate: str):
 def property_base_data(id: int):
     return d.property_base_data(id).pl().to_dicts()
 
+@app.get("/region/{id}/properties/capacities")
+def region_property_capacities_data(id: int):
+    capacities = etl_rpc.region_properties_capacities(id)
+    return capacities
+
 @app.get("/region/{id}/capacities")
 def region_capacities_data(id: int):
     capacities = etl_rc.region_capacities(id)
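
For reference, the two region endpoints can be exercised like this — a minimal sketch, not part of the commit, assuming the FastAPI app is served locally (host, port, and the region id 1 are placeholders):

import requests

BASE = "http://localhost:8000"  # assumed local dev server, not from the commit

# renamed per-property endpoint, returns the capacity grid
grid = requests.get(f"{BASE}/region/1/properties/capacities").json()
print(grid["scrapeDates"][:3], grid["property_ids"][:3])

# reworked aggregate endpoint; id -1 selects the global data,
# which works but is slow, as noted in the commit message
agg = requests.get(f"{BASE}/region/-1/capacities").json()
print(agg["dates"][:3], agg["capacities"][:3])
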
@@ -1,61 +1,53 @@
-import data
-import polars as pl
 from io import StringIO
-import numpy as np
+from datetime import date
+
+import polars as pl
+
+import data
 
 
 d = data.load()
 
 
 def region_capacities(id: int):
     # Get Data
     if id == -1:
-        df = d.capacity_global().pl()
+        extractions = d.capacity_global().pl()
     else:
-        df = d.capacity_of_region(id).pl()
+        extractions = d.capacity_of_region(id).pl()
     # turn PropertyIDs to ints for sorting
-    df = df.cast({"property_id": int})
-
-    # Get uniques for dates and propIDs and sort them
-    listOfDates = df.get_column("ScrapeDate").unique().sort()
-    listOfPropertyIDs = df.get_column("property_id").unique().sort()
-
-    # Create DFs from lists to merge later
-    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
-    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
-
-    # Merge Dataframe to generate indices
-    df = df.join(datesDF, on='ScrapeDate')
-    df = df.join(propIdDF, on='property_id')
-    # Drop now useless columns ScrapeDate and property_id
-    df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]
-    # Calculate grid values
+    extractions = extractions.cast({"property_id": int})
+    extractions = extractions.drop('property_id')
+
+    # Get Data from JSON
     gridData = []
-    for row in df.rows(named=True):
+    for row in extractions.rows(named=True):
         # Return 0 for sum if calendar is null
         if row['calendarBody']:
             calDF = pl.read_json(StringIO(row['calendarBody']))
             sum_hor = calDF.sum_horizontal()[0]
+            num_days = calDF.width
         else:
             sum_hor = 0
-        # With Index
-        # gridData.append([row['prop_index'], row['date_index'], sum_hor])
-        # With ScrapeDate
-        gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
+            num_days = 0
+        gridData.append([row['ScrapeDate'], sum_hor, num_days])
 
-    gridData = np.array(gridData)
-    # get all values to calculate Max
-    allValues = gridData[:, 2].astype(int)
-    maxValue = np.max(allValues)
-    gridData[:, 2] = (allValues*100)/maxValue
-
-    # Return back to list
-    gridData = gridData.tolist()
-
-    # Cast listOfDates to datetime
-    listOfDates = listOfDates.cast(pl.Date).to_list()
-    listOfPropertyIDs = listOfPropertyIDs.to_list()
-
-    # Create JSON
-    outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
-
-    return outDict
+    # Create Aggregates of values
+    df = pl.DataFrame(gridData)
+    df_count = df.group_by("column_0").agg(pl.col("column_1").count())
+    df_sum = df.group_by("column_0").agg(pl.col("column_1").sum())
+    df_numDays = df.group_by("column_0").agg(pl.col("column_2").max())
+
+    # Join and rename DFs
+    df = df_sum.join(df_count, on='column_0').join(df_numDays, on='column_0')
+    df = df.rename({"column_0": "ScrapeDate", "column_1": "Sum", "column_1_right": "num_properties", "column_2": "max_value"})
+
+    # Calculate normed capacities for each ScrapeDate
+    df = df.with_columns((pl.col("Sum") / pl.col("num_properties") / (pl.col("max_value") * 2) * 100).alias("capacity"))
+
+    # Sort by the date column
+    df = df.cast({"ScrapeDate": date})
+    df = df.sort('ScrapeDate')
+
+    result = {"capacities": df['capacity'].to_list(), "dates": df['ScrapeDate'].to_list()}
+    return result
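
The normalisation step in the rewritten region_capacities is easiest to see with numbers. A minimal sketch with toy values (my own, not from the commit); the division by max_value * 2 implies each calendar day cell can hold at most 2 — that reading is an assumption:

# one ScrapeDate, two properties, calendars 3 days wide, cells valued 0..2
sums = [4, 2]                # per-property sum_horizontal results (hypothetical)
num_properties = len(sums)   # -> 2
max_value = 3                # widest calendarBody, i.e. number of days

capacity = sum(sums) / num_properties / (max_value * 2) * 100
print(capacity)              # -> 50.0: half of the maximum of 6 per property is free
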
@@ -0,0 +1,61 @@
+import data
+import polars as pl
+from io import StringIO
+import numpy as np
+
+
+d = data.load()
+
+
+def region_properties_capacities(id: int):
+    # Get Data
+    if id == -1:
+        df = d.capacity_global().pl()
+    else:
+        df = d.capacity_of_region(id).pl()
+    # turn PropertyIDs to ints for sorting
+    df = df.cast({"property_id": int})
+
+    # Get uniques for dates and propIDs and sort them
+    listOfDates = df.get_column("ScrapeDate").unique().sort()
+    listOfPropertyIDs = df.get_column("property_id").unique().sort()
+
+    # Create DFs from lists to merge later
+    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
+    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
+
+    # Merge Dataframe to generate indices
+    df = df.join(datesDF, on='ScrapeDate')
+    df = df.join(propIdDF, on='property_id')
+    # Drop now useless columns ScrapeDate and property_id
+    df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]
+    # Calculate grid values
+    gridData = []
+    for row in df.rows(named=True):
+        # Return 0 for sum if calendar is null
+        if row['calendarBody']:
+            calDF = pl.read_json(StringIO(row['calendarBody']))
+            sum_hor = calDF.sum_horizontal()[0]
+        else:
+            sum_hor = 0
+        # With Index
+        # gridData.append([row['prop_index'], row['date_index'], sum_hor])
+        # With ScrapeDate
+        gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
+
+    gridData = np.array(gridData)
+    # get all values to calculate Max
+    allValues = gridData[:, 2].astype(int)
+    maxValue = np.max(allValues)
+    gridData[:, 2] = (allValues*100)/maxValue
+
+    # Return back to list
+    gridData = gridData.tolist()
+
+    # Cast listOfDates to datetime
+    listOfDates = listOfDates.cast(pl.Date).to_list()
+    listOfPropertyIDs = listOfPropertyIDs.to_list()
+
+    # Create JSON
+    outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
+
+    return outDict
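
One caveat in this moved file: np.array() on rows that mix strings and ints upcasts every cell to a fixed-width string dtype, so the percentages written back into column 2 end up as strings, truncated to that width. A standalone sketch with toy data (mine, for illustration) showing the behaviour the grid code inherits:

import numpy as np

grid = np.array([["2024-01-01", 0, 4], ["2024-01-08", 1, 6]])
print(grid.dtype)              # <U10: every cell is now a 10-char string
vals = grid[:, 2].astype(int)  # works: '4' and '6' parse back to ints
grid[:, 2] = (vals * 100) / np.max(vals)
print(grid[:, 2].tolist())     # ['66.6666666', '100.0'] — strings, truncated
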