closes #7: etl_region_capacities erstellt

!! Wie im Issue beschrieben, wurde etl_region_capacities in etl_region_properties_capacities umbenannt; die Endpoints wurden entsprechend angepasst. !!

!! Die Abfrage der globalen Daten ist implementiert und funktioniert, braucht aber recht lange. !!
main
mmaurostoffel 2025-01-11 17:33:50 +01:00
parent 774e30c945
commit 67382003ca
3 changed files with 106 additions and 47 deletions

View File

@ -5,6 +5,7 @@ from data import etl_property_capacities_monthly as etl_pcm
from data import etl_property_capacities_weekdays as etl_pcw from data import etl_property_capacities_weekdays as etl_pcw
from data import etl_property_neighbours as etl_pn from data import etl_property_neighbours as etl_pn
from data import etl_region_capacities as etl_rc from data import etl_region_capacities as etl_rc
from data import etl_region_properties_capacities as etl_rpc
from data import etl_region_capacities_comparison as etl_rcc from data import etl_region_capacities_comparison as etl_rcc
from fastapi import FastAPI, Response from fastapi import FastAPI, Response
@ -64,6 +65,11 @@ def property_capacities_data(id: int, scrapeDate: str):
def property_base_data(id: int): def property_base_data(id: int):
return d.property_base_data(id).pl().to_dicts() return d.property_base_data(id).pl().to_dicts()
@app.get("/region/{id}/properties/capacities")
def region_property_capacities_data(id: int):
    """Serve the properties-capacities data of one region.

    Delegates to ``etl_rpc.region_properties_capacities`` and returns its
    result unchanged.
    """
    return etl_rpc.region_properties_capacities(id)
@app.get("/region/{id}/capacities") @app.get("/region/{id}/capacities")
def region_capacities_data(id: int): def region_capacities_data(id: int):
capacities = etl_rc.region_capacities(id) capacities = etl_rc.region_capacities(id)

View File

@ -1,61 +1,53 @@
import data
import polars as pl
from io import StringIO from io import StringIO
import numpy as np from datetime import date
import polars as pl
import data
d = data.load() d = data.load()
def region_capacities(id: int): def region_capacities(id: int):
# Get Data
if id == -1:
df = d.capacity_global().pl()
else:
df = d.capacity_of_region(id).pl()
# turn PropertyIDs to ints for sorting
df = df.cast({"property_id": int})
# Get uniques for dates and propIDs and sort them # Get Data
listOfDates = df.get_column("ScrapeDate").unique().sort() if id == -1:
listOfPropertyIDs = df.get_column("property_id").unique().sort() extractions = d.capacity_global().pl()
else:
extractions = d.capacity_of_region(id).pl()
# turn PropertyIDs to ints for sorting
extractions = extractions.cast({"property_id": int})
# Create DFs from lists to merge later extractions.drop('property_id')
datesDF = pl.DataFrame(listOfDates).with_row_index("date_index") df_dates = pl.DataFrame()
propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
# Merge Dataframe to generate indices # Get Data from JSON
df = df.join(datesDF, on='ScrapeDate') gridData = []
df = df.join(propIdDF, on='property_id') dayCounts = []
# Drop now useless columns ScrapeDate and property_id for row in extractions.rows(named=True):
df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']] # Return 0 for sum if calendar is null
# Calculate grid values if row['calendarBody']:
gridData = [] calDF = pl.read_json(StringIO(row['calendarBody']))
for row in df.rows(named=True): sum_hor = calDF.sum_horizontal()[0]
# Return 0 for sum if calendar is null else:
if row['calendarBody']: sum_hor = 0
calDF = pl.read_json(StringIO(row['calendarBody'])) gridData.append([row['ScrapeDate'], sum_hor, calDF.width])
sum_hor = calDF.sum_horizontal()[0]
else:
sum_hor = 0
# With Index
# gridData.append([row['prop_index'], row['date_index'], sum_hor])
# With ScrapeDate
gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
gridData = np.array(gridData) # Create Aggregates of values
# get all values to calculate Max df = pl.DataFrame(gridData)
allValues = gridData[:, 2].astype(int) df_count = df.group_by("column_0").agg(pl.col("column_1").count())
maxValue = np.max(allValues) df_sum = df.group_by("column_0").agg(pl.col("column_1").sum())
gridData[:, 2] = (allValues*100)/maxValue df_numDays = df.group_by("column_0").agg(pl.col("column_2").max())
# Return back to list # Join and rename DF's
gridData = gridData.tolist() df = df_sum.join(df_count, on= 'column_0').join(df_numDays, on= 'column_0')
df = df.rename({"column_0": "ScrapeDate", "column_1": "Sum", "column_1_right": "num_properties", "column_2": "max_value", })
# Cast listOfDates to datetime # Calculate normed capacities for each scrapeDate
listOfDates = listOfDates.cast(pl.Date).to_list() df = df.with_columns((pl.col("Sum") / pl.col("num_properties") / (pl.col("max_value")*2) * 100).alias("capacity"))
listOfPropertyIDs = listOfPropertyIDs.to_list()
# Create JSON # Sort the date column
outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData} df = df.cast({"ScrapeDate": date})
df = df.sort('ScrapeDate')
return outDict result = {"capacities": df['capacity'].to_list(), "dates": df['ScrapeDate'].to_list()}
return result

View File

@ -0,0 +1,61 @@
import data
import polars as pl
from io import StringIO
import numpy as np
d = data.load()
def region_properties_capacities(id: int):
    """Build the capacity grid of all properties in a region.

    Args:
        id: Region identifier. ``-1`` selects the global capacity query
            instead of a single region.

    Returns:
        A dict with three keys:

        * ``scrapeDates``: sorted unique scrape dates, cast to ``pl.Date``.
        * ``property_ids``: sorted unique property ids (cast to int).
        * ``values``: one ``[ScrapeDate, date_index, value]`` entry per row,
          where ``value`` is the row's calendar sum scaled so that the
          largest sum equals 100. Empty when the query returns no rows.
    """
    # -1 is the sentinel for "all regions".
    if id == -1:
        df = d.capacity_global().pl()
    else:
        df = d.capacity_of_region(id).pl()

    # Cast property ids to int so they sort numerically.
    df = df.cast({"property_id": int})

    # Sorted unique dates and property ids; they are returned to the caller
    # and also used to derive positional indices.
    listOfDates = df.get_column("ScrapeDate").unique().sort()
    listOfPropertyIDs = df.get_column("property_id").unique().sort()

    # Attach the position of each date / property id to every row.
    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
    df = df.join(datesDF, on='ScrapeDate')
    df = df.join(propIdDF, on='property_id')

    # Keep only the columns needed below. prop_index is carried along but is
    # not part of the emitted grid entries.
    df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]

    # One grid entry per row: the horizontal sum of the parsed calendar.
    gridData = []
    for row in df.rows(named=True):
        if row['calendarBody']:
            calDF = pl.read_json(StringIO(row['calendarBody']))
            sum_hor = calDF.sum_horizontal()[0]
        else:
            # A missing/empty calendar counts as 0.
            sum_hor = 0
        gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])

    # Only normalise when there is data: np.array([]) is one-dimensional and
    # the column slice below would raise IndexError.
    if gridData:
        # NOTE(review): if ScrapeDate is a string column, np.array stores the
        # whole grid as strings, so the entries come back as strings -
        # confirm that consumers expect this before changing it.
        gridData = np.array(gridData)
        allValues = gridData[:, 2].astype(int)
        maxValue = np.max(allValues)
        if maxValue != 0:
            scaled = (allValues*100)/maxValue
        else:
            # Every sum is 0: avoid dividing by zero (which would yield NaN).
            scaled = np.zeros(len(allValues))
        gridData[:, 2] = scaled
        gridData = gridData.tolist()

    # Convert the polars series to plain Python lists for the response.
    listOfDates = listOfDates.cast(pl.Date).to_list()
    listOfPropertyIDs = listOfPropertyIDs.to_list()

    outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
    return outDict