First version of etl_region_movAverage.py added

main
mmaurostoffel 2025-01-12 20:27:33 +01:00
parent f31c23ea51
commit b23879b6d3
2 changed files with 119 additions and 0 deletions

Changed file: FastAPI routes module

@@ -5,6 +5,7 @@ from data import etl_property_capacities_monthly as etl_pcm
from data import etl_property_capacities_weekdays as etl_pcw
from data import etl_property_neighbours as etl_pn
from data import etl_region_capacities as etl_rc
from data import etl_region_movAverage as etl_rmA
from data import etl_region_properties_capacities as etl_rpc
from data import etl_region_capacities_comparison as etl_rcc
from fastapi import FastAPI, Response
@@ -80,3 +81,7 @@ def region_capacities_data(id_1: int, id_2: int):
    capacities = etl_rcc.region_capacities_comparison(id_1, id_2)
    return capacities
@app.get("/region/{id}/movingAverage/{startDate}")
def region_moving_average_data(id: int, startDate: str):
    result = etl_rmA.region_movingAverage(id, startDate)
    return result
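Note: a minimal sketch of calling the new endpoint once the API is running; the base URL, region id, and start date below are illustrative assumptions, not part of this commit.

import requests

# Request the moving average for region 1, with the first
# search window starting on 2024-06-01 (expected format: YYYY-MM-DD).
resp = requests.get("http://localhost:8000/region/1/movingAverage/2024-06-01")
payload = resp.json()

# Keys returned by etl_region_movAverage.region_movingAverage:
# 'dates', 'cap_earlierTimeframe', 'cap_laterTimeframe', 'movAvg'
print(len(payload['dates']), payload['movAvg'][:5])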

New file: etl_region_movAverage.py

@@ -0,0 +1,114 @@
import polars as pl
from io import StringIO
from datetime import datetime, timedelta, date
import matplotlib.pyplot as plt

import data

d = data.load()

def region_movingAverage(id: int, scrape_date_start_min: str):
    # Settings
    # Offset in days between the actual and the predicting scrape date
    timeOffset = 30
    # Length of the calculation frame in days
    calcFrame = 180
    # Moving-average window size in days
    windowSize = 7

    # Parse the start date string (YYYY-MM-DD) into a datetime
    scrape_date_start_min = datetime.strptime(scrape_date_start_min, '%Y-%m-%d')
    # End date of the start search-window
    scrape_date_start_max = scrape_date_start_min + timedelta(days=1)
    # Start and end date of the end search-window
    scrape_date_end_min = scrape_date_start_min + timedelta(days=timeOffset)
    scrape_date_end_max = scrape_date_end_min + timedelta(days=1)
    final_end_date = scrape_date_end_min + timedelta(days=calcFrame)
    # Fetch one scrape per search window and count the properties in each
    ex_start = d.singleScrape_of_region(id, scrape_date_start_min, scrape_date_start_max)
    ex_start_count = ex_start.shape[0]
    ex_end = d.singleScrape_of_region(id, scrape_date_end_min, scrape_date_end_max)
    ex_end_count = ex_end.shape[0]
    num_properties = [ex_start_count, ex_end_count]
    start_end = [ex_start, ex_end]
    outDFList = []
    for df in start_end:
        df = df.pl()  # materialise the query result as a Polars DataFrame
        firstExe = True
        counter = 1
        outDF = pl.DataFrame(schema={"0": int, "dates": date})
        for row in df.rows(named=True):
            if row['calendarBody']:
                # The calendar JSON maps dates to availability values;
                # transpose it so the dates become rows
                calDF = pl.read_json(StringIO(row['calendarBody']))
                columnTitles = calDF.columns
                calDF = calDF.transpose()
                calDF = calDF.with_columns(pl.Series(name="dates", values=columnTitles))
                calDF = calDF.with_columns((pl.col("dates").str.to_date()))
                # Keep only the dates that fall inside the calculation frame
                calDF = calDF.filter((pl.col("dates") >= scrape_date_end_min))
                calDF = calDF.filter((pl.col("dates") < final_end_date))
                # Join all properties into one DataFrame, one column each
                if firstExe:
                    outDF = calDF
                    firstExe = False
                else:
                    outDF = outDF.join(calDF, on='dates')
                    outDF = outDF.rename({'column_0': str(counter)})
                counter += 1
        outDF = outDF.sort('dates')
        outDFList.append(outDF)
    # Calculate the horizontal sum over all properties for each date
    arrayCounter = 0
    tempDFList = []
    for df in outDFList:
        dates = df.select(pl.col("dates"))
        values = df.select(pl.exclude("dates"))
        sum_hor = values.sum_horizontal()
        # Normalise by property count and by 2 (presumably the maximum calendar value)
        sum_hor = sum_hor / num_properties[arrayCounter] / 2
        arrayCounter += 1
        newDF = dates.with_columns(sum_hor=pl.Series(sum_hor))
        tempDFList.append(newDF)
    # Join actual and predicted values on their dates
    outDF = tempDFList[1].join(tempDFList[0], on='dates', how='outer')
    outDF = outDF.drop_nulls()
    outDF = outDF.drop('dates_right')
    # Rename columns for clarity:
    # sum_hor_predict holds the data from the earlier scrape date
    outDF = outDF.rename({'sum_hor': 'sum_hor_actual', 'sum_hor_right': 'sum_hor_predict'})
    # Calculate the moving average over the predicted values
    baseValues = outDF.get_column('sum_hor_predict').to_list()
    i = 0
    moving_averages = []
    while i < len(baseValues) - windowSize + 1:
        window = baseValues[i: i + windowSize]
        window_average = sum(window) / windowSize
        moving_averages.append(window_average)
        i += 1
    # Pad the front and end of moving_averages with None so the
    # series stays aligned with the dates column
    num_empty = int(windowSize / 2)
    moving_averages = [None] * num_empty + moving_averages + [None] * num_empty
    # Add moving_averages to the DataFrame
    outDF = outDF.with_columns(moving_averages=pl.Series(moving_averages))
    result = {
        'dates': outDF.get_column('dates').to_list(),
        'cap_earlierTimeframe': outDF.get_column('sum_hor_predict').to_list(),
        'cap_laterTimeframe': outDF.get_column('sum_hor_actual').to_list(),
        'movAvg': outDF.get_column('moving_averages').to_list(),
    }
    return result
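Note: the padding step near the end of the function re-centres the rolling window so each average lines up with the middle date of its window. A standalone sketch of that behaviour with made-up values (windowSize as in the function above; the numbers are illustrative, not real scrape data):

windowSize = 7
baseValues = [float(v) for v in range(10)]  # stand-in for sum_hor_predict

# Plain rolling mean: one value per full window.
moving_averages = [
    sum(baseValues[i:i + windowSize]) / windowSize
    for i in range(len(baseValues) - windowSize + 1)
]

# Pad both ends with None so the series lines up with the dates again.
num_empty = windowSize // 2
moving_averages = [None] * num_empty + moving_averages + [None] * num_empty
assert len(moving_averages) == len(baseValues)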