movAverage überarbeitet, closes #17

erster Monat wird nun auch angezeigt
"Nullstelle" zu Beginn entfernt
main
mmaurostoffel 2025-01-17 17:01:29 +01:00
parent 468ad94430
commit 5ffc222430
1 changed file with 8 additions and 4 deletions

View File

@ -6,6 +6,9 @@ import polars as pl
import data import data
from data import etl_cache from data import etl_cache
import matplotlib.pyplot as plt
d = data.load() d = data.load()
def region_movingAverage(id: int, scrape_date_start_min: str): def region_movingAverage(id: int, scrape_date_start_min: str):
@ -78,7 +81,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
calDF = calDF.with_columns((pl.col("dates").str.to_date())) calDF = calDF.with_columns((pl.col("dates").str.to_date()))
# Filter out all Data that's in the calculation frame # Filter out all Data that's in the calculation frame
calDF = calDF.filter((pl.col("dates") >= scrape_date_end_min)) calDF = calDF.filter((pl.col("dates") >= (scrape_date_start_min + timedelta(days=1))))
calDF = calDF.filter((pl.col("dates") < final_end_date)) calDF = calDF.filter((pl.col("dates") < final_end_date))
# Join all information into one Dataframe # Join all information into one Dataframe
@ -93,6 +96,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
outDF = outDF.sort('dates') outDF = outDF.sort('dates')
outDFList.append(outDF) outDFList.append(outDF)
# Calculate the horizontal Sum for all Dates # Calculate the horizontal Sum for all Dates
arrayCunter = 0 arrayCunter = 0
tempDFList = [] tempDFList = []
@ -108,14 +112,13 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
tempDFList.append(newDF) tempDFList.append(newDF)
# Join actual and predict Values # Join actual and predict Values
outDF = tempDFList[1].join(tempDFList[0], on='dates', how='outer') outDF = tempDFList[0].join(tempDFList[1], on='dates', how='outer')
# Rename Columns for clarity # Rename Columns for clarity
outDF = outDF.drop_nulls()
outDF = outDF.drop('dates_right') outDF = outDF.drop('dates_right')
# sum_hor_predict is the data from the earlier ScrapeDate # sum_hor_predict is the data from the earlier ScrapeDate
outDF = outDF.rename({'sum_hor': 'sum_hor_actual', 'sum_hor_right': 'sum_hor_predict'}) outDF = outDF.rename({'sum_hor_right': 'sum_hor_actual', 'sum_hor': 'sum_hor_predict'})
# Calculate Moving average from Start # Calculate Moving average from Start
baseValues = outDF.get_column('sum_hor_predict').to_list() baseValues = outDF.get_column('sum_hor_predict').to_list()
@ -133,6 +136,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
# Add moving_averages to df # Add moving_averages to df
outDF = outDF.with_columns(moving_averages=pl.Series(moving_averages)) outDF = outDF.with_columns(moving_averages=pl.Series(moving_averages))
result = {'dates':outDF.get_column('dates').to_list(), 'cap_earlierTimeframe':outDF.get_column('sum_hor_predict').to_list(), 'cap_laterTimeframe':outDF.get_column('sum_hor_actual').to_list(), 'movAvg':outDF.get_column('moving_averages').to_list(),} result = {'dates':outDF.get_column('dates').to_list(), 'cap_earlierTimeframe':outDF.get_column('sum_hor_predict').to_list(), 'cap_laterTimeframe':outDF.get_column('sum_hor_actual').to_list(), 'movAvg':outDF.get_column('moving_averages').to_list(),}
etl_cache.saveObj(file, result) etl_cache.saveObj(file, result)
return result return result