diff --git a/etl/src/data/etl_region_movAverage.py b/etl/src/data/etl_region_movAverage.py index c3bd6c9..8818251 100644 --- a/etl/src/data/etl_region_movAverage.py +++ b/etl/src/data/etl_region_movAverage.py @@ -6,6 +6,9 @@ import polars as pl import data from data import etl_cache + +import matplotlib.pyplot as plt + d = data.load() def region_movingAverage(id: int, scrape_date_start_min: str): @@ -78,7 +81,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str): calDF = calDF.with_columns((pl.col("dates").str.to_date())) # Filter out all Data that's in the calculation frame - calDF = calDF.filter((pl.col("dates") >= scrape_date_end_min)) + calDF = calDF.filter((pl.col("dates") >= (scrape_date_start_min + timedelta(days=1)))) calDF = calDF.filter((pl.col("dates") < final_end_date)) # Join all information into one Dataframe @@ -93,6 +96,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str): outDF = outDF.sort('dates') outDFList.append(outDF) + # Calculate the horizontal Sum for all Dates arrayCunter = 0 tempDFList = [] @@ -108,14 +112,13 @@ def region_movingAverage(id: int, scrape_date_start_min: str): tempDFList.append(newDF) # Join actual and predict Values - outDF = tempDFList[1].join(tempDFList[0], on='dates', how='outer') + outDF = tempDFList[0].join(tempDFList[1], on='dates', how='outer') # Rename Columns for clarity - outDF = outDF.drop_nulls() outDF = outDF.drop('dates_right') # sum_hor_predict is the data from the earlier ScrapeDate - outDF = outDF.rename({'sum_hor': 'sum_hor_actual', 'sum_hor_right': 'sum_hor_predict'}) + outDF = outDF.rename({'sum_hor_right': 'sum_hor_actual', 'sum_hor': 'sum_hor_predict'}) # Calculate Moving average from Start baseValues = outDF.get_column('sum_hor_predict').to_list() @@ -133,6 +136,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str): # Add moving_averages to df outDF = outDF.with_columns(moving_averages=pl.Series(moving_averages)) + result = {'dates':outDF.get_column('dates').to_list(), 'cap_earlierTimeframe':outDF.get_column('sum_hor_predict').to_list(), 'cap_laterTimeframe':outDF.get_column('sum_hor_actual').to_list(), 'movAvg':outDF.get_column('moving_averages').to_list(),} etl_cache.saveObj(file, result) return result \ No newline at end of file