From d436c5d892755b921d2a7eaa23084510bf95949a Mon Sep 17 00:00:00 2001
From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com>
Date: Mon, 13 Jan 2025 23:06:42 +0100
Subject: [PATCH] added missing logic to etl_region_movAverage

---
# REVIEW NOTES (free-text area of the patch mail; not part of the commit)
#
# Reconstruction caveat
#   This patch was recovered from a copy in which every line break and all
#   indentation had been collapsed. The records below are split one per line
#   again and the hunk line counts match the original "@@" headers, but the
#   indentation and the exact position of blank context lines are best
#   guesses. Run `git apply --check` against the repository before applying.
#
# What the change does
#   * database.py: adds Database.unique_scrapeDates(), which runs
#     SELECT DISTINCT strftime(extractions.created_at, '%Y-%m-%d') AS ScrapeDate
#     on consultancy_d.extractions through self.connection.sql(...).
#   * etl_region_movAverage.py: region_movingAverage() now loads those values,
#     parses the 'ScrapeDate' column to dates, sorts them into a list, and
#     replaces scrape_date_end_min (start + timeOffset days) with the list
#     entry closest to it.
#
# Points to confirm
#   1. unique_scrapeDates() uses an f-string with no placeholders; the "f"
#      prefix has no effect and can be dropped.
#   2. The select list ends with a trailing comma before FROM. Presumably the
#      connection is DuckDB, which tolerates this; confirm, since standard SQL
#      does not.
#   3. min(uniqueScrapeDates, ...) raises ValueError when the list is empty
#      (no rows in extractions). Decide whether that should be an explicit
#      error or a fallback (min(..., default=...)).
#   4. After the "closest ScrapeDate" step scrape_date_end_min is no longer
#      the datetime computed above but an element of uniqueScrapeDates
#      (compared against .date(), so presumably datetime.date).
#      scrape_date_end_max and final_end_date inherit that type; verify that
#      the code using them accepts it.
#   5. If created_at can be NULL, the list may contain None and the
#      subtraction in the key function would fail. TODO confirm the column
#      is NOT NULL.

 etl/src/data/database.py              | 8 ++++++++
 etl/src/data/etl_region_movAverage.py | 9 +++++++++
 2 files changed, 17 insertions(+)

diff --git a/etl/src/data/database.py b/etl/src/data/database.py
index 8f71c0a..cddce55 100644
--- a/etl/src/data/database.py
+++ b/etl/src/data/database.py
@@ -527,3 +527,11 @@ class Database:
                 properties.seed_id = {region_id_2})
             """)
 
+    def unique_scrapeDates(self):
+        return self.connection.sql(f"""
+            SELECT DISTINCT
+                strftime(extractions.created_at, '%Y-%m-%d') AS ScrapeDate,
+            FROM
+                consultancy_d.extractions
+            """)
+
diff --git a/etl/src/data/etl_region_movAverage.py b/etl/src/data/etl_region_movAverage.py
index 4549a97..362755d 100644
--- a/etl/src/data/etl_region_movAverage.py
+++ b/etl/src/data/etl_region_movAverage.py
@@ -18,6 +18,12 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
 
     # Filter Setting
     windowSize = 7
+
+    # Get unique ScrapeDates
+    uniqueScrapeDates = d.unique_scrapeDates().pl()
+    uniqueScrapeDates = uniqueScrapeDates.get_column('ScrapeDate').str.to_date()
+    uniqueScrapeDates = uniqueScrapeDates.sort().to_list()
+
     # String to Date
     scrape_date_start_min = datetime.strptime(scrape_date_start_min, '%Y-%m-%d')
 
@@ -26,6 +32,9 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
 
     # Get start and end date of End search-window
     scrape_date_end_min = scrape_date_start_min + timedelta(days=timeOffset)
+
+    # Get closest ScrapeDate
+    scrape_date_end_min = min(uniqueScrapeDates, key=lambda x: abs(x - scrape_date_end_min.date()))
     scrape_date_end_max = scrape_date_end_min + timedelta(days=1)
     final_end_date = scrape_date_end_min + timedelta(days=calcFrame)
 