From c3ab7d8e2fdbc1db61a02b788d0a84974bbff12d Mon Sep 17 00:00:00 2001
From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com>
Date: Wed, 15 Jan 2025 21:27:53 +0100
Subject: [PATCH] global for movingAverage implemented

---
 etl/src/data/database.py              | 14 ++++++++++++++
 etl/src/data/etl_region_movAverage.py | 16 ++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/etl/src/data/database.py b/etl/src/data/database.py
index 5747de6..010338e 100644
--- a/etl/src/data/database.py
+++ b/etl/src/data/database.py
@@ -481,6 +481,20 @@ class Database:
                 extractions.created_at < '{scrape_date_max}'
             """)
 
+    def singleScrape_of_global(self, scrape_date_min: str, scrape_date_max: str):
+        return self.connection.sql(f"""
+            SELECT
+                JSON_EXTRACT(body, '$.content.days') as calendarBody,
+            FROM
+                consultancy_d.extractions
+            LEFT JOIN
+                consultancy_d.properties ON properties.id = extractions.property_id
+            WHERE
+                type == 'calendar' AND
+                extractions.created_at >= '{scrape_date_min}' AND
+                extractions.created_at < '{scrape_date_max}'
+            """)
+
     def singleScrape_of_region_scrapDate(self, region_id: int, scrape_date_min: str, scrape_date_max: str):
         return self.connection.sql(f"""
             SELECT
diff --git a/etl/src/data/etl_region_movAverage.py b/etl/src/data/etl_region_movAverage.py
index c0b9577..c3bd6c9 100644
--- a/etl/src/data/etl_region_movAverage.py
+++ b/etl/src/data/etl_region_movAverage.py
@@ -46,11 +46,19 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
 
     final_end_date = scrape_date_end_min + timedelta(days=calcFrame)
 
-    ex_start = d.singleScrape_of_region(id, scrape_date_start_min, scrape_date_start_max)
-    ex_start_count = ex_start.shape[0]
+    # Get Data
+    if id == -1:
+        ex_start = d.singleScrape_of_global(scrape_date_start_min, scrape_date_start_max)
+        ex_start_count = ex_start.shape[0]
 
-    ex_end = d.singleScrape_of_region(id, scrape_date_end_min, scrape_date_end_max)
-    ex_end_count = ex_end.shape[0]
+        ex_end = d.singleScrape_of_global(scrape_date_end_min, scrape_date_end_max)
+        ex_end_count = ex_end.shape[0]
+    else:
+        ex_start = d.singleScrape_of_region(id, scrape_date_start_min, scrape_date_start_max)
+        ex_start_count = ex_start.shape[0]
+
+        ex_end = d.singleScrape_of_region(id, scrape_date_end_min, scrape_date_end_max)
+        ex_end_count = ex_end.shape[0]
 
     num_properties = [ex_start_count, ex_end_count]
     start_end = [ex_start, ex_end]