From 7e3862a5786a0a27241bbc8e70f532cc6e218c7d Mon Sep 17 00:00:00 2001 From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com> Date: Wed, 15 Jan 2025 20:44:16 +0100 Subject: [PATCH] =?UTF-8?q?global=20Extractions=20f=C3=BCr=20region=20capa?= =?UTF-8?q?cities=20monthly=20and=20weekdays=20eingef=C3=BCgt,=20closes=20?= =?UTF-8?q?#15?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- etl/src/data/database.py | 15 +++++++++++++++ etl/src/data/etl_region_capacities_monthly.py | 7 ++++++- etl/src/data/etl_region_capacities_weekdays.py | 7 ++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/etl/src/data/database.py b/etl/src/data/database.py index aef93de..5747de6 100644 --- a/etl/src/data/database.py +++ b/etl/src/data/database.py @@ -497,6 +497,21 @@ class Database: extractions.created_at < '{scrape_date_max}' """) + def singleScrape_of_global_scrapDate(self, scrape_date_min: str, scrape_date_max: str): + return self.connection.sql(f""" + SELECT + JSON_EXTRACT(body, '$.content.days') as calendarBody, + extractions.created_at + FROM + consultancy_d.extractions + LEFT JOIN + consultancy_d.properties ON properties.id = extractions.property_id + WHERE + type == 'calendar' AND + extractions.created_at >= '{scrape_date_min}' AND + extractions.created_at < '{scrape_date_max}' + """) + def capacity_global(self): return self.connection.sql(f""" SELECT diff --git a/etl/src/data/etl_region_capacities_monthly.py b/etl/src/data/etl_region_capacities_monthly.py index 873ec16..ba77c83 100644 --- a/etl/src/data/etl_region_capacities_monthly.py +++ b/etl/src/data/etl_region_capacities_monthly.py @@ -22,7 +22,12 @@ def region_capacities_monthly(id: int, scrapeDate_start: str): # Get end date of start search-window scrapeDate_end = scrapeDate_start + timedelta(days=1) - extractions = d.singleScrape_of_region_scrapDate(id, scrapeDate_start, scrapeDate_end).pl() + # Get Data + if id == -1: + extractions = d.singleScrape_of_global_scrapDate(scrapeDate_start, scrapeDate_end).pl() + else: + extractions = d.singleScrape_of_region_scrapDate(id, scrapeDate_start, scrapeDate_end).pl() + df_calendar = pl.DataFrame() numWeeks = 0 firstExe = True diff --git a/etl/src/data/etl_region_capacities_weekdays.py b/etl/src/data/etl_region_capacities_weekdays.py index fb95225..7c5ff68 100644 --- a/etl/src/data/etl_region_capacities_weekdays.py +++ b/etl/src/data/etl_region_capacities_weekdays.py @@ -21,7 +21,12 @@ def region_capacities_weekdays(id: int, scrapeDate_start: str): # Get end date of start search-window scrapeDate_end = scrapeDate_start + timedelta(days=1) - extractions = d.singleScrape_of_region_scrapDate(id, scrapeDate_start, scrapeDate_end).pl() + # Get Data + if id == -1: + extractions = d.singleScrape_of_global_scrapDate(scrapeDate_start, scrapeDate_end).pl() + else: + extractions = d.singleScrape_of_region_scrapDate(id, scrapeDate_start, scrapeDate_end).pl() + df_calendar = pl.DataFrame() numWeeks = 0 firstExe = True