diff --git a/etl/src/data/database.py b/etl/src/data/database.py index 5367559..e0a917e 100644 --- a/etl/src/data/database.py +++ b/etl/src/data/database.py @@ -298,7 +298,8 @@ class Database: FROM consultancy_d.extractions WHERE - type == 'calendar' + type == 'calendar' AND + calendar NOT NULL ORDER BY property_id """) @@ -319,6 +320,8 @@ class Database: consultancy_d.seeds ON seeds.id = properties.seed_id LEFT JOIN consultancy_d.regions ON regions.id = seeds.region_id + WHERE + calendar NOT NULL """) def extractions_for(self, property_id): @@ -463,7 +466,8 @@ class Database: consultancy_d.properties ON properties.id = extractions.property_id WHERE type == 'calendar' AND - properties.seed_id = {region_id} + properties.seed_id = {region_id} AND + calendarBody NOT NULL """) def singleScrape_of_region(self, region_id: int, scrape_date_min: str, scrape_date_max: str): @@ -478,7 +482,8 @@ class Database: type == 'calendar' AND properties.seed_id = {region_id} AND extractions.created_at >= '{scrape_date_min}' AND - extractions.created_at < '{scrape_date_max}' + extractions.created_at < '{scrape_date_max}' AND + calendarBody NOT NULL """) def singleScrape_of_global(self, scrape_date_min: str, scrape_date_max: str): @@ -492,7 +497,8 @@ class Database: WHERE type == 'calendar' AND extractions.created_at >= '{scrape_date_min}' AND - extractions.created_at < '{scrape_date_max}' + extractions.created_at < '{scrape_date_max}' AND + calendarBody NOT NULL """) def singleScrape_of_region_scrapDate(self, region_id: int, scrape_date_min: str, scrape_date_max: str): @@ -508,7 +514,8 @@ class Database: type == 'calendar' AND properties.seed_id = {region_id} AND extractions.created_at >= '{scrape_date_min}' AND - extractions.created_at < '{scrape_date_max}' + extractions.created_at < '{scrape_date_max}' AND + calendarBody NOT NULL """) def singleScrape_of_global_scrapDate(self, scrape_date_min: str, scrape_date_max: str): @@ -523,7 +530,8 @@ class Database: WHERE type == 'calendar' AND extractions.created_at >= '{scrape_date_min}' AND - extractions.created_at < '{scrape_date_max}' + extractions.created_at < '{scrape_date_max}' AND + calendarBody NOT NULL """) def capacity_global(self): @@ -556,7 +564,8 @@ class Database: WHERE type == 'calendar' AND (properties.seed_id = {region_id_1} OR - properties.seed_id = {region_id_2}) + properties.seed_id = {region_id_2}) AND + calendarBody NOT NULL """) def unique_scrapeDates(self):