From 7663791f33e771fd913b441ce6f47bbd41615aef Mon Sep 17 00:00:00 2001 From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com> Date: Sat, 18 Jan 2025 22:43:49 +0100 Subject: [PATCH] =?UTF-8?q?ungenutzte=20Datei=20etl=5Fpipelines.py=20gel?= =?UTF-8?q?=C3=B6scht?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- etl/src/data/etl_pipelines.py | 46 ----------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 etl/src/data/etl_pipelines.py diff --git a/etl/src/data/etl_pipelines.py b/etl/src/data/etl_pipelines.py deleted file mode 100644 index e67e617..0000000 --- a/etl/src/data/etl_pipelines.py +++ /dev/null @@ -1,46 +0,0 @@ -import polars as pl -import json -from datetime import datetime, timedelta -def expansion_Pipeline(df): - ''' - Rearranges a given extractions Dataframe into an expanded Dataframe. - New Columns :propId, created_at calendar_date, calendar_value - :param df: Inputs from database.py/extractions or database.py/extractions_for functions - :return: expanded dataframe - ''' - data = [] - - for row in df.iter_rows(): - propId = row[1] - createdAt = row[2] - if row[0]: - temp = json.loads(row[0]) - keys = temp.keys() - for key in keys: - out = [propId, createdAt.date(), datetime.strptime(key, '%Y-%m-%d').date(), temp[key]] - data.append(out) - - df = pl.DataFrame(data, schema=["property_id", "created_at", "calendar_date", "calendar_value"]) - return df - -def liveDates_Pipeline(df): - ''' - Returns the expanded Dataframe with only the live data and no future data - :param df: Inputs from database.py/extractions or database.py/extractions_for functions - :return: expanded and filtered dataframe - ''' - df = expansion_Pipeline(df) - print(df) - df = df.filter(pl.col("calendar_date") == pl.col("created_at")+timedelta(days=2)) - return df - -def liveDates_PipelineFromExpanded(df): - ''' - Filters an already expanded df and returns only the live data and no future data - NOTE: The actual live date and the next is always 0. The reason is most likely that it is forbidden to - book on the current or next day. Workaround: Compare with the day after tomorrow - :param df: Inputs from expansion_Pipeline - :return: expanded and filtered dataframe - ''' - df = df.filter(pl.col("calendar_date") == pl.col("created_at")+timedelta(days=2)) - return df \ No newline at end of file