ungenutzte Datei etl_pipelines.py gelöscht

2025-01-18 22:43:49 +01:00 · 2025-01-18 22:43:49 +01:00 · 7663791f33
commit 7663791f33
parent 3d0d45e6bc
1 changed files with 0 additions and 46 deletions
--- a/etl/src/data/etl_pipelines.py
+++ b/etl/src/data/etl_pipelines.py
@ -1,46 +0,0 @@
 import polars as pl
 import json
 from datetime import datetime, timedelta
 def expansion_Pipeline(df):
    """
    Expand an extractions DataFrame into one output row per calendar entry.

    Every input row is read positionally: index 0 holds a JSON object string
    whose keys are dates in '%Y-%m-%d' format, index 1 the property id and
    index 2 the creation timestamp (an object exposing ``.date()``). Rows whose
    JSON field is falsy (e.g. None or an empty string) contribute nothing.

    :param df: Inputs from database.py/extractions or database.py/extractions_for functions
    :return: expanded dataframe with the columns property_id, created_at,
             calendar_date and calendar_value
    """
    records = []
    for row in df.iter_rows():
        raw_calendar = row[0]
        if not raw_calendar:
            # No calendar payload for this extraction; nothing to expand.
            continue
        property_id = row[1]
        created_at = row[2]
        for day, value in json.loads(raw_calendar).items():
            records.append(
                [property_id, created_at.date(), datetime.strptime(day, '%Y-%m-%d').date(), value]
            )
    # Each inner list is one record. State the orientation explicitly so polars
    # does not have to infer rows vs. columns from the shape of the data.
    return pl.DataFrame(
        records,
        schema=["property_id", "created_at", "calendar_date", "calendar_value"],
        orient="row",
    )
 def liveDates_Pipeline(df):
    """
    Expand raw extractions and keep only the live data, dropping future data.

    Runs expansion_Pipeline on the input and then applies the live-date filter
    of liveDates_PipelineFromExpanded, so the filtering rule is defined in a
    single place.

    :param df: Inputs from database.py/extractions or database.py/extractions_for functions
    :return: expanded and filtered dataframe
    """
    return liveDates_PipelineFromExpanded(expansion_Pipeline(df))
 def liveDates_PipelineFromExpanded(df):
    """
    Reduce an already expanded dataframe to its live rows (no future data).

    NOTE: The value for the actual live date and for the following day is
    always 0, most likely because booking on the current or next day is not
    allowed. Workaround: a row counts as live when its calendar_date is the
    day after tomorrow relative to created_at.

    :param df: Inputs from expansion_Pipeline
    :return: expanded and filtered dataframe
    """
    live_offset = timedelta(days=2)
    live_day = pl.col("created_at") + live_offset
    is_live = pl.col("calendar_date") == live_day
    return df.filter(is_live)