ungenutzte Datei etl_pipelines.py gelöscht
parent
3d0d45e6bc
commit
7663791f33
|
@ -1,46 +0,0 @@
|
||||||
import polars as pl
|
|
||||||
import json
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
def expansion_Pipeline(df):
    """
    Rearrange an extractions DataFrame into an expanded DataFrame.

    Input rows are read positionally: column 0 holds a JSON object (as text)
    whose keys are ``YYYY-MM-DD`` date strings, column 1 the property id and
    column 2 the creation timestamp (``.date()`` is called on it). Every
    key/value pair of the JSON object becomes one output row. Rows whose
    first column is empty or None contribute nothing.

    New columns: property_id, created_at, calendar_date, calendar_value

    :param df: Inputs from database.py/extractions or database.py/extractions_for functions
    :return: expanded dataframe
    """
    data = []

    for row in df.iter_rows():
        raw_calendar = row[0]
        propId = row[1]
        createdAt = row[2]

        # Nothing to expand when the calendar payload is missing or empty.
        if not raw_calendar:
            continue

        calendar = json.loads(raw_calendar)
        for key, value in calendar.items():
            data.append(
                [
                    propId,
                    createdAt.date(),
                    datetime.strptime(key, '%Y-%m-%d').date(),
                    value,
                ]
            )

    # `data` is a list of rows. Without an explicit orientation polars has to
    # guess from the dimensions, which is ambiguous when the number of rows
    # equals the number of columns and otherwise triggers an orientation
    # warning. State the orientation explicitly.
    return pl.DataFrame(
        data,
        schema=["property_id", "created_at", "calendar_date", "calendar_value"],
        orient="row",
    )
|
|
||||||
|
|
||||||
def liveDates_Pipeline(df):
    """
    Return the expanded DataFrame with only the live data and no future data.

    The raw extractions are first expanded with ``expansion_Pipeline`` and
    then filtered with ``liveDates_PipelineFromExpanded``, which keeps the
    rows whose calendar_date is two days after created_at.

    :param df: Inputs from database.py/extractions or database.py/extractions_for functions
    :return: expanded and filtered dataframe
    """
    # Function-local import so this block does not depend on the file header.
    import logging

    expanded = expansion_Pipeline(df)

    # Diagnostic output goes through logging instead of an unconditional
    # print to stdout; the frame is only formatted when DEBUG is enabled.
    logging.getLogger(__name__).debug("expanded extractions:\n%s", expanded)

    # Reuse the shared filter so the live-date rule is defined in one place.
    return liveDates_PipelineFromExpanded(expanded)
|
|
||||||
|
|
||||||
def liveDates_PipelineFromExpanded(df):
    """
    Filter an already expanded DataFrame down to the live data (no future data).

    NOTE: The values for the actual live date and the day after it are always
    0, most likely because booking on the current or next day is not allowed.
    Workaround: compare against the day after tomorrow, i.e. keep the rows
    where calendar_date equals created_at plus two days.

    :param df: Inputs from expansion_Pipeline
    :return: expanded and filtered dataframe
    """
    live_offset = timedelta(days=2)
    expected_calendar_date = pl.col("created_at") + live_offset
    is_live_row = pl.col("calendar_date") == expected_calendar_date
    return df.filter(is_live_row)
|
|
Loading…
Reference in New Issue