# ConsultancyProject_2_ETL/etl/src/mauro/vorbuchungsZeit.py
from etl.src import data
from etl.src.data import etl_pipelines as ep
import polars as pl
from datetime import datetime, timedelta
import pandas as pd
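# Estimates the "Vorbuchungszeit" (pre-booking lead time): for every occupied
# date in the live calendar data, the commented-out block below counts in how
# many earlier scrapes that calendar date was already marked as occupied, and
# the per-property means are then aggregated per region. The scrape counts are
# later multiplied by 3, which appears to assume roughly one scrape every
# three days.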
'''
# Get Data from DB
inst = data.load()
df = inst.extractions().pl()
df = ep.expansion_Pipeline(df)
df.write_csv('dok/flatDates.csv')
print(df)
'''
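# The block above pulls the raw calendar data from the DB, flattens it with
# expansion_Pipeline and caches it as dok/flatDates.csv; it only needs to be
# re-run when that cached CSV should be refreshed.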
'''
# Load the cached CSV exports
dfLive = pl.read_csv('dok/liveDates.csv')
dfFlat = pl.read_csv('dok/flatDates.csv')

# Step 1: get all occupied dates in the live data
dfLive = dfLive.filter(pl.col("calendar_value") == 0)
dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
#print(dfLive)
dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))

propIds = dfLive.get_column('property_id').unique()
createdAt = dfLive.get_column('created_at').unique()
#print(createdAt)

# Step 2: for every occupied live date, count in how many earlier scrapes
# the same calendar date was already occupied
fullPreorderMatrix = []
for propId in propIds:
    curPreorderList = []
    print("Property ID = " + str(propId))
    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
    #print("allLiveOccupiedDates = ", allLiveOccupiedDates)
    for date in allLiveOccupiedDates:
        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
        #print("Occupied Date = " + str(date), "with Calendar Date =", str(calLiveDate))
        numOfScrapedPreordered = 0
        foundLastDate = False
        for createDate in createdAt:
            if date > createDate:
                #print("Finding Flat Date with CreateDate =", createDate, "and Calendar Date =", calLiveDate)
                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
                #print("tempFlatDf = ", tempFlatDf)
                calVal = tempFlatDf.get_column('calendar_value')
                if len(calVal) > 0:
                    if calVal[0] == 0:
                        # Still occupied in this earlier scrape
                        if not foundLastDate:
                            numOfScrapedPreordered += 1
                    else:
                        # Found the last scrape where the date was not yet occupied
                        foundLastDate = True
                        #print("number of Scrapes already occupied =", numOfScrapedPreordered)
                        #break
            #else:
                #print("Skipped: Live Date = ", date, "Flat Date =", createDate)
        #print(propId, date, numOfScrapedPreordered)
        curPreorderList.append(numOfScrapedPreordered)
    if len(curPreorderList) > 0:
        mean = sum(curPreorderList) / len(curPreorderList)
    else:
        mean = 0
    #fullPreorderMatrix.append([propId, mean, curPreorderList])
    fullPreorderMatrix.append([propId, mean])

print(fullPreorderMatrix)
fullPreoDF = pl.DataFrame(fullPreorderMatrix, orient="row")
fullPreoDF.write_csv('dok/fullPreoDF.csv')
print(fullPreoDF)
'''
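# Untested sketch of the same count done with a join instead of the nested
# loops above, kept commented out like the other alternative blocks. It counts,
# per occupied live row, how many earlier scrapes already showed that calendar
# date as occupied; it does not reproduce the foundLastDate cut-off, and live
# dates with no earlier occupied scrape drop out instead of counting as 0.
'''
dfLive = pl.read_csv('dok/liveDates.csv', try_parse_dates=True).filter(pl.col("calendar_value") == 0)
dfFlat = pl.read_csv('dok/flatDates.csv', try_parse_dates=True).filter(pl.col("calendar_value") == 0)

preorderCounts = (
    dfLive.join(dfFlat, on=["property_id", "calendar_date"], how="inner", suffix="_flat")
    # keep only scrapes taken before the live booking was observed
    .filter(pl.col("created_at_flat") < pl.col("created_at"))
    # one count per occupied live date ...
    .group_by(["property_id", "created_at", "calendar_date"])
    .agg(pl.len().alias("numOfScrapedPreordered"))  # pl.len() needs a recent polars; older versions use pl.count()
    # ... then one mean per property
    .group_by("property_id")
    .agg(pl.col("numOfScrapedPreordered").mean().alias("meanPreorderScrapeNum"))
)
print(preorderCounts)
'''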
# Filter properties by location (seed_id) and calculate the mean per location
inst = data.load()
propDf = inst.propIds_with_region().pl()
print(propDf)
propDf = propDf.select(
pl.col("id").cast(pl.Int64),
pl.col("seed_id").cast(pl.Int64),
)
preoDF = pl.read_csv('dok/fullPreoDF.csv')
preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})
merge = preoDF.join(propDf, how='inner', on='id')
print(merge)
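# Note: the *3 factor below converts scrape counts into days, assuming roughly
# one scrape every three days.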
print("Global meanPreorderTime = ",round(merge.get_column("meanPreorderScrapeNum").mean()*3,2))
# 1 = Heidiland
heidi = merge.filter(pl.col("seed_id") == 1)
print("Heidiland meanPreorderTime = ", round(heidi.get_column("meanPreorderScrapeNum").mean()*3,2))
# 2 = Davos
Davos = merge.filter(pl.col("seed_id") == 2)
print("Davos meanPreorderTime = ", (Davos.get_column("meanPreorderScrapeNum").mean()*3,2))
# 3 = Engadin
Engadin = merge.filter(pl.col("seed_id") == 3)
print("Engadin meanPreorderTime = ", round(Engadin.get_column("meanPreorderScrapeNum").mean()*3,2))
# 4 = St. Moritz
Moritz = merge.filter(pl.col("seed_id") == 4)
print("St. Moritz meanPreorderTime = ", round(Moritz.get_column("meanPreorderScrapeNum").mean()*3,2))