115 lines
4.3 KiB
Python
115 lines
4.3 KiB
Python
from etl.src import data
|
|
from etl.src.data import etl_pipelines as ep
|
|
import polars as pl
|
|
from datetime import datetime, timedelta
|
|
import pandas as pd
|
|
|
|
|
|
# Disabled stage 1: extraction and expansion.
# Kept inside a bare string literal so it never executes. Remove the
# surrounding triple quotes to run it; it writes 'dok/flatDates.csv', which
# the preorder-counting stage reads.
'''
# Get Data from DB
inst = data.load()

df = inst.extractions().pl()
df = ep.expansion_Pipeline(df)

df.write_csv('dok/flatDates.csv')
print(df)
'''
|
|
# Disabled stage 2: per-property mean number of "preordered" scrapes.
# Kept inside a bare string literal so it never executes. Remove the
# surrounding triple quotes to run it.
#
# What the snippet does when enabled:
#   * reads 'dok/liveDates.csv' and 'dok/flatDates.csv',
#   * keeps the live rows with calendar_value == 0 ("occupied"),
#   * for every such row, walks the scrape dates (created_at) that lie before
#     the row's own created_at and counts those where the flat data shows
#     calendar_value == 0 for the same calendar_date, until the first scrape
#     where it is not 0 has been seen,
#   * averages these counts per property and writes [property_id, mean] rows
#     without column names to 'dok/fullPreoDF.csv'.
#
# NOTE(review): the nesting below was reconstructed from the statements and
# their comments -- confirm against the original before re-enabling.
'''
#Load Data from DF
dfLive = pl.read_csv('dok/liveDates.csv')
dfFlat = pl.read_csv('dok/flatDates.csv')

# Step 1 Get all occupied dates in live data
dfLive = dfLive.filter(pl.col("calendar_value") == 0)
dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
#print(dfLive)

dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))

propIds = dfLive.get_column('property_id').unique()
# NOTE(review): unique() does not guarantee any order by default, while the
# foundLastDate logic below depends on the order in which scrape dates are
# visited -- confirm whether a sorted (newest-first?) order is intended.
createdAt = dfLive.get_column('created_at').unique()
#print(createdAt)

fullPreorderMatrix = []

for propId in propIds:
    curPreorderList = []
    print("Property ID = " + str(propId))
    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
    # dfLive is already restricted to calendar_value == 0 above, so this
    # filter keeps every row; one created_at entry per occupied live row.
    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
    #print("allLiveOccupiedDates = ",allLiveOccupiedDates)
    for date in allLiveOccupiedDates:
        # NOTE(review): [0] always picks the first calendar_date stored for
        # this created_at, even if several occupied rows share it -- confirm.
        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
        #print("Occupied Date = " + str(date), "with Calendar Date =", str(calLiveDate))
        numOfScrapedPreordered = 0
        foundLastDate = False
        for createDate in createdAt:
            if date > createDate:
                #print("Finding Flat Date with CreateDate =",createDate, "and Calendar Date =", calLiveDate)
                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
                #print("tempLiveDf = ", tempFlatDf)

                calVal = tempFlatDf.get_column('calendar_value')
                if len(calVal) > 0:
                    if calVal[0] == 0:
                        # Still Occupied
                        if not foundLastDate:
                            numOfScrapedPreordered += 1
                    else:
                        # Found last Date where not occupied
                        foundLastDate = True
                        #print("number of Scrapes already occupied =", numOfScrapedPreordered)
                        #break
            #else:
                #print("Skipped: Live Date = ",date, "Flat Date =",createDate)
        #print(propId, date, numOfScrapedPreordered)
        curPreorderList.append(numOfScrapedPreordered)
    if len(curPreorderList) > 0:
        mean = sum(curPreorderList) / len(curPreorderList)
    else: mean = 0
    #fullPreorderMatrix.append([propId, mean, curPreorderList])
    fullPreorderMatrix.append([propId, mean])

print(fullPreorderMatrix)
fullPreoDF = pl.DataFrame(fullPreorderMatrix,orient="row")
fullPreoDF.write_csv('dok/fullPreoDF.csv')
print(fullPreoDF)
'''
|
|
|
|
# Filter Props to locations and calculate Means per location
#
# Reads the per-property mean preorder scrape counts from
# 'dok/fullPreoDF.csv', joins them on "id" with the id/seed_id pairs obtained
# through data.load(), and prints the mean preorder time over all joined rows
# and for each of the seed_ids 1-4.

# Factor applied to the mean scrape count before it is printed as a "time".
# NOTE(review): presumably the spacing between two scrapes; the unit is not
# visible in this file -- confirm.
TIME_PER_SCRAPE = 3


def _mean_preorder_time(frame):
    """Return the scaled, rounded mean of ``meanPreorderScrapeNum`` in *frame*.

    The column mean is multiplied by TIME_PER_SCRAPE and rounded to two
    decimals. Returns None when the column has no mean (for example when a
    region filter matched no rows) instead of raising on ``None * 3``.
    """
    mean_scrapes = frame.get_column("meanPreorderScrapeNum").mean()
    if mean_scrapes is None:
        return None
    return round(mean_scrapes * TIME_PER_SCRAPE, 2)


inst = data.load()

propDf = inst.propIds_with_region().pl()
print(propDf)
# Keep only the join key and the region key, both as Int64 so that the join
# key has the same dtype family as the integer ids read back from the CSV.
propDf = propDf.select(
    pl.col("id").cast(pl.Int64),
    pl.col("seed_id").cast(pl.Int64),
)

preoDF = pl.read_csv('dok/fullPreoDF.csv')
# The rename below expects the CSV's columns to be named column_0 / column_1.
preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})

# Inner join: properties missing on either side are dropped from the stats.
merge = preoDF.join(propDf, how='inner', on='id')
print(merge)

print("Global meanPreorderTime = ", _mean_preorder_time(merge))

# 1 = Heidiland
heidi = merge.filter(pl.col("seed_id") == 1)
print("Heidiland meanPreorderTime = ", _mean_preorder_time(heidi))

# 2 = Davos
Davos = merge.filter(pl.col("seed_id") == 2)
# Previously printed the tuple (value, 2) because the round() call was missing.
print("Davos meanPreorderTime = ", _mean_preorder_time(Davos))

# 3 = Engadin
Engadin = merge.filter(pl.col("seed_id") == 3)
print("Engadin meanPreorderTime = ", _mean_preorder_time(Engadin))

# 4 = St. Moritz
Moritz = merge.filter(pl.col("seed_id") == 4)
print("St. Moritz meanPreorderTime = ", _mean_preorder_time(Moritz))