"""Report mean pre-order lead times per tourism region.

Stages 1 and 2 (DB extraction and per-property pre-order counting) were
run once and are kept below as commented-out reference code; the live
part of the script joins the per-property means (dok/fullPreoDF.csv)
with region info and prints the average pre-order time per location.
"""
from etl.src import data
from etl.src.data import etl_pipelines as ep
import polars as pl
from datetime import datetime, timedelta
import pandas as pd

'''
# Stage 1 (run once): get data from DB, expand it, and cache it as CSV.
inst = data.load()
df = inst.extractions().pl()
df = ep.expansion_Pipeline(df)
df.write_csv('dok/flatDates.csv')
print(df)
'''

'''
# Stage 2 (run once): for every property, count for each occupied live
# date in how many earlier scrapes it was already booked; write the
# per-property mean to dok/fullPreoDF.csv.
dfLive = pl.read_csv('dok/liveDates.csv')
dfFlat = pl.read_csv('dok/flatDates.csv')

# Step 1: keep only occupied dates (calendar_value == 0) in the live data.
dfLive = dfLive.filter(pl.col("calendar_value") == 0)
dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))

propIds = dfLive.get_column('property_id').unique()
createdAt = dfLive.get_column('created_at').unique()

fullPreorderMatrix = []
for propId in propIds:
    curPreorderList = []
    print("Property ID = " + str(propId))
    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
    for date in allLiveOccupiedDates:
        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
        numOfScrapedPreordered = 0
        foundLastDate = False
        for createDate in createdAt:
            if date > createDate:
                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
                calVal = tempFlatDf.get_column('calendar_value')
                if len(calVal) > 0:
                    if calVal[0] == 0:
                        # Still occupied in this earlier scrape.
                        if not foundLastDate:
                            numOfScrapedPreordered += 1
                    else:
                        # Found the last scrape where the date was not occupied.
                        foundLastDate = True
        curPreorderList.append(numOfScrapedPreordered)
    if len(curPreorderList) > 0:
        mean = sum(curPreorderList) / len(curPreorderList)
    else:
        mean = 0
    fullPreorderMatrix.append([propId, mean])
print(fullPreorderMatrix)

fullPreoDF = pl.DataFrame(fullPreorderMatrix, orient="row")
fullPreoDF.write_csv('dok/fullPreoDF.csv')
print(fullPreoDF)
'''

# Stage 3 (live): filter properties to locations and report the mean
# pre-order time globally and per region.

# Scale factor applied to the mean scrape count before reporting.
# NOTE(review): presumably the scrape interval in days, so that
# scrape count * 3 = days of lead time — TODO confirm.
SCRAPE_INTERVAL_DAYS = 3

# seed_id -> region name (as used in propIds_with_region()).
REGIONS = {1: "Heidiland", 2: "Davos", 3: "Engadin", 4: "St. Moritz"}

inst = data.load()
propDf = inst.propIds_with_region().pl()
print(propDf)
propDf = propDf.select(
    pl.col("id").cast(pl.Int64),
    pl.col("seed_id").cast(pl.Int64),
)

# fullPreoDF.csv was written without headers by stage 2; restore names.
preoDF = pl.read_csv('dok/fullPreoDF.csv')
preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})

merge = preoDF.join(propDf, how='inner', on='id')
print(merge)


def _mean_preorder_time(df):
    """Mean scrape count of *df*, scaled to days and rounded to 2 decimals."""
    return round(df.get_column("meanPreorderScrapeNum").mean() * SCRAPE_INTERVAL_DAYS, 2)


print("Global meanPreorderTime = ", _mean_preorder_time(merge))

# BUG FIX: the Davos line previously printed the tuple (mean*3, 2)
# because the round() call was missing; all regions now share the same
# rounded computation via _mean_preorder_time().
for seed_id, region in REGIONS.items():
    regionDf = merge.filter(pl.col("seed_id") == seed_id)
    print(region + " meanPreorderTime = ", _mean_preorder_time(regionDf))