Investigation of pre-booking time completed
parent
1e0b9f1233
commit
338d3e9cc2

@@ -23,6 +23,7 @@
*.ipr
.idea/

# eclipse project file
.settings/
.classpath
@@ -65,3 +66,4 @@ env3.*/
# duckdb
*.duckdb

/src/mauro/dok/

@@ -71,6 +71,18 @@ class Database:
                regions.name
            """)

    def propIds_with_region(self):
        return self.connection.sql("""
            SELECT
                properties.id, seed_id, regions.name
            FROM
                consultancy_d.properties
            LEFT JOIN
                consultancy_d.seeds ON seeds.id = properties.seed_id
            LEFT JOIN
                consultancy_d.regions ON regions.id = seeds.region_id
            """)

    def properties_unreachable(self):
        return self.connection.sql("""
            SELECT
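The new propIds_with_region() relation can be pulled straight into polars, just as the analysis script below does. A minimal usage sketch, assuming the Database instance comes from data.load() as in that script:

    import data

    inst = data.load()                          # assumed entry point, mirrors the script below
    propDf = inst.propIds_with_region().pl()    # DuckDB relation -> polars DataFrame
    print(propDf.head())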
File diff suppressed because it is too large
@@ -1,34 +1,115 @@
import data
from data import etl_pipelines as ep
import polars as pl
from datetime import datetime, timedelta
import pandas as pd


'''
#Create Data
# Get Data from DB
inst = data.load()

df = inst.extractions().pl()
df = ep.liveDates_Pipeline(df)
df = ep.expansion_Pipeline(df)

df.write_csv('dok/liveDates.csv')
df.write_csv('dok/flatDates.csv')
print(df)
'''
'''
#Load Data from DF
dfLive = pl.read_csv('dok/liveDates.csv')
dfFlat = pl.read_csv('dok/flatDates.csv')

#Load Data
df = pl.read_csv('dok/liveDates.csv')

propIds = df.get_column('property_id').unique()
# Step 1 Get all occupied dates in live data
dfLive = dfLive.filter(pl.col("calendar_value") == 0)
dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
#print(dfLive)

createdAt = df.get_column('created_at').unique()
dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))

propIds = dfLive.get_column('property_id').unique()
createdAt = dfLive.get_column('created_at').unique()
#print(createdAt)

fullPreorderMatrix = []

for propId in propIds:
    # debug: print the first calendar_value per scrape date for this property
    for curCreatedAt in createdAt:
        temp = df.filter(pl.col("created_at") == curCreatedAt)
        temp = temp.filter(pl.col("property_id") == propId)
        if temp.shape[0] > 0:
            print(temp.get_column('calendar_value')[0])
        else:
            print(0)
    curPreorderList = []
    print("Property ID = " + str(propId))
    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
    #print("allLiveOccupiedDates = ",allLiveOccupiedDates)
    for date in allLiveOccupiedDates:
        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
        #print("Occupied Date = " + str(date), "with Calendar Date =", str(calLiveDate))
        numOfScrapedPreordered = 0
        foundLastDate = False
        for createDate in createdAt:
            if date > createDate:
                #print("Finding Flat Date with CreateDate =",createDate, "and Calendar Date =", calLiveDate)
                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
                #print("tempLiveDf = ", tempFlatDf)

                calVal = tempFlatDf.get_column('calendar_value')
                if len(calVal) > 0:
                    if calVal[0] == 0:
                        # Still Occupied
                        if not foundLastDate:
                            numOfScrapedPreordered += 1
                    else:
                        # Found last Date where not occupied
                        foundLastDate = True
                #print("number of Scrapes already occupied =", numOfScrapedPreordered)
                #break
            #else:
                #print("Skipped: Live Date = ",date, "Flat Date =",createDate)
        #print(propId, date, numOfScrapedPreordered)
        curPreorderList.append(numOfScrapedPreordered)
    if len(curPreorderList) > 0:
        mean = sum(curPreorderList) / len(curPreorderList)
    else:
        mean = 0
    #fullPreorderMatrix.append([propId, mean, curPreorderList])
    fullPreorderMatrix.append([propId, mean])

print(fullPreorderMatrix)
fullPreoDF = pl.DataFrame(fullPreorderMatrix, orient="row")
fullPreoDF.write_csv('dok/fullPreoDF.csv')
print(fullPreoDF)
'''
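# Note on the commented-out block above: for every property and every booked
# date in the live data (calendar_value == 0), it counts in how many of the
# preceding scrapes that same calendar date was already marked as occupied, and
# writes the per-property mean of those counts to dok/fullPreoDF.csv. An
# illustrative example with made-up dates, assuming (as the *3 below suggests)
# one scrape every 3 days: a date booked by the 2023-07-10 scrape that was
# already occupied in the scrapes of 2023-07-07, 2023-07-04 and 2023-07-01
# yields a count of 3, i.e. roughly 9 days of pre-booking lead time.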

# Filter Props to locations and calculate Means per location
inst = data.load()

propDf = inst.propIds_with_region().pl()
print(propDf)
propDf = propDf.select(
    pl.col("id").cast(pl.Int64),
    pl.col("seed_id").cast(pl.Int64),
)

preoDF = pl.read_csv('dok/fullPreoDF.csv')
preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})

# Continue here
merge = preoDF.join(propDf, how='inner', on='id')
print(merge)

print("Global meanPreorderTime = ", round(merge.get_column("meanPreorderScrapeNum").mean()*3, 2))

# 1 = Heidiland
heidi = merge.filter(pl.col("seed_id") == 1)
print("Heidiland meanPreorderTime = ", round(heidi.get_column("meanPreorderScrapeNum").mean()*3, 2))
# 2 = Davos
Davos = merge.filter(pl.col("seed_id") == 2)
print("Davos meanPreorderTime = ", round(Davos.get_column("meanPreorderScrapeNum").mean()*3, 2))
# 3 = Engadin
Engadin = merge.filter(pl.col("seed_id") == 3)
print("Engadin meanPreorderTime = ", round(Engadin.get_column("meanPreorderScrapeNum").mean()*3, 2))
# 4 = St. Moritz
Moritz = merge.filter(pl.col("seed_id") == 4)
print("St. Moritz meanPreorderTime = ", round(Moritz.get_column("meanPreorderScrapeNum").mean()*3, 2))