Completed pre-booking time investigation
parent 1e0b9f1233
commit 338d3e9cc2
@@ -23,6 +23,7 @@
 *.ipr
 .idea/
 
+
 # eclipse project file
 .settings/
 .classpath
@@ -65,3 +66,4 @@ env3.*/
 # duckdb
 *.duckdb
 
+/src/mauro/dok/
@@ -71,6 +71,18 @@ class Database:
                 regions.name
             """)
 
+    def propIds_with_region(self):
+        return self.connection.sql("""
+            SELECT
+                properties.id, seed_id, regions.name
+            FROM
+                consultancy_d.properties
+            LEFT JOIN
+                consultancy_d.seeds ON seeds.id = properties.seed_id
+            LEFT JOIN
+                consultancy_d.regions ON regions.id = seeds.region_id
+        """)
+
     def properties_unreachable(self):
         return self.connection.sql("""
             SELECT
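For reference, the added `propIds_with_region` method returns a DuckDB relation that the analysis script further down converts with `.pl()`. A minimal usage sketch, assuming `data.load()` yields a `Database` instance wrapping a DuckDB connection to the `consultancy_d` schema:

```python
import data
import polars as pl

# Assumption: data.load() returns a Database instance wrapping a
# DuckDB connection, as it is used elsewhere in this commit.
inst = data.load()

# The relation materializes to a Polars DataFrame with columns
# id, seed_id and name; properties whose seed has no matching
# region row keep a null name because of the LEFT JOINs.
propDf = inst.propIds_with_region().pl()
print(propDf.filter(pl.col("name").is_null()))
```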
(File diff suppressed because it is too large)
@@ -1,34 +1,115 @@
 import data
 from data import etl_pipelines as ep
 import polars as pl
+from datetime import datetime, timedelta
+import pandas as pd
 
 
 '''
-#Create Data
+# Get Data from DB
 inst = data.load()
 
 df = inst.extractions().pl()
-df = ep.liveDates_Pipeline(df)
+df = ep.expansion_Pipeline(df)
 
-df.write_csv('dok/liveDates.csv')
+df.write_csv('dok/flatDates.csv')
 print(df)
 '''
 
-#Load Data
-df = pl.read_csv('dok/liveDates.csv')
-
-propIds = df.get_column('property_id').unique()
-createdAt = df.get_column('created_at').unique()
-
+'''
+#Load Data from DF
+dfLive = pl.read_csv('dok/liveDates.csv')
+dfFlat = pl.read_csv('dok/flatDates.csv')
+
+# Step 1 Get all occupied dates in live data
+dfLive = dfLive.filter(pl.col("calendar_value") == 0)
+dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
+dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
+#print(dfLive)
+
+dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
+dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
+
+propIds = dfLive.get_column('property_id').unique()
+createdAt = dfLive.get_column('created_at').unique()
+#print(createdAt)
+
+fullPreorderMatrix = []
+
 for propId in propIds:
-    for createdAt in createdAt:
-        temp = df.filter(pl.col("created_at") == createdAt)
-        temp = temp.filter(pl.col("property_id") == propId)
-        if temp.shape[0] > 0:
-            print(temp.get_column('calendar_value')[0])
-        else:
-            print(0)
-
-#Continue here
+    curPreorderList = []
+    print("Property ID = " + str(propId))
+    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
+    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
+    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
+    #print("allLiveOccupiedDates = ", allLiveOccupiedDates)
+    for date in allLiveOccupiedDates:
+        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
+        #print("Occupied Date = " + str(date), "with Calendar Date =", str(calLiveDate))
+        numOfScrapedPreordered = 0
+        foundLastDate = False
+        for createDate in createdAt:
+            if date > createDate:
+                #print("Finding Flat Date with CreateDate =", createDate, "and Calendar Date =", calLiveDate)
+                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
+                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
+                #print("tempFlatDf = ", tempFlatDf)
+
+                calVal = tempFlatDf.get_column('calendar_value')
+                if len(calVal) > 0:
+                    if calVal[0] == 0:
+                        # Still Occupied
+                        if not foundLastDate:
+                            numOfScrapedPreordered += 1
+                    else:
+                        # Found last Date where not occupied
+                        foundLastDate = True
+                        #print("number of Scrapes already occupied =", numOfScrapedPreordered)
+                        #break
+            #else:
+                #print("Skipped: Live Date = ", date, "Flat Date = ", createDate)
+        #print(propId, date, numOfScrapedPreordered)
+        curPreorderList.append(numOfScrapedPreordered)
+    if len(curPreorderList) > 0:
+        mean = sum(curPreorderList) / len(curPreorderList)
+    else:
+        mean = 0
+    #fullPreorderMatrix.append([propId, mean, curPreorderList])
+    fullPreorderMatrix.append([propId, mean])
+
+print(fullPreorderMatrix)
+fullPreoDF = pl.DataFrame(fullPreorderMatrix, orient="row")
+fullPreoDF.write_csv('dok/fullPreoDF.csv')
+print(fullPreoDF)
+'''
+
+# Filter Props to locations and calculate Means per location
+inst = data.load()
+
+propDf = inst.propIds_with_region().pl()
+print(propDf)
+propDf = propDf.select(
+    pl.col("id").cast(pl.Int64),
+    pl.col("seed_id").cast(pl.Int64),
+)
+
+preoDF = pl.read_csv('dok/fullPreoDF.csv')
+preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})
+
+merge = preoDF.join(propDf, how='inner', on='id')
+print(merge)
+
+print("Global meanPreorderTime = ", round(merge.get_column("meanPreorderScrapeNum").mean()*3, 2))
+
+# 1 = Heidiland
+heidi = merge.filter(pl.col("seed_id") == 1)
+print("Heidiland meanPreorderTime = ", round(heidi.get_column("meanPreorderScrapeNum").mean()*3, 2))
+# 2 = Davos
+Davos = merge.filter(pl.col("seed_id") == 2)
+print("Davos meanPreorderTime = ", round(Davos.get_column("meanPreorderScrapeNum").mean()*3, 2))
+# 3 = Engadin
+Engadin = merge.filter(pl.col("seed_id") == 3)
+print("Engadin meanPreorderTime = ", round(Engadin.get_column("meanPreorderScrapeNum").mean()*3, 2))
+# 4 = St. Moritz
+Moritz = merge.filter(pl.col("seed_id") == 4)
+print("St. Moritz meanPreorderTime = ", round(Moritz.get_column("meanPreorderScrapeNum").mean()*3, 2))
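Each property/date pair above counts how many earlier scrapes already showed the date as occupied (`calendar_value == 0`); the trailing `*3` factors presumably convert that scrape count into days, assuming one scrape every three days. Under that assumption, the four copy-pasted per-region blocks could collapse into one grouped aggregation; a minimal sketch with a stand-in for the `merge` frame and a seed-to-region mapping taken from the comments above:

```python
import polars as pl

SCRAPE_INTERVAL_DAYS = 3  # assumption: the scraper ran every three days

# Assumed seed_id -> region mapping, from the comments in the script above.
regions = pl.DataFrame({"seed_id": [1, 2, 3, 4],
                        "name": ["Heidiland", "Davos", "Engadin", "St. Moritz"]})

# Stand-in for the joined frame built in the script (id, meanPreorderScrapeNum, seed_id).
merge = pl.DataFrame({
    "id": [10, 11, 12, 13],
    "meanPreorderScrapeNum": [2.0, 4.5, 3.0, 1.5],
    "seed_id": [1, 2, 3, 4],
})

# One grouped aggregation replaces the four filter/print blocks, applying
# the same *3 step to turn mean scrape counts into days.
perRegion = (
    merge.join(regions, on="seed_id", how="left")
         .group_by("name")
         .agg(pl.col("meanPreorderScrapeNum").mean())
         .with_columns((pl.col("meanPreorderScrapeNum") * SCRAPE_INTERVAL_DAYS)
                       .round(2)
                       .alias("meanPreorderDays"))
)
print(perRegion)
```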