Pre-booking time investigation completed
This commit is contained in:
parent 1e0b9f1233
commit 338d3e9cc2

.gitignore (vendored): 2 changes
@@ -23,6 +23,7 @@
 *.ipr
 .idea/
 
+
 # eclipse project file
 .settings/
 .classpath
@@ -65,3 +66,4 @@ env3.*/
 # duckdb
 *.duckdb
 
+/src/mauro/dok/
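
(The leading slash anchors /src/mauro/dok/ to the repository root and the trailing slash matches only a directory, so the rule covers exactly the dok output folder that the analysis script below writes its CSVs into.)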
@@ -71,6 +71,18 @@ class Database:
                 regions.name
             """)
 
+    def propIds_with_region(self):
+        return self.connection.sql("""
+            SELECT
+                properties.id, seed_id, regions.name
+            FROM
+                consultancy_d.properties
+            LEFT JOIN
+                consultancy_d.seeds ON seeds.id = properties.seed_id
+            LEFT JOIN
+                consultancy_d.regions ON regions.id = seeds.region_id
+            """)
+
     def properties_unreachable(self):
         return self.connection.sql("""
             SELECT
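
A usage sketch for the new query method, assuming `data.load()` returns this `Database` instance (as the analysis script below does); the DuckDB relation returned by `connection.sql(...)` converts to polars via `.pl()`:

    inst = data.load()
    propDf = inst.propIds_with_region().pl()  # columns: id, seed_id, name
    print(propDf)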

src/mauro/dok/fullPreoDF.csv: 1312 lines (new file)
File diff suppressed because it is too large
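
The suppressed file holds one row per property (property id, mean preorder scrape count), produced by the loop in the analysis script below. polars assigns default names to row-oriented data built without a schema, which is why the script renames `column_0`/`column_1` after re-reading the CSV. A minimal sketch of that behavior:

    import polars as pl

    df = pl.DataFrame([[42, 1.5]], orient="row")
    print(df.columns)         # ['column_0', 'column_1']
    df.write_csv("out.csv")   # header row: column_0,column_1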
											
										
									
								
@@ -1,34 +1,115 @@
import data
from data import etl_pipelines as ep
import polars as pl
from datetime import datetime, timedelta
import pandas as pd


'''
# Create data
# Get data from the DB
inst = data.load()

df = inst.extractions().pl()
dfLive = ep.liveDates_Pipeline(df)
dfFlat = ep.expansion_Pipeline(dfLive)

# write each pipeline stage to its own file
dfLive.write_csv('dok/liveDates.csv')
dfFlat.write_csv('dok/flatDates.csv')
print(dfFlat)
'''
'''
# Load data from CSV
dfLive = pl.read_csv('dok/liveDates.csv')
dfFlat = pl.read_csv('dok/flatDates.csv')

# unfiltered copy of the live data, used by the debug loop below
df = pl.read_csv('dok/liveDates.csv')
df = df.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))

# Step 1: get all occupied dates in the live data (calendar_value == 0)
dfLive = dfLive.filter(pl.col("calendar_value") == 0)
dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
#print(dfLive)

dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))

propIds = dfLive.get_column('property_id').unique()
# sort the scrape dates: the run counting below depends on chronological
# order, which Series.unique() alone does not guarantee
createdAt = dfLive.get_column('created_at').unique().sort()
#print(createdAt)

fullPreorderMatrix = []

for propId in propIds:
    # debug output: first calendar value per scrape date for this property
    for createDate in createdAt:
        temp = df.filter(pl.col("created_at") == createDate)
        temp = temp.filter(pl.col("property_id") == propId)
        if temp.shape[0] > 0:
            print(temp.get_column('calendar_value')[0])
        else:
            print(0)
    curPreorderList = []
    print("Property ID = " + str(propId))
    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
    #print("allLiveOccupiedDates =", allLiveOccupiedDates)
    for date in allLiveOccupiedDates:
        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
        #print("Occupied Date =", str(date), "with Calendar Date =", str(calLiveDate))
        numOfScrapedPreordered = 0
        foundLastDate = False
        for createDate in createdAt:
            if date > createDate:
                #print("Finding Flat Date with CreateDate =", createDate, "and Calendar Date =", calLiveDate)
                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
                #print("tempFlatDf =", tempFlatDf)

                calVal = tempFlatDf.get_column('calendar_value')
                if len(calVal) > 0:
                    if calVal[0] == 0:
                        # still occupied at this earlier scrape
                        if not foundLastDate:
                            numOfScrapedPreordered += 1
                    else:
                        # found the last date where the property was not occupied
                        foundLastDate = True
                        #print("number of scrapes already occupied =", numOfScrapedPreordered)
                        #break
            #else:
                #print("Skipped: Live Date =", date, "Flat Date =", createDate)
        #print(propId, date, numOfScrapedPreordered)
        curPreorderList.append(numOfScrapedPreordered)
    if len(curPreorderList) > 0:
        mean = sum(curPreorderList) / len(curPreorderList)
    else:
        mean = 0
    #fullPreorderMatrix.append([propId, mean, curPreorderList])
    fullPreorderMatrix.append([propId, mean])

print(fullPreorderMatrix)
fullPreoDF = pl.DataFrame(fullPreorderMatrix, orient="row")
fullPreoDF.write_csv('dok/fullPreoDF.csv')
print(fullPreoDF)
'''
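
# Illustration: the run counting in the loop above amounts to the length of
# the leading run of "occupied" flags (calendar_value == 0) across the sorted
# scrape dates. A hypothetical helper with the core semantics (scrapes with
# no matching row are simply skipped in the original):
def count_preorder_scrapes(calendar_values):
    n = 0
    for v in calendar_values:
        if v != 0:
            break  # first scrape where the date was free ends the run
        n += 1
    return n

assert count_preorder_scrapes([0, 0, 1, 0]) == 2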


# Filter properties by location and calculate means per location
inst = data.load()

propDf = inst.propIds_with_region().pl()
print(propDf)
propDf = propDf.select(
    # keep only the id columns, cast so the join on id below matches preoDF
    pl.col("id").cast(pl.Int64),
    pl.col("seed_id").cast(pl.Int64),
)

preoDF = pl.read_csv('dok/fullPreoDF.csv')
preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})


# Continue here
merge = preoDF.join(propDf, how='inner', on='id')
print(merge)

# the factor 3 converts the scrape count into days (assuming one scrape
# roughly every 3 days)
print("Global meanPreorderTime =", round(merge.get_column("meanPreorderScrapeNum").mean() * 3, 2))

# 1 = Heidiland
heidi = merge.filter(pl.col("seed_id") == 1)
print("Heidiland meanPreorderTime =", round(heidi.get_column("meanPreorderScrapeNum").mean() * 3, 2))
# 2 = Davos
davos = merge.filter(pl.col("seed_id") == 2)
print("Davos meanPreorderTime =", round(davos.get_column("meanPreorderScrapeNum").mean() * 3, 2))
# 3 = Engadin
engadin = merge.filter(pl.col("seed_id") == 3)
print("Engadin meanPreorderTime =", round(engadin.get_column("meanPreorderScrapeNum").mean() * 3, 2))
# 4 = St. Moritz
moritz = merge.filter(pl.col("seed_id") == 4)
print("St. Moritz meanPreorderTime =", round(moritz.get_column("meanPreorderScrapeNum").mean() * 3, 2))
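
# Sketch of the same per-region summary as a single aggregation instead of
# four filter/print blocks (group_by requires a recent polars release; older
# versions spell it groupby). Output column name is illustrative.
summary = (
    merge.group_by("seed_id")
    .agg((pl.col("meanPreorderScrapeNum").mean() * 3).round(2).alias("meanPreorderTime"))
    .sort("seed_id")
)
print(summary)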