Pre-booking time investigation completed
This commit is contained in:
parent 1e0b9f1233
commit 338d3e9cc2

.gitignore (vendored): 2 changes
@@ -23,6 +23,7 @@
 *.ipr
 .idea/
 
+
 # eclipse project file
 .settings/
 .classpath
@@ -65,3 +66,4 @@ env3.*/
 # duckdb
 *.duckdb
 
+/src/mauro/dok/
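
(The leading slash anchors /src/mauro/dok/ to the repository root and the trailing slash matches only a directory, so the rule covers exactly the dok output folder that the analysis script below writes its CSVs into.)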
@@ -71,6 +71,18 @@ class Database:
                 regions.name
             """)
 
+    def propIds_with_region(self):
+        return self.connection.sql("""
+            SELECT
+                properties.id, seed_id, regions.name
+            FROM
+                consultancy_d.properties
+            LEFT JOIN
+                consultancy_d.seeds ON seeds.id = properties.seed_id
+            LEFT JOIN
+                consultancy_d.regions ON regions.id = seeds.region_id
+            """)
+
     def properties_unreachable(self):
         return self.connection.sql("""
             SELECT
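
A usage sketch for the new query method, assuming `data.load()` returns this `Database` instance (as the analysis script below does); the DuckDB relation returned by `connection.sql(...)` converts to polars via `.pl()`:

    inst = data.load()
    propDf = inst.propIds_with_region().pl()  # columns: id, seed_id, name
    print(propDf)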

src/mauro/dok/fullPreoDF.csv: 1312 lines (new file)
File diff suppressed because it is too large
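
The suppressed file holds one row per property (property id, mean preorder scrape count), produced by the loop in the analysis script below. polars assigns default names to row-oriented data built without a schema, which is why the script renames `column_0`/`column_1` after re-reading the CSV. A minimal sketch of that behavior:

    import polars as pl

    df = pl.DataFrame([[42, 1.5]], orient="row")
    print(df.columns)         # ['column_0', 'column_1']
    df.write_csv("out.csv")   # header row: column_0,column_1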
											
										
									
								
@@ -1,34 +1,115 @@
import data
from data import etl_pipelines as ep
import polars as pl
from datetime import datetime, timedelta
import pandas as pd


'''
# Create data
# Get data from the DB
inst = data.load()

df = inst.extractions().pl()
dfLive = ep.liveDates_Pipeline(df)
dfFlat = ep.expansion_Pipeline(dfLive)

# write each pipeline stage to its own file
dfLive.write_csv('dok/liveDates.csv')
dfFlat.write_csv('dok/flatDates.csv')
print(dfFlat)
'''
'''
# Load data from CSV
dfLive = pl.read_csv('dok/liveDates.csv')
dfFlat = pl.read_csv('dok/flatDates.csv')

# unfiltered copy of the live data, used by the debug loop below
df = pl.read_csv('dok/liveDates.csv')
df = df.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))

# Step 1: get all occupied dates in the live data (calendar_value == 0)
dfLive = dfLive.filter(pl.col("calendar_value") == 0)
dfLive = dfLive.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfLive = dfLive.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))
#print(dfLive)

dfFlat = dfFlat.with_columns(pl.col("created_at").str.to_date("%Y-%m-%d"))
dfFlat = dfFlat.with_columns(pl.col("calendar_date").str.to_date("%Y-%m-%d"))

propIds = dfLive.get_column('property_id').unique()
# sort the scrape dates: the run counting below depends on chronological
# order, which Series.unique() alone does not guarantee
createdAt = dfLive.get_column('created_at').unique().sort()
#print(createdAt)

fullPreorderMatrix = []

for propId in propIds:
    # debug output: first calendar value per scrape date for this property
    for createDate in createdAt:
        temp = df.filter(pl.col("created_at") == createDate)
        temp = temp.filter(pl.col("property_id") == propId)
        if temp.shape[0] > 0:
            print(temp.get_column('calendar_value')[0])
        else:
            print(0)
    curPreorderList = []
    print("Property ID = " + str(propId))
    tempPropFlatDf = dfFlat.filter(pl.col("property_id") == propId)
    tempPropLiveDf = dfLive.filter(pl.col("property_id") == propId)
    allLiveOccupiedDates = tempPropLiveDf.filter(pl.col("calendar_value") == 0).get_column('created_at')
    #print("allLiveOccupiedDates =", allLiveOccupiedDates)
    for date in allLiveOccupiedDates:
        calLiveDate = tempPropLiveDf.filter(pl.col("created_at") == date).get_column('calendar_date')[0]
        #print("Occupied Date =", str(date), "with Calendar Date =", str(calLiveDate))
        numOfScrapedPreordered = 0
        foundLastDate = False
        for createDate in createdAt:
            if date > createDate:
                #print("Finding Flat Date with CreateDate =", createDate, "and Calendar Date =", calLiveDate)
                tempFlatDf = tempPropFlatDf.filter(pl.col("created_at") == createDate)
                tempFlatDf = tempFlatDf.filter(pl.col("calendar_date") == calLiveDate)
                #print("tempFlatDf =", tempFlatDf)

                calVal = tempFlatDf.get_column('calendar_value')
                if len(calVal) > 0:
                    if calVal[0] == 0:
                        # still occupied at this earlier scrape
                        if not foundLastDate:
                            numOfScrapedPreordered += 1
                    else:
                        # found the last date where the property was not occupied
                        foundLastDate = True
                        #print("number of scrapes already occupied =", numOfScrapedPreordered)
                        #break
            #else:
                #print("Skipped: Live Date =", date, "Flat Date =", createDate)
        #print(propId, date, numOfScrapedPreordered)
        curPreorderList.append(numOfScrapedPreordered)
    if len(curPreorderList) > 0:
        mean = sum(curPreorderList) / len(curPreorderList)
    else:
        mean = 0
    #fullPreorderMatrix.append([propId, mean, curPreorderList])
    fullPreorderMatrix.append([propId, mean])

print(fullPreorderMatrix)
fullPreoDF = pl.DataFrame(fullPreorderMatrix, orient="row")
fullPreoDF.write_csv('dok/fullPreoDF.csv')
print(fullPreoDF)
'''
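
# Illustration: the run counting in the loop above amounts to the length of
# the leading run of "occupied" flags (calendar_value == 0) across the sorted
# scrape dates. A hypothetical helper with the core semantics (scrapes with
# no matching row are simply skipped in the original):
def count_preorder_scrapes(calendar_values):
    n = 0
    for v in calendar_values:
        if v != 0:
            break  # first scrape where the date was free ends the run
        n += 1
    return n

assert count_preorder_scrapes([0, 0, 1, 0]) == 2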


# Filter properties by location and calculate means per location
inst = data.load()

propDf = inst.propIds_with_region().pl()
print(propDf)
propDf = propDf.select(
    # keep only the id columns, cast so the join on id below matches preoDF
    pl.col("id").cast(pl.Int64),
    pl.col("seed_id").cast(pl.Int64),
)

preoDF = pl.read_csv('dok/fullPreoDF.csv')
preoDF = preoDF.rename({"column_0": "id", "column_1": "meanPreorderScrapeNum"})


# Continue here
merge = preoDF.join(propDf, how='inner', on='id')
print(merge)

# the factor 3 converts the scrape count into days (assuming one scrape
# roughly every 3 days)
print("Global meanPreorderTime =", round(merge.get_column("meanPreorderScrapeNum").mean() * 3, 2))

# 1 = Heidiland
heidi = merge.filter(pl.col("seed_id") == 1)
print("Heidiland meanPreorderTime =", round(heidi.get_column("meanPreorderScrapeNum").mean() * 3, 2))
# 2 = Davos
davos = merge.filter(pl.col("seed_id") == 2)
print("Davos meanPreorderTime =", round(davos.get_column("meanPreorderScrapeNum").mean() * 3, 2))
# 3 = Engadin
engadin = merge.filter(pl.col("seed_id") == 3)
print("Engadin meanPreorderTime =", round(engadin.get_column("meanPreorderScrapeNum").mean() * 3, 2))
# 4 = St. Moritz
moritz = merge.filter(pl.col("seed_id") == 4)
print("St. Moritz meanPreorderTime =", round(moritz.get_column("meanPreorderScrapeNum").mean() * 3, 2))
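
# Sketch of the same per-region summary as a single aggregation instead of
# four filter/print blocks (group_by requires a recent polars release; older
# versions spell it groupby). Output column name is illustrative.
summary = (
    merge.group_by("seed_id")
    .agg((pl.col("meanPreorderScrapeNum").mean() * 3).round(2).alias("meanPreorderTime"))
    .sort("seed_id")
)
print(summary)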