Created PropertiesPerScrape and a few edits

parent f7b62f4e4c
commit e8830c32e6
@@ -36,6 +36,33 @@ def getDataFromDB(propId):
     return scrapeDates, calendarData
 
+
+def getUniqueScrapeDates():
+    db = MySQLdb.connect(host="localhost", user="root", passwd="admin", db="consultancy")
+    cur = db.cursor()
+
+    cur.execute("SELECT JSON_EXTRACT(header, '$.Date') "
+                "FROM extractions "
+                "WHERE type='calendar'")
+    uniqueScrapeDates = cur.fetchall()
+    db.close()
+
+    return uniqueScrapeDates
+
+
+def getPropsPerScrape(scrapeDate):
+    date = datetime.strptime(scrapeDate, '%Y-%m-%d')
+    end_date = date + timedelta(days=1)
+
+    db = MySQLdb.connect(host="localhost", user="root", passwd="admin", db="consultancy")
+    cur = db.cursor()
+
+    cur.execute("SELECT property_id "
+                "FROM extractions "
+                f"WHERE type='calendar' AND created_at > '{scrapeDate}' AND created_at < '{str(end_date)}'")
+    uniqueScrapeDates = cur.fetchall()
+    db.close()
+    return uniqueScrapeDates
 
 
 def getuniquePropIdFromDB():
     '''
     Function to get unique propId from MySQL database
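A note on getPropsPerScrape: building the WHERE clause with an f-string works, but it splices scrapeDate straight into the SQL text. MySQLdb also accepts parameterized queries, which let the driver handle quoting. A minimal sketch of that variant, assuming the same extractions table; the name getPropsPerScrapeParam is hypothetical, not part of the commit:

    def getPropsPerScrapeParam(scrapeDate):
        # Same query as above, but with %s placeholders escaped by the driver
        date = datetime.strptime(scrapeDate, '%Y-%m-%d')
        end_date = date + timedelta(days=1)
        db = MySQLdb.connect(host="localhost", user="root", passwd="admin", db="consultancy")
        cur = db.cursor()
        cur.execute("SELECT property_id FROM extractions "
                    "WHERE type='calendar' AND created_at > %s AND created_at < %s",
                    (scrapeDate, str(end_date)))
        rows = cur.fetchall()
        db.close()
        return rows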
Binary file not shown.
@@ -44,7 +44,7 @@ accuracyOverview.insert(0, "Heidi Mean", heidiMean, True)
 
 accuracyOverview.drop(index=accuracyOverview.index[0], axis=0, inplace=True)
 accuracyOverview.drop(index=accuracyOverview.index[0], axis=0, inplace=True)
-accuracyOverview.to_csv('results/accuracyOverview.csv', index=False)
+accuracyOverview.to_csv('results/accuracyOverview.csv', index=True)
 
 #delete unused DF's
 del merge, accuracy, propData
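Switching to index=True is what writes the row labels (timedelay_1, timedelay_2, ...) that appear as the new leading column in the accuracyOverview.csv diff further down. A small round-trip sketch, assuming that file layout:

    import pandas as pd

    # index_col=0 restores the timedelay_* labels as the row index
    accuracy = pd.read_csv('results/accuracyOverview.csv', index_col=0)
    print(accuracy.loc['timedelay_1', 'Heidi Mean'])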
@@ -17,4 +17,4 @@ with open('results/allLostProperties', 'w') as f:
     write = csv.writer(f)
     write.writerow(lostProperties)
 
-#Output: 221 of 1552 properties are lost
+#Output: 221 of 1552 properties were lost at some point
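This hunk only rewords the comment; the code that fills lostProperties is not shown. One plausible reading of "lost at some point", checked against the new PropertiesPerScrape matrix, is a property whose one-hot column ever drops from 1 back to 0. A hypothetical sketch, not the committed code:

    import pandas as pd

    # Hypothetical check: a 1 -> 0 transition means the property vanished
    # from a later scrape after appearing in an earlier one.
    presence = pd.read_csv('results/PropertiesPerScrape.csv', index_col=0)
    lost = [c for c in presence.columns if (presence[c].diff() == -1).any()]
    print(f"{len(lost)} of {presence.shape[1]} properties were lost at some point")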
@@ -0,0 +1,32 @@
+import Data_Analysis as DA
+import pandas as pd
+
+#read all scrape dates, reformat them and drop duplicates
+uniqueScrapeDates = DA.getUniqueScrapeDates()
+uniqueScrapeDates = DA.reformatScrapeDates(uniqueScrapeDates)
+uniqueScrapeDates = list(dict.fromkeys(uniqueScrapeDates))
+#print(uniqueScrapeDates)
+
+#build the list of lists of properties per scrape date
+fullPropList = []
+for date in uniqueScrapeDates:
+    propList = []
+    strDate = date
+    properties = DA.getPropsPerScrape(strDate)
+    for prop in properties:
+        propList.append(prop[0])
+    propList = list(dict.fromkeys(propList))
+    fullPropList.append(propList)
+    #print(propList)
+print(fullPropList)
+
+#convert to a DF with property ID's as column names and one-hot encoding
+all_property_ids = sorted(set([item for sublist in fullPropList for item in sublist]))
+print(all_property_ids)
+df = pd.DataFrame(0, index=range(len(fullPropList)), columns=all_property_ids)
+for i, property_list in enumerate(fullPropList):
+    df.loc[i, property_list] = 1
+
+df.to_csv('results/PropertiesPerScrape.csv', index=True)
+
+print(df)
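The loop above fills the one-hot matrix with df.loc[i, property_list] = 1. An equivalent construction via explode and get_dummies, sketched here with a toy stand-in for fullPropList, yields the same 0/1 frame:

    import pandas as pd

    fullPropList = [[101, 102], [102, 103]]   # toy stand-in for the scraped lists
    s = pd.Series(fullPropList).explode()     # one row per (scrape, property) pair
    one_hot = pd.get_dummies(s).groupby(level=0).max().astype(int)
    print(one_hot)                            # rows = scrapes, columns = property IDs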
@@ -0,0 +1,63 @@
+import Data_Analysis as DA
+import pandas as pd
+import matplotlib.pyplot as plt
+
+#read in the DF
+propPerScrape = pd.read_csv('results/PropertiesPerScrape.csv')
+propPerScrape.drop(columns=propPerScrape.columns[0], axis=1, inplace=True)
+#transpose the DF so it has the same format as the propData
+propPerScrape = propPerScrape.T
+#use the index as property_id and cast to int for the merge
+propPerScrape['property_id'] = propPerScrape.index
+propPerScrape.property_id = propPerScrape.property_id.astype(int)
+#print(propPerScrape)
+
+
+#pull the propData and prepare it for the merge
+propData = DA.getPropertyDataFromDB()
+propData = pd.DataFrame(propData, columns=['property_id', 'region', 'geoLocation'])
+propData = propData.drop(columns=['geoLocation'])
+propData.property_id = propData.property_id.astype(int)
+#print(propData)
+
+
+#merge the DF's
+merged_df = pd.merge(propData, propPerScrape, on='property_id', how='right')
+#print(merged_df)
+
+
+#create sub-DF's for the individual regions
+heidiProp = merged_df[merged_df['region'] == 1]
+davosProp = merged_df[merged_df['region'] == 2]
+EngadProp = merged_df[merged_df['region'] == 3]
+StMorProp = merged_df[merged_df['region'] == 4]
+
+
+
+dfList = [heidiProp, davosProp, EngadProp, StMorProp]
+outList = []
+maxList = []
+for df in dfList:
+    df = df.drop('property_id', axis=1)
+    df = df.drop('region', axis=1)
+    df = df.sum()
+    maxList.append(df.max())
+    outList.append(df)
+
+print(maxList)
+#Heidi: 313, Davos: 296, Engadin: 597, St.Moritz: 338
+
+for series in outList:
+    plt.plot(series)
+
+
+ax = plt.gca()
+ax.set_xlim([0, 47])
+plt.xlabel('Scrape number')
+plt.ylabel('number of properties')
+plt.legend(["Heidiland", "Davos", "Engadin", "St. Moritz"], loc='upper left')
+plt.savefig("results/Number_of_properties_over_Scrapes.png")
+
+plt.show()
+
+plt.draw()
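One caveat on the plotting tail: plt.show() blocks and tears the figure down once the window closes, so the trailing plt.draw() has no effect when this runs as a script. The important ordering, kept above, is that savefig comes before show:

    plt.savefig("results/Number_of_properties_over_Scrapes.png")  # save while the figure still exists
    plt.show()  # blocks; after the window closes the figure is gone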
Binary file not shown.
[New image added: 35 KiB]
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
-Heidi Mean,Heidi StdDev,Davos Mean,Davos StdDev,Engadin Mean,Engadin StdDev,St. Moritz Mean,St. Moritz StdDev
+,Heidi Mean,Heidi StdDev,Davos Mean,Davos StdDev,Engadin Mean,Engadin StdDev,St. Moritz Mean,St. Moritz StdDev
-0.8205301612054612,0.03521328245140846,0.8399836284786809,0.048358617863451414,0.8584327389672194,0.05319145459441233,0.8405512800767019,0.05180554811101561
+timedelay_1,0.8205301612054612,0.03521328245140846,0.8399836284786809,0.048358617863451414,0.8584327389672194,0.05319145459441233,0.8405512800767019,0.05180554811101561
-0.8066005018861457,0.06818803676300687,0.830601813557425,0.04949425409715446,0.8484564978404832,0.05396669349535696,0.8289395302705753,0.05637417919934374
+timedelay_2,0.8066005018861457,0.06818803676300687,0.830601813557425,0.04949425409715446,0.8484564978404832,0.05396669349535696,0.8289395302705753,0.05637417919934374
-0.7368379473832369,0.06546064555588836,0.7598050837068276,0.06886580034893092,0.7667137312752639,0.06523018886732877,0.7565382226489596,0.06984023355676583
+timedelay_10,0.7368379473832369,0.06546064555588836,0.7598050837068276,0.06886580034893092,0.7667137312752639,0.06523018886732877,0.7565382226489596,0.06984023355676583
-0.6590943554763651,0.09741268862524224,0.6767196066764449,0.09656146924686429,0.670509578923442,0.07935806376665934,0.6633952429541463,0.08233444282881987
+timedelay_20,0.6590943554763651,0.09741268862524224,0.6767196066764449,0.09656146924686429,0.670509578923442,0.07935806376665934,0.6633952429541463,0.08233444282881987