PropertiesPerScrape created and a few edits
parent f7b62f4e4c
commit e8830c32e6
@@ -36,6 +36,33 @@ def getDataFromDB(propId):
    return scrapeDates, calendarData


def getUniqueScrapeDates():
    '''
    Function to get the scrape dates of all calendar extractions from the MySQL database
    '''
    db = MySQLdb.connect(host="localhost", user="root", passwd="admin", db="consultancy")
    cur = db.cursor()

    cur.execute("SELECT JSON_EXTRACT(header, '$.Date') "
                "FROM extractions "
                "WHERE type='calendar'")
    uniqueScrapeDates = cur.fetchall()
    db.close()

    return uniqueScrapeDates


def getPropsPerScrape(scrapeDate):
    '''
    Function to get the property_ids of all calendar extractions created on a given scrape date
    '''
    date = datetime.strptime(scrapeDate, '%Y-%m-%d')
    end_date = date + timedelta(days=1)

    db = MySQLdb.connect(host="localhost", user="root", passwd="admin", db="consultancy")
    cur = db.cursor()

    cur.execute("SELECT property_id "
                "FROM extractions "
                f"WHERE type='calendar' AND created_at > '{scrapeDate}' AND created_at < '{str(end_date)}'")
    uniqueScrapeDates = cur.fetchall()
    db.close()

    return uniqueScrapeDates
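One aside on the query above: the scrape-date window is spliced into the SQL text with an f-string. A parameterized form (a sketch that assumes the same MySQLdb driver and extractions table; the commit itself keeps the f-string) lets the driver handle the quoting:

    # sketch: the same filter with bound parameters instead of string interpolation
    cur.execute("SELECT property_id "
                "FROM extractions "
                "WHERE type='calendar' AND created_at > %s AND created_at < %s",
                (scrapeDate, str(end_date)))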


def getuniquePropIdFromDB():
    '''
    Function to get unique propId from MySQL database
Binary file not shown.

@@ -44,7 +44,7 @@ accuracyOverview.insert(0, "Heidi Mean", heidiMean, True)
accuracyOverview.drop(index=accuracyOverview.index[0], axis=0, inplace=True)
accuracyOverview.drop(index=accuracyOverview.index[0], axis=0, inplace=True)
accuracyOverview.to_csv('results/accuracyOverview.csv', index=False)
accuracyOverview.to_csv('results/accuracyOverview.csv', index=True)

#delete unused DF's
del merge, accuracy, propData

@@ -17,4 +17,4 @@ with open('results/allLostProperties', 'w') as f:
    write = csv.writer(f)
    write.writerow(lostProperties)

#Output: 221 of 1552 properties are lost
#Output: 221 of 1552 properties were lost at some point

@@ -0,0 +1,32 @@
import Data_Analysis as DA
import pandas as pd

#Read all scrape dates, reformat them and remove duplicates
uniqueScrapeDates = DA.getUniqueScrapeDates()
uniqueScrapeDates = DA.reformatScrapeDates(uniqueScrapeDates)
uniqueScrapeDates = list(dict.fromkeys(uniqueScrapeDates))
#print(uniqueScrapeDates)
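DA.reformatScrapeDates is defined elsewhere in Data_Analysis and its body is not part of this diff; judging from the surrounding calls it presumably unwraps the one-element tuples returned by fetchall() and trims the JSON-quoted header date down to the 'YYYY-MM-DD' string that getPropsPerScrape expects. A purely hypothetical sketch of that behaviour:

def reformatScrapeDates(rawDates):
    # hypothetical: turn (('"2023-08-15 02:00:00"',), ...) into ['2023-08-15', ...]
    return [row[0].strip('"')[:10] for row in rawDates]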

#Build a list of lists with the properties per scrape date
fullPropList = []
for date in uniqueScrapeDates:
    propList = []
    strDate = date
    properties = DA.getPropsPerScrape(strDate)
    for prop in properties:
        propList.append(prop[0])
    propList = list(dict.fromkeys(propList))
    fullPropList.append(propList)
    #print(propList)
print(fullPropList)


#Convert to a DF with the property IDs as column names, one-hot encoded
all_property_ids = sorted(set([item for sublist in fullPropList for item in sublist]))
print(all_property_ids)
df = pd.DataFrame(0, index=range(len(fullPropList)), columns=all_property_ids)
for i, property_list in enumerate(fullPropList):
    df.loc[i, property_list] = 1

df.to_csv('results/PropertiesPerScrape.csv', index=True)

print(df)
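The loop above builds the one-hot matrix by hand; scikit-learn's MultiLabelBinarizer produces the same 0/1 table in one call (a sketch, assuming sklearn is available in this environment; the script itself does not use it):

from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()                       # learns the sorted set of property IDs
onehot = mlb.fit_transform(fullPropList)          # rows = scrapes, columns = property IDs
df_alt = pd.DataFrame(onehot, columns=mlb.classes_)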

@@ -0,0 +1,63 @@
import Data_Analysis as DA
import pandas as pd
import matplotlib.pyplot as plt

#Read in the DF
propPerScrape = pd.read_csv('results/PropertiesPerScrape.csv')
propPerScrape.drop(columns=propPerScrape.columns[0], axis=1, inplace=True)
#Transpose the DF so that it has the same format as the propData
propPerScrape = propPerScrape.T
#Set the index as property_id and convert it to int for the merge
propPerScrape['property_id'] = propPerScrape.index
propPerScrape.property_id = propPerScrape.property_id.astype(int)
#print(propPerScrape)


#Pull the propData and prepare it for the merge
propData = DA.getPropertyDataFromDB()
propData = pd.DataFrame(propData, columns=['property_id', 'region', 'geoLocation'])
propData = propData.drop(columns=['geoLocation'])
propData.property_id = propData.property_id.astype(int)
#print(propData)


#Merge the DFs
merged_df = pd.merge(propData, propPerScrape, on='property_id', how='right')
#print(merged_df)


#Create sub-DFs for the individual regions
heidiProp = merged_df[merged_df['region'] == 1]
davosProp = merged_df[merged_df['region'] == 2]
EngadProp = merged_df[merged_df['region'] == 3]
StMorProp = merged_df[merged_df['region'] == 4]


dfList = [heidiProp, davosProp, EngadProp, StMorProp]
outList = []
maxList = []
for df in dfList:
    df = df.drop('property_id', axis=1)
    df = df.drop('region', axis=1)
    df = df.sum()
    maxList.append(df.max())
    outList.append(df)

print(maxList)
#Heidi: 313, Davos: 296, Engadin: 597, St.Moritz: 338
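The per-region loop can also be written as a single groupby on merged_df: dropping property_id and summing the 0/1 scrape columns per region gives the same four series (a sketch using the names defined above; regions 1 to 4 as in the sub-DFs):

countsPerScrape = (merged_df
                   .drop(columns=['property_id'])
                   .groupby('region')
                   .sum()   # rows = regions, columns = scrape numbers
                   .T)      # transpose so rows are scrapes and columns are regions
print(countsPerScrape.max())   # expected to match the maxima printed above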

for series in outList:
    plt.plot(series)


ax = plt.gca()
ax.set_xlim([0, 47])
plt.xlabel('Scrape number')
plt.ylabel('number of properties')
plt.legend(["Heidiland", "Davos", "Engadin", "St. Moritz"], loc='upper left')
plt.savefig("results/Number_of_properties_over_Scrapes.png")

plt.show()

plt.draw()

Binary file not shown. (image: 35 KiB)
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
Heidi Mean,Heidi StdDev,Davos Mean,Davos StdDev,Engadin Mean,Engadin StdDev,St. Moritz Mean,St. Moritz StdDev
0.8205301612054612,0.03521328245140846,0.8399836284786809,0.048358617863451414,0.8584327389672194,0.05319145459441233,0.8405512800767019,0.05180554811101561
0.8066005018861457,0.06818803676300687,0.830601813557425,0.04949425409715446,0.8484564978404832,0.05396669349535696,0.8289395302705753,0.05637417919934374
0.7368379473832369,0.06546064555588836,0.7598050837068276,0.06886580034893092,0.7667137312752639,0.06523018886732877,0.7565382226489596,0.06984023355676583
0.6590943554763651,0.09741268862524224,0.6767196066764449,0.09656146924686429,0.670509578923442,0.07935806376665934,0.6633952429541463,0.08233444282881987

,Heidi Mean,Heidi StdDev,Davos Mean,Davos StdDev,Engadin Mean,Engadin StdDev,St. Moritz Mean,St. Moritz StdDev
timedelay_1,0.8205301612054612,0.03521328245140846,0.8399836284786809,0.048358617863451414,0.8584327389672194,0.05319145459441233,0.8405512800767019,0.05180554811101561
timedelay_2,0.8066005018861457,0.06818803676300687,0.830601813557425,0.04949425409715446,0.8484564978404832,0.05396669349535696,0.8289395302705753,0.05637417919934374
timedelay_10,0.7368379473832369,0.06546064555588836,0.7598050837068276,0.06886580034893092,0.7667137312752639,0.06523018886732877,0.7565382226489596,0.06984023355676583
timedelay_20,0.6590943554763651,0.09741268862524224,0.6767196066764449,0.09656146924686429,0.670509578923442,0.07935806376665934,0.6633952429541463,0.08233444282881987