# ConsultancyProject_2_ETL/propCountperScrape.py
# (64 lines, 1.8 KiB, Python)

import Data_Analysis as DA
import pandas as pd
import matplotlib.pyplot as plt
# Read the per-scrape property table from disk.
propPerScrape = pd.read_csv('results/PropertiesPerScrape.csv')
# Discard the first CSV column, then transpose so the remaining column labels
# become the row index (the same orientation as the property data).
propPerScrape = propPerScrape.drop(columns=propPerScrape.columns[0]).T
# Expose the row index as an integer 'property_id' column; it is the merge key.
propPerScrape['property_id'] = propPerScrape.index.astype(int)
# Fetch the property records from the database and keep only the id and the
# region of each one; the geoLocation column is not used here.
propData = pd.DataFrame(
    DA.getPropertyDataFromDB(),
    columns=['property_id', 'region', 'geoLocation'],
).drop(columns=['geoLocation'])
# Cast the merge key to int in preparation for the merge.
propData['property_id'] = propData['property_id'].astype(int)
# Join the region information onto the per-scrape table. A right join keeps
# every row of propPerScrape, even when propData has no matching property_id.
merged_df = propData.merge(propPerScrape, on='property_id', how='right')
# Build one sub-frame per region code (1-4). Judging by the variable names the
# codes stand for Heidiland, Davos, Engadin and St. Moritz -- TODO confirm.
heidiProp, davosProp, EngadProp, StMorProp = (
    merged_df[merged_df['region'] == regionCode] for regionCode in (1, 2, 3, 4)
)
dfList = [heidiProp, davosProp, EngadProp, StMorProp]
# For every region, drop the two non-scrape columns and sum the remaining
# columns over all rows, giving one total per scrape column.
outList = [
    regionDf.drop(columns=['property_id', 'region']).sum()
    for regionDf in dfList
]
# Largest per-scrape total of each region, in the same order as dfList.
maxList = [regionTotals.max() for regionTotals in outList]
print(maxList)
# Values noted by the original author:
# Heidi: 313, Davos: 296, Engadin: 597, St.Moritz: 338
# Draw one line per region; each Series in outList is plotted against its own
# index. The label is attached to the plot call itself so that every legend
# entry stays tied to its line instead of depending on plotting order.
regionLabels = ["Heidiland", "Davos", "Engadin", "St. Moritz"]
for series, regionLabel in zip(outList, regionLabels):
    plt.plot(series, label=regionLabel)
ax = plt.gca()
# NOTE(review): the upper x-limit is hard-coded; presumably the scrape columns
# run from 0 to 47 -- confirm, or derive the limit from the data.
ax.set_xlim([0, 47])
plt.xlabel('Scrape number')
plt.ylabel('number of properties')
plt.legend(loc='upper left')
# Write the image before show(): once the window opened by show() is closed,
# the figure is no longer available for saving.
plt.savefig("results/Number_of_properties_over_Scrapes.png")
# show() already renders the figure, so no further draw call is needed.
plt.show()