# Builds a one-hot table of property IDs per scrape date and writes it to
# results/PropertiesPerScrape.csv.
from pathlib import Path

import pandas as pd

import Data_Analysis as DA

# Read all scrape dates, reformat them, and remove duplicates.
# dict.fromkeys keeps only the first occurrence of each value and preserves
# the original order, unlike a plain set().
uniqueScrapeDates = list(
    dict.fromkeys(DA.reformatScrapeDates(DA.getUniqueScrapeDates()))
)

# Build a list of lists: one list of properties per scrape date.
# For every date, the first field of each item returned by
# DA.getPropsPerScrape is collected; duplicates are dropped while the order
# of first occurrence is kept.
fullPropList = [
    list(dict.fromkeys(entry[0] for entry in DA.getPropsPerScrape(scrape_date)))
    for scrape_date in uniqueScrapeDates
]
print(fullPropList)

# Convert to a DataFrame with the property IDs as column names and one-hot
# encoding: row i corresponds to the i-th list in fullPropList, and a cell is
# 1 when that property ID occurs in that list, otherwise 0.
all_property_ids = sorted({item for sublist in fullPropList for item in sublist})
print(all_property_ids)

df = pd.DataFrame(0, index=range(len(fullPropList)), columns=all_property_ids)
for i, property_list in enumerate(fullPropList):
    df.loc[i, property_list] = 1

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing.
output_path = Path('results/PropertiesPerScrape.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=True)

print(df)