ConsultancyProject_2_ETL/createPropertiesPerScrape.py

32 lines
1.1 KiB
Python

import Data_Analysis as DA
import pandas as pd
#Alle Scrape Dates auslesen, umformatieren und doppelte Löschen
uniqueScrapeDates = DA.getUniqueScrapeDates()
uniqueScrapeDates = DA.reformatScrapeDates(uniqueScrapeDates)
uniqueScrapeDates= list(dict.fromkeys(uniqueScrapeDates))
#print(uniqueScrapeDates)
#Liste der Listen der properties pro Scrape Datum erstellen
fullPropList = []
for date in uniqueScrapeDates:
propList = []
strDate = date
properties = DA.getPropsPerScrape(strDate)
for prop in properties:
propList.append(prop[0])
propList = list(dict.fromkeys(propList))
fullPropList.append(propList)
#print(propList)
print(fullPropList)
#zu DF umwandeln, mit Property ID's in the Spaltennamen und One-Hot-Encoding
all_property_ids = sorted(set([item for sublist in fullPropList for item in sublist]))
print(all_property_ids)
df = pd.DataFrame(0, index=range(len(fullPropList)), columns=all_property_ids)
for i, property_list in enumerate(fullPropList):
df.loc[i, property_list] = 1
df.to_csv('results/PropertiesPerScrape.csv', index=True)
print(df)