# ConsultancyProject_2_ETL/createPropertiesPerScrape.py

import Data_Analysis as DA
import pandas as pd

# Read all scrape dates, reformat them, and drop duplicates.
# dict.fromkeys keeps the first occurrence of each value in its original order.
uniqueScrapeDates = DA.getUniqueScrapeDates()
uniqueScrapeDates = DA.reformatScrapeDates(uniqueScrapeDates)
uniqueScrapeDates = list(dict.fromkeys(uniqueScrapeDates))

# Build a list of lists: the de-duplicated property IDs for each scrape date,
# in the same order as uniqueScrapeDates.
fullPropList = []
for date in uniqueScrapeDates:
    properties = DA.getPropsPerScrape(date)
    # The first field of every returned row is used as the property ID.
    # De-duplicate once per date (order-preserving) instead of rebuilding the
    # list after every single append, which was quadratic in the row count.
    propList = list(dict.fromkeys(prop[0] for prop in properties))
    fullPropList.append(propList)
print(fullPropList)

# Convert to a DataFrame with one row per scrape date (positional index, same
# order as uniqueScrapeDates) and one column per property ID; a cell is 1 when
# the property appeared in that scrape and 0 otherwise (one-hot encoding).
all_property_ids = sorted({item for sublist in fullPropList for item in sublist})
print(all_property_ids)
df = pd.DataFrame(0, index=range(len(fullPropList)), columns=all_property_ids)
for i, property_list in enumerate(fullPropList):
    df.loc[i, property_list] = 1

# NOTE: the 'results' directory must already exist; to_csv does not create it.
df.to_csv('results/PropertiesPerScrape.csv', index=True)

print(df)