import Data_Analysis as DA
import pandas as pd

# Read all scrape dates, reformat them and remove duplicates
uniqueScrapeDates = DA.getUniqueScrapeDates()
uniqueScrapeDates = DA.reformatScrapeDates(uniqueScrapeDates)
uniqueScrapeDates = list(dict.fromkeys(uniqueScrapeDates))
#print(uniqueScrapeDates)

# Build a list of property-ID lists, one list per scrape date
fullPropList = []
for date in uniqueScrapeDates:
    propList = []
    strDate = date
    properties = DA.getPropsPerScrape(strDate)
    for prop in properties:
        propList.append(prop[0])
    # Remove duplicate property IDs while preserving order
    propList = list(dict.fromkeys(propList))
    fullPropList.append(propList)
    #print(propList)
print(fullPropList)

# Convert to a DataFrame with the property IDs as column names (one-hot encoding):
# one row per scrape date, 1 where the property was present in that scrape, else 0
all_property_ids = sorted(set([item for sublist in fullPropList for item in sublist]))
print(all_property_ids)
df = pd.DataFrame(0, index=range(len(fullPropList)), columns=all_property_ids)
for i, property_list in enumerate(fullPropList):
    df.loc[i, property_list] = 1
df.to_csv('results/PropertiesPerScrape.csv', index=True)
print(df)
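
# --- Minimal usage sketch (illustrative, not part of the pipeline above) ---
# Assumes the CSV written above exists; the chosen column is just a stand-in,
# since real property IDs depend on the scraped data.
#
#   import pandas as pd
#
#   one_hot = pd.read_csv('results/PropertiesPerScrape.csv', index_col=0)
#
#   # Example: count in how many scrapes the first property ID appears
#   example_property_id = one_hot.columns[0]
#   print(example_property_id, one_hot[example_property_id].sum())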