import pandas as pd
import os
import re
import numpy as np


def getAccuracy(df, baseLine, compLine):
    """Return the share of columns whose value is unchanged between two rows.

    The rows are addressed by position. The first column is skipped and never
    compared (presumably an identifier/date column - confirm against the
    input files). Every remaining column counts towards the denominator; a
    column counts as unchanged only when both cells differ from -1 and are
    equal to each other.

    Args:
        df: DataFrame holding one row per observation.
        baseLine: Positional index of the baseline row.
        compLine: Positional index of the row compared against the baseline.

    Returns:
        The fraction ``unchanged / compared`` as a float, or the sentinel -1
        when either row position is out of bounds or when there is no column
        to compare.
    """
    try:
        pair = df.iloc[[baseLine, compLine]]
    except IndexError:
        # One of the requested rows does not exist.
        return -1

    # Drop the first column; only the remaining ones are compared.
    compared = pair.iloc[:, 1:]
    total = compared.shape[1]
    if total == 0:
        # Nothing to compare: report "not available" instead of dividing by 0.
        return -1

    noChange = 0
    for _, series in compared.items():
        # Positional access: row 0 is the baseline, row 1 the comparison row.
        # Label access would depend on the frame having a default RangeIndex.
        base = series.iloc[0]
        comp = series.iloc[1]
        if base != -1 and comp != -1 and base == comp:
            noChange += 1

    return noChange / total


def getMeanAccuracy(accList):
    """Return the mean of every row of *accList*, ignoring -1 entries.

    Args:
        accList: Iterable of rows, each an iterable of accuracy values in
            which -1 marks an entry to ignore.

    Returns:
        A list with one mean per input row. A row without any entry other
        than -1 yields NaN.
    """
    out = []
    for row in accList:
        valid = [x for x in row if x != -1]
        # np.average of an empty list also gives NaN, but emits a
        # RuntimeWarning; return NaN directly for that case.
        out.append(np.average(valid) if valid else np.nan)
    return out


# Row offsets between the baseline row and the comparison row:
#   1  = 1 scrape interval
#   2  = approx. 1 week
#   10 = 1 month (30 days)
#   20 = 2 months
deltaList = [1, 2, 10, 20]

# Input directory with the per-property CSV files and output directory.
DATA_DIR = "dok"
RESULT_DIR = "results"

# One result column per delta, derived from deltaList so both stay in sync.
columnNames = ['property_id'] + [f'timedelay_{delta}' for delta in deltaList]


def main():
    """Compute accuracies for every CSV file in DATA_DIR and write the results.

    For each ``*.csv`` file the first run of digits in the file name is used
    as the property id. For every delta in ``deltaList`` and every row of the
    file, ``getAccuracy`` is evaluated between that row and the row ``delta``
    positions later. The per-row accuracies are written to
    ``results/accListDf.csv`` and their means to ``results/accMeanDf.csv``.

    Raises:
        ValueError: If a CSV file name contains no digits.
    """
    listRows = []
    meanRows = []

    # Sorted so the row order of the output files is reproducible.
    for filename in sorted(os.listdir(DATA_DIR)):
        if not filename.endswith(".csv"):
            continue

        idMatch = re.search(r"\d+", filename)
        if idMatch is None:
            raise ValueError(
                f"cannot derive a property id from file name {filename!r}"
            )
        propId = idMatch.group()
        print(propId)

        df = pd.read_csv(os.path.join(DATA_DIR, filename))

        # One list per delta, holding one accuracy per baseline row.
        fullList = [
            [getAccuracy(df, i, i + delta) for i in range(df.shape[0])]
            for delta in deltaList
        ]
        meanList = getMeanAccuracy(fullList)

        listRows.append(dict(zip(columnNames, [propId, *fullList])))
        meanRows.append(dict(zip(columnNames, [propId, *meanList])))

    # Build each frame once instead of appending row by row.
    accListDf = pd.DataFrame(listRows, columns=columnNames)
    accMeanDf = pd.DataFrame(meanRows, columns=columnNames)

    os.makedirs(RESULT_DIR, exist_ok=True)
    accListDf.to_csv(os.path.join(RESULT_DIR, 'accListDf.csv'), index=False)
    accMeanDf.to_csv(os.path.join(RESULT_DIR, 'accMeanDf.csv'), index=False)


if __name__ == "__main__":
    main()