73 lines
2.2 KiB
Python
73 lines
2.2 KiB
Python
import pandas as pd
|
|
import os
|
|
import re
|
|
import numpy as np
|
|
|
|
def getAccuracy(df, baseLine, compLine):
    """Return the share of columns whose value is unchanged between two rows.

    The first column of ``df`` is skipped; every remaining column counts
    towards the total. A column counts as unchanged only when both values
    differ from the ``-1`` sentinel and are equal to each other.

    Args:
        df: DataFrame with one row per observation.
        baseLine: Positional index of the baseline row.
        compLine: Positional index of the row compared against the baseline.

    Returns:
        The fraction ``unchanged / compared`` as a float, or ``-1`` when
        either row position is out of bounds or there is no column to
        compare.
    """
    try:
        pair = df.iloc[[baseLine, compLine]]
    except IndexError:
        # One of the two rows does not exist (e.g. baseline + delta runs
        # past the end of the frame).
        return -1

    # Skip the first column; only the remaining ones are compared.
    comparable = pair.iloc[:, 1:]
    total = comparable.shape[1]
    if total == 0:
        # Nothing to compare: report the same sentinel as a missing row
        # instead of dividing by zero.
        return -1

    noChange = 0
    for _, series in comparable.items():
        # Positional access: row 0 is the baseline, row 1 the comparison.
        # This does not depend on the frame's index labels and also works
        # when baseLine == compLine.
        baseValue = series.iloc[0]
        compValue = series.iloc[1]
        if baseValue != -1 and compValue != -1 and baseValue == compValue:
            noChange += 1

    return noChange / total
|
|
|
|
def getMeanAccuracy(accList):
    """Return the mean of each row, ignoring ``-1`` sentinel entries.

    Args:
        accList: Iterable of rows, each a list of accuracy values in which
            ``-1`` marks an entry to be left out of the mean.

    Returns:
        A list with one mean per input row. A row with no entries left
        after filtering yields NaN.
    """

    def rowMean(row):
        valid = [x for x in row if x != -1]
        if not valid:
            # np.average([]) also yields NaN but emits a RuntimeWarning;
            # return NaN directly for rows without usable values.
            return np.nan
        return np.average(valid)

    return [rowMean(row) for row in accList]
|
|
|
|
# Row offsets between the baseline row and the row it is compared with.
# Meaning of each offset according to the original notes:
#    1 = one scrape interval
#    2 = roughly one week
#   10 = one month (30 days)
#   20 = two months
deltaList = [1, 2, 10, 20]

directory = os.fsencode("dok")

# Column names are derived from deltaList so the two cannot drift apart.
columnNames = ['property_id'] + [f'timedelay_{delta}' for delta in deltaList]

# Rows are collected in plain lists and turned into DataFrames once at the
# end; this avoids the private, quadratic DataFrame._append.
accListRows = []
accMeanRows = []

# os.listdir returns entries in arbitrary order; sort for reproducible output.
for file in sorted(os.listdir(directory)):
    filename = os.fsdecode(file)
    if not filename.endswith(".csv"):
        continue

    # The property id is the first run of digits in the file name.
    idMatch = re.search(r"\d+", filename)
    if idMatch is None:
        print(f"Skipping {filename}: no property id in file name")
        continue
    propId = idMatch.group()
    print(propId)

    df = pd.read_csv(f'dok/{filename}')

    # One list per delta; within it, every row of the file serves once as
    # the baseline and is compared with the row `delta` positions later.
    fullList = [
        [getAccuracy(df, i, i + delta) for i in range(df.shape[0])]
        for delta in deltaList
    ]
    meanList = getMeanAccuracy(fullList)

    accListRows.append(dict(zip(columnNames, [propId, *fullList])))
    accMeanRows.append(dict(zip(columnNames, [propId, *meanList])))

accListDf = pd.DataFrame(accListRows, columns=columnNames)
accMeanDf = pd.DataFrame(accMeanRows, columns=columnNames)

# Make sure the output directory exists so the export cannot fail after all
# files have already been processed.
os.makedirs('results', exist_ok=True)
accListDf.to_csv('results/accListDf.csv', index=False)
accMeanDf.to_csv('results/accMeanDf.csv', index=False)