# ConsultancyProject_2_ETL/createAccuracyValues.py
#
# 47 lines
# 1.5 KiB
# Python

import Data_Analysis as DA
import pandas as pd
import os
import re
# Script body: for every per-property CSV file in ``dok/``, compute accuracy
# values for several time delays via the Data_Analysis helpers and write the
# per-property value lists and their means to CSV files in ``results/``.

# Time delays between the baseline row and the row it is compared against,
# counted in scrape intervals:
#   1  = 1 scrape interval
#   2  = approx. 1 week
#   10 = 1 month (30 days)
#   20 = 2 months
deltaList = [1, 2, 10, 20]

directory = "dok"

# One 'timedelay_<delta>' column per entry of deltaList, so the column names
# cannot drift out of sync with the deltas that are actually evaluated.
timedelayColumns = [f'timedelay_{delta}' for delta in deltaList]
columnNames = ['property_id'] + timedelayColumns

# Rows are collected in plain lists and turned into DataFrames once at the
# end; appending to a DataFrame row by row copies the whole frame every time
# and relied on the private ``DataFrame._append``.
accListRows = []
accMeanRows = []

# sorted() makes the row order of the output files reproducible; os.listdir
# returns the directory entries in arbitrary order.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".csv"):
        continue

    # The property id is taken to be the first run of digits in the file name.
    idMatch = re.search(r"\d+", filename)
    if idMatch is None:
        print(f"Skipping {filename}: no property id found in file name")
        continue
    propId = idMatch.group(0)
    print(propId)

    df = pd.read_csv(f'dok/{filename}')

    # fullList[k] holds, for deltaList[k], one value per baseline row i,
    # obtained by comparing row i with row i + delta.
    # NOTE(review): i + delta exceeds the last row index for the final
    # `delta` baselines; DA.getAccuracy is presumably expected to handle
    # that - confirm against Data_Analysis.
    fullList = [
        [DA.getAccuracy(df, i, i + delta) for i in range(df.shape[0])]
        for delta in deltaList
    ]
    # NOTE(review): assumes getMeanAccuracy returns one value per inner list,
    # indexable by position - confirm against Data_Analysis.
    meanList = DA.getMeanAccuracy(fullList)

    listRow = {'property_id': propId}
    meanRow = {'property_id': propId}
    for k, column in enumerate(timedelayColumns):
        listRow[column] = fullList[k]
        meanRow[column] = meanList[k]
    accListRows.append(listRow)
    accMeanRows.append(meanRow)

accListDf = pd.DataFrame(accListRows, columns=columnNames)
accMeanDf = pd.DataFrame(accMeanRows, columns=columnNames)

# Create the output directory up front so a missing folder does not discard
# all computed results at the very last step.
os.makedirs('results', exist_ok=True)
accListDf.to_csv('results/accListDf.csv', index=False)
accMeanDf.to_csv('results/accMeanDf.csv', index=False)