ConsultancyProject_2_ETL/OLD_ConsultProj1/CalendarVisualization.py

84 lines
2.7 KiB
Python
Raw Normal View History

2024-10-05 14:46:03 +02:00
from datetime import datetime, timedelta
import json
import MySQLdb #Version 2.2.4
2024-10-05 14:53:49 +02:00
import pandas as pd #Version 2.2.3
import plotly.express as px #Version 5.24.1
2024-10-05 14:46:03 +02:00
2024-10-05 16:47:19 +02:00
# Load, for every calendar extraction of property 200, the scrape date taken
# from the header JSON and the per-day payload taken from the body JSON.
# NOTE(review): host and credentials are hard-coded here; consider moving
# them to configuration or environment variables.
db = MySQLdb.connect(host="localhost", user="root", passwd="admin", db="consultancy")
try:
    cur = db.cursor()
    # Fetch both values in ONE statement: the later sections pair the n-th
    # scrape date with the n-th payload, and two independent SELECTs without
    # an ORDER BY give no guarantee that their rows line up.
    # NOTE(review): there is still no ORDER BY, so the row order itself is
    # whatever the server returns - confirm whether a sort column exists.
    cur.execute("SELECT JSON_EXTRACT(header, '$.Date'), "
                "JSON_EXTRACT(body, '$.content.days') "
                "FROM extractions "
                "WHERE type='calendar' AND property_id = 200;")
    rows = cur.fetchall()
finally:
    # Release the connection even if the query fails.
    db.close()

# Keep the original one-column row shape so that `row[0]` still works below.
dateoutput = tuple((header_date,) for header_date, _ in rows)
output = tuple((days,) for _, days in rows)
# Build the list of scrape dates (one formatted label per extraction row).

# Row positions that are used as y-axis tick values when the figure is laid out.
ytickVals = list(range(0, 30, 5))

# Each header value is JSON whose first element is a timestamp string in the
# format '%a, %d %b %Y %H:%M:%S %Z'; only the calendar day is kept and it is
# re-formatted as dd/mm/YYYY.
scrapeDates = []
for row in dateoutput:
    scrape_day = datetime.strptime(json.loads(row[0])[0], '%a, %d %b %Y %H:%M:%S %Z').date()
    # Do not bind the result to the name `str`: that would shadow the builtin
    # for the remainder of the script.
    scrapeDates.append(scrape_day.strftime('%d/%m/%Y'))
# Determine the smallest and largest calendar day that occurs in any payload.
# The payload keys are parsed as 'YYYY-MM-DD' strings.
fullDateList = [
    datetime.strptime(day_key, '%Y-%m-%d').date()
    for record in output
    for day_key in json.loads(record[0]).keys()
]
last_day = max(fullDateList)
first_day = min(fullDateList)

# Column headers: every day from the first to the last one (inclusive), in
# ascending order and without gaps.
span_in_days = (last_day - first_day).days
HeaderDates = [first_day + timedelta(days=offset) for offset in range(span_in_days + 1)]
# Create the data matrix: one row per extraction, one column per entry of
# HeaderDates.

# Resolve each day to its column index once, instead of scanning HeaderDates
# again for every single cell.
column_of_day = {day: col for col, day in enumerate(HeaderDates)}

data = []
for row in output:
    # Days that this extraction does not mention keep the placeholder -1.
    cells = [-1] * len(HeaderDates)
    for key, content in json.loads(row[0]).items():
        day = datetime.strptime(key, '%Y-%m-%d').date()
        cells[column_of_day[day]] = content
    data.append(cells)
# Transform the matrix into a DataFrame for Plotly; the columns are labelled
# with the calendar days from HeaderDates, the rows are numbered 0..n-1.
df = pd.DataFrame(data, columns=HeaderDates)

# Generate the Plotly heatmap with a four-stop colour scale
# (black -> red -> dark green -> green from the lowest to the highest value).
colScale = [[0, 'rgb(0, 0, 0)'], [0.33, 'rgb(204, 16, 16)'], [0.66, 'rgb(10, 102, 15)'], [1, 'rgb(17, 184, 26)']]
fig = px.imshow(df, color_continuous_scale=colScale)

# Draw a horizontal separator below every heatmap row. The number of lines
# follows the actual row count instead of a fixed 30.
for row_idx in range(len(df)):
    fig.add_hline(y=row_idx + 0.5)

# Plotly pairs ticktext[i] with tickvals[i] by position, so each tick row must
# be given the scrape date of that very row (and rows beyond the available
# dates must not get a tick at all).
tick_rows = [pos for pos in ytickVals if pos < len(scrapeDates)]
tick_labels = [scrapeDates[pos] for pos in tick_rows]

# Axis ticks: large font on both axes, x axis on top, explicit y ticks.
fig.update_layout(xaxis=dict(tickfont=dict(size=50), side='top'))
fig.update_layout(yaxis=dict(tickfont=dict(size=50), tickmode='array',
                             tickvals=tick_rows, ticktext=tick_labels))

# Axis titles (user-facing text, kept verbatim) and their styling.
fig.update_layout(xaxis_title="Verfügbarkeitsdaten Mietobjekt", yaxis_title="Scrapingvorgang")
fig.update_xaxes(title_font_size=100, title_font_weight="bold", title_standoff=80)
fig.update_yaxes(title_font_size=100, title_font_weight="bold", title_standoff=80)

fig.show()