2024-10-05 14:46:03 +02:00
|
|
|
from datetime import datetime, timedelta
|
|
|
|
import json
|
|
|
|
|
|
|
|
import MySQLdb #Version 2.2.4
|
2024-10-05 14:53:49 +02:00
|
|
|
import pandas as pd #Version 2.2.3
|
|
|
|
import plotly.express as px #Version 5.24.1
|
2024-10-05 14:46:03 +02:00
|
|
|
|
2024-10-05 16:47:19 +02:00
|
|
|
# Load, for every calendar extraction of property 200, the scrape timestamp
# (header '$.Date') and the per-day calendar payload (body '$.content.days').
# NOTE(review): the credentials are hard-coded here; consider reading them
# from the environment or a config file.
db = MySQLdb.connect(host="localhost",user="root",passwd="admin",db="consultancy")
try:
    cur = db.cursor()
    # Both JSON fields are fetched in ONE statement so that entry i of
    # `dateoutput` and entry i of `output` always stem from the same table
    # row; two independent SELECTs without ORDER BY give no such guarantee.
    cur.execute("SELECT JSON_EXTRACT(header, '$.Date'), "
                "JSON_EXTRACT(body, '$.content.days') "
                "FROM extractions "
                "WHERE type='calendar' AND property_id = 200;")
    rows = cur.fetchall()
finally:
    # Release the connection even when the query raises.
    db.close()

# Keep the one-column row shape that the rest of the script reads via row[0].
dateoutput = tuple((scraped_at,) for scraped_at, _ in rows)
output = tuple((days,) for _, days in rows)
|
|
|
|
|
|
|
|
# Build the list of scrape dates: one 'DD/MM/YYYY' string per extraction row,
# in the order the rows were returned. They serve as y-axis tick labels later.
scrapeDates = []
for row in dateoutput:
    # row[0] is JSON text; its first element is the date string parsed below
    # (format like 'Sat, 05 Oct 2024 14:46:03 GMT').
    scraped_on = datetime.strptime(json.loads(row[0])[0], '%a, %d %b %Y %H:%M:%S %Z').date()
    # Use a descriptive local instead of `str`, which shadowed the builtin.
    scrapeDates.append(scraped_on.strftime('%d/%m/%Y'))

# Y-axis tick positions: every fifth row, derived from the actual number of
# scrape rows instead of a fixed upper bound of 30.
ytickVals = list(range(0, len(scrapeDates), 5))
|
|
|
|
|
|
|
|
# Determine the earliest and latest calendar day over all extractions and
# build the continuous list of days in between (inclusive); these days become
# the columns of the data matrix.
fullDateList = [
    datetime.strptime(day_key, '%Y-%m-%d').date()
    for row in output
    for day_key in json.loads(row[0])  # keys are 'YYYY-MM-DD' day strings
]
if not fullDateList:
    # max()/min() would fail with an opaque "empty sequence" ValueError;
    # raise the same exception type with a message that names the cause.
    raise ValueError("no calendar days found in the extraction rows")

end_dt = max(fullDateList)
start_dt = min(fullDateList)
delta = timedelta(days=1)

# start_dt is no longer used as the loop counter, so it still holds the
# minimum date after this block.
HeaderDates = [
    start_dt + offset * delta
    for offset in range((end_dt - start_dt).days + 1)
]
|
|
|
|
|
|
|
|
# Build the data matrix: one row per extraction, one column per day in
# HeaderDates. Days missing from an extraction keep the placeholder -1.
# The day -> column map is computed once so each key is resolved with a dict
# lookup instead of a scan over HeaderDates.
column_of = {day: col for col, day in enumerate(HeaderDates)}

data = []
for row in output:
    tempList = [-1] * len(HeaderDates)
    for key, content in json.loads(row[0]).items():
        day = datetime.strptime(key, '%Y-%m-%d').date()
        # Every day is expected to be present because HeaderDates spans the
        # min..max of these same keys; an unknown day raises KeyError.
        tempList[column_of[day]] = content
    data.append(tempList)

# Transform to a DataFrame for Plotly (columns are the calendar days).
df = pd.DataFrame(data, columns=HeaderDates)
|
|
|
|
|
|
|
|
# Generate the Plotly heatmap: x axis = calendar days of the rental property,
# y axis = scrape runs (one row per extraction).
colScale = [[0, 'rgb(0, 0, 0)'], [0.33, 'rgb(204, 16, 16)'], [0.66, 'rgb(10, 102, 15)'], [1, 'rgb(17, 184, 26)']]
fig = px.imshow(df, color_continuous_scale=colScale)

# One horizontal separator below every row, driven by the real row count
# rather than a fixed 30.
for row_idx in range(len(df)):
    fig.add_hline(y=row_idx + 0.5)

# With tickmode='array', ticktext[i] is drawn at tickvals[i]. Select the
# scrape date that belongs to each tick row, so the tick at row 5 shows the
# date of row 5 (passing the full scrapeDates list would show row 1 there).
tick_rows = [pos for pos in ytickVals if 0 <= pos < len(scrapeDates)]
tick_labels = [scrapeDates[pos] for pos in tick_rows]

fig.update_layout(yaxis=dict(tickfont=dict(size=50), tickmode='array',
                             tickvals=tick_rows, ticktext=tick_labels))
fig.update_layout(xaxis=dict(tickfont=dict(size=50), side='top'))
fig.update_layout(xaxis_title="Verfügbarkeitsdaten Mietobjekt", yaxis_title="Scrapingvorgang")
fig.update_xaxes(title_font_size=100, title_font_weight="bold", title_standoff=80)
fig.update_yaxes(title_font_size=100, title_font_weight="bold", title_standoff=80)
fig.show()
|
|
|
|
|