ConsultancyProject_2_ETL/OLD_ConsultProj1/CalendarVisualization.py

from datetime import datetime, timedelta
import json

import MySQLdb                                  #Version 2.2.4
import pandas as pd                             #Version 2.2.3
import plotly.express as px                     #Version 5.24.1

# Load every stored calendar extraction of property 200 from the database.
# For each extraction two JSON values are read: header -> Date (when it was
# scraped) and body -> content.days (the per-day map of that scrape).
#
# Both values are selected in ONE query so that entry i of `dateoutput` and
# entry i of `output` always belong to the same extraction; two independent
# SELECTs without ORDER BY give no such guarantee.
#
# NOTE(review): the credentials are hard-coded here; consider reading them
# from the environment or a config file that is not under version control.
db = MySQLdb.connect(host="localhost",user="root",passwd="admin",db="consultancy")
try:
    cur = db.cursor()
    cur.execute("SELECT JSON_EXTRACT(header, '$.Date'), "
                "JSON_EXTRACT(body, '$.content.days') "
                "FROM extractions "
                "WHERE type='calendar' AND property_id = 200;")
    extraction_rows = cur.fetchall()
finally:
    # Release the connection even when the query fails.
    db.close()

# Keep the one-column row shape the rest of the script relies on (row[0]).
dateoutput = tuple((extraction[0],) for extraction in extraction_rows)
output = tuple((extraction[1],) for extraction in extraction_rows)

# Build the list of scrape dates used as y-axis labels.
# Each row holds a JSON array whose first element is a timestamp string in
# the form '%a, %d %b %Y %H:%M:%S %Z'; it is reduced to 'dd/mm/YYYY'.
# (The temporary is no longer called `str`, which shadowed the builtin for
# the remainder of the module.)
scrapeDates = [
    datetime.strptime(json.loads(row[0])[0], '%a, %d %b %Y %H:%M:%S %Z')
    .date()
    .strftime('%d/%m/%Y')
    for row in dateoutput
]

# Row positions that receive a y-axis tick: every 5th scrape, for however
# many scrapes were actually loaded (previously fixed at 30 rows).
ytickVals = list(range(0, len(scrapeDates), 5))

# Determine the minimum and maximum date mentioned in any scrape.
# The keys of each row's JSON object are day strings in 'YYYY-MM-DD' form.
fullDateList = []
for row in output:
    fullDateList.extend(
        datetime.strptime(day_key, '%Y-%m-%d').date()
        for day_key in json.loads(row[0])
    )

if not fullDateList:
    # Same exception type min()/max() would raise, but with a usable message.
    raise ValueError("no calendar days found in the selected extractions")

start_dt = min(fullDateList)
end_dt = max(fullDateList)
delta = timedelta(days=1)

# One entry per calendar day from start_dt to end_dt (both inclusive); these
# become the column headers. start_dt is left untouched so it still names
# the first day after this block.
HeaderDates = [
    start_dt + offset * delta
    for offset in range((end_dt - start_dt).days + 1)
]

# Create the data matrix: one list per scrape (row of `output`), one slot per
# day in HeaderDates. Days that a scrape does not mention keep the filler
# value -1; all other slots receive the value stored under that day's key.

# Day -> column position, built once instead of scanning HeaderDates for
# every single key.
column_of = {day: position for position, day in enumerate(HeaderDates)}

data = []
for row in output:
    tempList = [-1] * len(HeaderDates)
    for key, content in json.loads(row[0]).items():
        day = datetime.strptime(key, '%Y-%m-%d').date()
        tempList[column_of[day]] = content
    data.append(tempList)

# Transform to a DataFrame for Plotly: rows are scrapes, columns are days.
df = pd.DataFrame(data, columns=HeaderDates)

# Generate the Plotly heat map with a custom four-stop colour scale.
colScale = [[0, 'rgb(0, 0, 0)'], [0.33, 'rgb(204, 16, 16)'], [0.66, 'rgb(10, 102, 15)'], [1, 'rgb(17, 184, 26)']]
fig = px.imshow(df, color_continuous_scale=colScale)

# Draw a separator line below every row that actually exists
# (previously hard-coded to 30 rows).
for row_position in range(len(df)):
    fig.add_hline(y=row_position + 0.5)

# Plotly pairs tickvals[i] with ticktext[i], so each tick must be given the
# date of the row it sits on; passing the full scrapeDates list would label
# row 5 with the date of scrape 1, row 10 with scrape 2, and so on.
labelled_rows = [position for position in ytickVals if position < len(scrapeDates)]
labelled_dates = [scrapeDates[position] for position in labelled_rows]

fig.update_layout(xaxis_title="Verfügbarkeitsdaten Mietobjekt", yaxis_title="Scrapingvorgang")
fig.update_layout(xaxis=dict(tickfont=dict(size=50), side='top'))
fig.update_layout(
    yaxis=dict(
        tickfont=dict(size=50),
        tickmode='array',
        tickvals=labelled_rows,
        ticktext=labelled_dates,
    )
)
fig.update_xaxes(title_font_size=100, title_font_weight="bold", title_standoff=80)
fig.update_yaxes(title_font_size=100, title_font_weight="bold", title_standoff=80)
fig.show()
base + old 2024-10-05 14:46:03 +02:00			`from datetime import datetime, timedelta`
			`import json`

			`import MySQLdb #Version 2.2.4`
Requirements geupdated 2024-10-05 14:53:49 +02:00			`import pandas as pd #Version 2.2.3`
			`import plotly.express as px #Version 5.24.1`
base + old 2024-10-05 14:46:03 +02:00
Angepasst an neue DataDump 2024-10-05 16:47:19 +02:00			`db = MySQLdb.connect(host="localhost",user="root",passwd="admin",db="consultancy")`
base + old 2024-10-05 14:46:03 +02:00			`cur = db.cursor()`

			`cur.execute("SELECT JSON_EXTRACT(header, '$.Date') "`
			`"FROM extractions "`
			`"WHERE type='calendar' AND property_id = 200;")`
			`dateoutput = cur.fetchall()`


			`cur.execute("SELECT JSON_EXTRACT(body, '$.content.days') "`
			`"FROM extractions "`
			`"WHERE type='calendar' AND property_id = 200;")`

			`output = cur.fetchall()`
			`db.close()`

			`#createScrapedate Liste`
			`ytickVals = list(range(0, 30, 5))`
			`scrapeDates = []`
			`#print(dateoutput)`
			`for row in dateoutput:`
			`date = datetime.strptime(json.loads(row[0])[0], '%a, %d %b %Y %H:%M:%S %Z').date()`
			`str = date.strftime('%d/%m/%Y')`
			`scrapeDates.append(str)`

			`#minimales und maximales Datum ermitteln`
			`fullDateList = []`
			`for row in output:`
			`tempJson = json.loads(row[0]).keys()`
			`for key in tempJson:`
			`#print(key)`
			`fullDateList.append(datetime.strptime(key, '%Y-%m-%d').date())`

			`end_dt = max(fullDateList)`
			`start_dt = min(fullDateList)`
			`delta = timedelta(days=1)`
			`HeaderDates = []`

			`while start_dt <= end_dt:`
			`HeaderDates.append(start_dt)`
			`start_dt += delta`

			`#Create data-Matrix`
			`data = []`
			`for row in output:`
			`tempList = [-1] * len(HeaderDates)`
			`tempJson = json.loads(row[0])`
			`for key in tempJson:`
			`date = datetime.strptime(key, '%Y-%m-%d').date()`
			`content = tempJson[key]`
			`index = [i for i, x in enumerate(HeaderDates) if x == date]`
			`tempList[index[0]] = content`
			`data.append(tempList)`

			`#Transform to Dataframe for Plotly`
			`df = pd.DataFrame(data, columns=HeaderDates)`

			`#Generate Plotly Diagramm`
			`colScale = [[0, 'rgb(0, 0, 0)'], [0.33, 'rgb(204, 16, 16)'], [0.66, 'rgb(10, 102, 15)'], [1, 'rgb(17, 184, 26)']]`
			`fig = px.imshow(df, color_continuous_scale= colScale)`
			`lines = list(range(0,30,1))`
			`for i in lines:`
			`#fig.add_hline(y=i+0.5, line_color="white")`
			`fig.add_hline(y=i+0.5)`

			`fig.update_layout(yaxis = dict(tickfont = dict(size=50))),`
			`fig.update_layout(xaxis = dict(tickfont = dict(size=50)))`
			`fig.update_layout(xaxis_title="Verfügbarkeitsdaten Mietobjekt", yaxis_title="Scrapingvorgang")`
			`fig.update_xaxes(title_font_size=100, title_font_weight="bold")`
			`fig.update_yaxes(title_font_size=100, title_font_weight="bold")`
			`fig.update_layout(yaxis = dict(tickmode = 'array',tickvals = ytickVals, ticktext = scrapeDates))`
			`fig.update_xaxes(title_standoff = 80)`
			`fig.update_yaxes(title_standoff = 80)`
			`fig.update_layout(xaxis={'side': 'top'})`
			`fig.show()`