From db4319e7d671ec3c7afe0c947cfb13fe7e1f3bcd Mon Sep 17 00:00:00 2001 From: mmaurostoffel <166130318+mmaurostoffel@users.noreply.github.com> Date: Sat, 5 Oct 2024 13:44:23 +0200 Subject: [PATCH] Repository created, old data ported --- .idea/.gitignore | 8 ++ .idea/ConsultancyProject_2_ETL.iml | 8 ++ .../inspectionProfiles/profiles_settings.xml | 6 ++ .idea/misc.xml | 4 + .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 ++ OLD_ConsProj_1/CalendarVisualization.py | 83 +++++++++++++++++++ 7 files changed, 123 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/ConsultancyProject_2_ETL.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 OLD_ConsProj_1/CalendarVisualization.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/ConsultancyProject_2_ETL.iml b/.idea/ConsultancyProject_2_ETL.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/ConsultancyProject_2_ETL.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..060d2c5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..48523a7 --- /dev/null +++ b/.idea/modules.xml @@ 
"""Plot the scraped calendar data of one property as a heatmap.

Each row of the heatmap is one scrape ("Scrapingvorgang"), each column one
calendar day.  Cells hold whatever value the scraped ``$.content.days`` JSON
object stores for that day; days a scrape did not report are filled with
``MISSING_VALUE``.
"""
from datetime import date, datetime, timedelta
import json

import pandas as pd  # Version 2.2.2

# NOTE(review): credentials are hard-coded as in the original script; consider
# moving them to configuration before this leaves a local dev machine.
DB_CONFIG = {"host": "localhost", "user": "root", "passwd": "admin", "db": "heiraterei"}
PROPERTY_ID = 200

# Format of the first entry of the JSON array stored under header -> '$.Date'.
# NOTE(review): %a / %b are locale dependent -- presumably English names; confirm.
SCRAPE_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'
# Format of the keys of the JSON object stored under body -> '$.content.days'.
DAY_KEY_FORMAT = '%Y-%m-%d'
# Placeholder for calendar days that a scrape did not contain.
MISSING_VALUE = -1
# Label every n-th scrape on the y axis.
TICK_STEP = 5

COLOR_SCALE = [
    [0, 'rgb(0, 0, 0)'],
    [0.33, 'rgb(204, 16, 16)'],
    [0.66, 'rgb(10, 102, 15)'],
    [1, 'rgb(17, 184, 26)'],
]


def fetch_calendar_rows(property_id: int = PROPERTY_ID) -> tuple:
    """Return ``(header_date_json, days_json)`` pairs for all calendar scrapes.

    Both columns come from a single query so that every scrape date is
    guaranteed to belong to the day map in the same row (two independent
    SELECTs without ORDER BY give no such guarantee).
    """
    # Imported here so the pure helpers below stay importable (and testable)
    # on machines without a MySQL client library.
    import MySQLdb  # Version 2.2.4

    db = MySQLdb.connect(**DB_CONFIG)
    try:
        cur = db.cursor()
        cur.execute(
            "SELECT JSON_EXTRACT(header, '$.Date'), "
            "JSON_EXTRACT(body, '$.content.days') "
            "FROM extractions "
            "WHERE type='calendar' AND property_id = %s;",
            (property_id,),
        )
        return cur.fetchall()
    finally:
        # Release the connection even when the query fails.
        db.close()


def parse_scrape_dates(header_values: list) -> list[str]:
    """Turn the raw ``$.Date`` JSON values into ``dd/mm/YYYY`` labels.

    Each value must be a JSON array whose first element is a timestamp in
    ``SCRAPE_DATE_FORMAT``.
    """
    labels = []
    for raw in header_values:
        scraped_on = datetime.strptime(json.loads(raw)[0], SCRAPE_DATE_FORMAT).date()
        labels.append(scraped_on.strftime('%d/%m/%Y'))
    return labels


def build_header_dates(day_maps: list[dict]) -> list[date]:
    """Return every day from the earliest to the latest key in *day_maps*.

    Raises:
        ValueError: if no scrape contains any calendar day.
    """
    all_days = [
        datetime.strptime(key, DAY_KEY_FORMAT).date()
        for days in day_maps
        for key in days
    ]
    if not all_days:
        raise ValueError("no calendar days found in the extracted data")
    first, last = min(all_days), max(all_days)
    return [first + timedelta(days=offset) for offset in range((last - first).days + 1)]


def build_matrix(day_maps: list[dict], header_dates: list[date]) -> list[list]:
    """Build one row per scrape with one cell per entry of *header_dates*."""
    # One dict lookup per cell instead of scanning header_dates every time.
    column_of = {day: col for col, day in enumerate(header_dates)}
    matrix = []
    for days in day_maps:
        row = [MISSING_VALUE] * len(header_dates)
        for key, content in days.items():
            row[column_of[datetime.strptime(key, DAY_KEY_FORMAT).date()]] = content
        matrix.append(row)
    return matrix


def tick_positions(scrape_dates: list[str], step: int = TICK_STEP) -> tuple[list[int], list[str]]:
    """Return matching ``(tickvals, ticktext)`` for every *step*-th scrape.

    Plotly pairs both lists by index, so the text list must contain exactly
    the labels of the selected rows, not the full list of scrape dates.
    """
    tickvals = list(range(0, len(scrape_dates), step))
    return tickvals, [scrape_dates[i] for i in tickvals]


def plot_heatmap(frame: pd.DataFrame, scrape_dates: list[str]):
    """Create the Plotly heatmap figure for *frame* (rows = scrapes)."""
    # Imported here for the same reason as MySQLdb above.
    import plotly.express as px  # Version 5.22.0

    fig = px.imshow(frame, color_continuous_scale=COLOR_SCALE)
    # Separator line below every row that actually exists.
    for row in range(len(frame)):
        fig.add_hline(y=row + 0.5)

    tickvals, ticktext = tick_positions(scrape_dates)
    fig.update_layout(yaxis=dict(tickfont=dict(size=50)))
    fig.update_layout(xaxis=dict(tickfont=dict(size=50)))
    fig.update_layout(xaxis_title="Verfügbarkeitsdaten Mietobjekt", yaxis_title="Scrapingvorgang")
    fig.update_xaxes(title_font_size=100, title_font_weight="bold")
    fig.update_yaxes(title_font_size=100, title_font_weight="bold")
    fig.update_layout(yaxis=dict(tickmode='array', tickvals=tickvals, ticktext=ticktext))
    fig.update_xaxes(title_standoff=80)
    fig.update_yaxes(title_standoff=80)
    fig.update_layout(xaxis={'side': 'top'})
    return fig


def main() -> None:
    """Load the scrapes from the database and show the heatmap."""
    rows = fetch_calendar_rows()
    scrape_dates = parse_scrape_dates([header for header, _ in rows])
    # Parse every JSON body once and reuse it for both the date range and the matrix.
    day_maps = [json.loads(days) for _, days in rows]
    header_dates = build_header_dates(day_maps)

    # Transform to a DataFrame for Plotly.
    df = pd.DataFrame(build_matrix(day_maps, header_dates), columns=header_dates)
    plot_heatmap(df, scrape_dates).show()


if __name__ == "__main__":
    main()