62 lines
2.4 KiB
Python
62 lines
2.4 KiB
Python
from datetime import datetime, timedelta
|
|
from io import StringIO
|
|
|
|
import polars as pl
|
|
|
|
import data
|
|
from data import etl_cache
|
|
|
|
# Module-level data handle, created once at import time.
# region_capacities_weekdays queries it via d.singleScrape_of_region_scrapDate(...).
d = data.load()
|
|
|
|
def _weekday_capacity_column(calendar_body: str, label: str):
    """Convert one scrape's calendar JSON into a per-weekday capacity column.

    The JSON is read as a single-row frame whose column names are date
    strings; it is transposed so that every date becomes a row and the
    values land in ``column_0``.

    Args:
        calendar_body: JSON text of one scraped calendar.
        label: Name given to the resulting capacity column.

    Returns:
        A frame with the columns ``weekday`` (English day name, ordered by
        polars' weekday number) and ``label``, or ``None`` when the calendar
        spans too few days to round to at least one week.
    """
    raw = pl.read_json(StringIO(calendar_body))
    dates = pl.Series(name="dates", values=raw.columns)
    calendar = (
        raw.transpose()
        .with_columns(dates)
        .with_columns(pl.col("dates").str.to_date())
    )

    date_col = calendar.get_column("dates")
    num_weeks = round((date_col.max() - date_col.min()).days / 7, 0)
    if num_weeks == 0:
        # Dividing by zero weeks would yield inf/NaN and poison the mean
        # computed over all scrapes, so such a calendar is skipped.
        return None

    return (
        calendar
        .with_columns(
            pl.col("dates").dt.weekday().alias("weekday_num"),
            pl.col("dates").dt.strftime("%A").alias("weekday"),
        )
        .drop("dates")
        .group_by(["weekday", "weekday_num"])
        .agg(pl.col("column_0").sum())
        # Sum per weekday -> average per week -> scaled by 1/2 * 100.
        # NOTE(review): presumably values range 0..2 so this is a percentage
        # -- confirm against the scraper's encoding.
        .with_columns((pl.col("column_0") / num_weeks / 2 * 100).alias("column_0"))
        .sort("weekday_num")
        .drop("weekday_num")
        .rename({"column_0": label})
    )


def region_capacities_weekdays(id: int, scrapeDate_start: str) -> dict:
    """Return the mean capacity per weekday for the scrapes of one region.

    A previously cached result is returned if present. Otherwise all scrapes
    created within one day of ``scrapeDate_start`` are loaded, every
    non-empty calendar is reduced to a per-weekday capacity column, and the
    columns are averaged row-wise. The result is stored in the cache.

    Args:
        id: Region identifier passed to the data layer and used in the
            cache file name.
        scrapeDate_start: Start of the search window, formatted ``YYYY-MM-DD``.

    Returns:
        A dict with the keys ``"scraping-date"`` (``created_at`` of the last
        row read, ``None`` if there were no rows), ``"weekdays"`` (list of
        day names) and ``"capacities"`` (list of mean values, same order).
        Both lists are empty when no usable calendar was found; that empty
        result is not cached.

    Raises:
        ValueError: If ``scrapeDate_start`` does not match ``%Y-%m-%d``.
    """
    # NOTE(review): the cache key only contains the region id, so a cached
    # result is returned regardless of scrapeDate_start -- confirm intended.
    file = f"etl_region_capacities_weekdays_{id}.obj"
    obj = etl_cache.openObj(file)
    if obj:
        return obj

    # The search window covers exactly one day starting at the given date.
    window_start = datetime.strptime(scrapeDate_start, '%Y-%m-%d')
    window_end = window_start + timedelta(days=1)

    extractions = d.singleScrape_of_region_scrapDate(id, window_start, window_end).pl()

    scrapeDate = None
    outDf = None
    counter = 0
    for row in extractions.rows(named=True):
        scrapeDate = row['created_at']
        if not row['calendarBody']:
            continue

        # The counter only serves as a unique column name per scrape.
        counter += 1
        column = _weekday_capacity_column(row['calendarBody'], str(counter))
        if column is None:
            continue

        outDf = column if outDf is None else outDf.join(column, on='weekday')

    if outDf is None:
        # No usable calendar: return an empty result instead of failing on an
        # unbound variable, and do not cache it so later data is picked up.
        return {"scraping-date": scrapeDate, "weekdays": [], "capacities": []}

    # Row-wise mean over the capacity columns only; the string label column
    # is dropped first because it cannot take part in a numeric mean.
    means = outDf.drop("weekday").mean_horizontal()

    result = {"scraping-date": scrapeDate, "weekdays": outDf['weekday'].to_list(),'capacities': means.to_list()}
    etl_cache.saveObj(file, result)
    return result