from datetime import date
from io import StringIO

import polars as pl

import data

d = data.load()

def region_capacities(region_id: int):
    # Fetch the extractions for the requested region; -1 selects the global dataset
    if region_id == -1:
        extractions = d.capacity_global().pl()
    else:
        extractions = d.capacity_of_region(region_id).pl()

    # Cast property IDs to ints so they sort numerically rather than lexicographically
    extractions = extractions.cast({"property_id": int})

    # property_id was only needed for the numeric cast above; drop it before aggregating
    # (the original drop was a no-op because its result was never assigned)
    extractions = extractions.drop("property_id")

    # Build one row per extraction from the calendar JSON:
    # scrape date, horizontal occupancy sum and calendar width
    grid_data = pl.DataFrame(
        schema=[("scrape_date", pl.String), ("sum_hor", pl.Int64), ("calendar_width", pl.Int64)]
    )
    for row in extractions.rows(named=True):
        # Treat a null calendar as empty: zero sum and zero width
        if row["calendarBody"]:
            cal_df = pl.read_json(StringIO(row["calendarBody"]))
            sum_hor = cal_df.sum_horizontal()[0]
            calendar_width = cal_df.width
        else:
            sum_hor = 0
            calendar_width = 0
        grid_data = grid_data.vstack(
            pl.DataFrame(
                {"scrape_date": row["ScrapeDate"], "sum_hor": sum_hor, "calendar_width": calendar_width},
                schema=grid_data.schema,
            )
        )

    # Aggregate per scrape date: row count, summed occupancy, widest calendar
    df_count = grid_data.group_by("scrape_date").agg(pl.col("sum_hor").count())
    df_sum = grid_data.group_by("scrape_date").agg(pl.col("sum_hor").sum())
    df_num_days = grid_data.group_by("scrape_date").agg(pl.col("calendar_width").max())

    # Join the aggregates; the count column arrives with polars' "_right" suffix
    df = df_sum.join(df_count, on="scrape_date").join(df_num_days, on="scrape_date")

    # Normalize per scrape date: mean sum per row, divided by twice the
    # calendar width, expressed as a percentage
    df = df.with_columns(
        (pl.col("sum_hor") / pl.col("sum_hor_right") / (pl.col("calendar_width") * 2) * 100).alias("capacity")
    )

    # Parse the date strings and sort chronologically
    df = df.cast({"scrape_date": date}).sort("scrape_date")

    result = {"capacities": df["capacity"].to_list(), "dates": df["scrape_date"].to_list()}
    return result
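

# A minimal usage sketch (an assumption, not part of the original module):
# it presumes the `data` module loaded above exposes the capacity tables
# queried by region_capacities, and that -1 requests the global aggregate.
if __name__ == "__main__":
    global_caps = region_capacities(-1)
    for scrape_date, capacity in zip(global_caps["dates"], global_caps["capacities"]):
        print(f"{scrape_date}: {capacity:.1f}%")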