Betrifft #7. Möglicher Fix, bitte Resultat kontrollieren. Das Problem lag m. E. darin, dass durch das Hin und Her zwischen Listen und DataFrame die Typisierung der Werte verloren ging, weshalb dann auch ein entsprechender Fehler geworfen wurde.
parent
e176d1e73f
commit
67c0d85213
|
@ -1,5 +1,5 @@
|
|||
from io import StringIO
|
||||
from datetime import date
|
||||
from io import StringIO
|
||||
|
||||
import polars as pl
|
||||
|
||||
|
@ -21,7 +21,7 @@ def region_capacities(id: int):
|
|||
df_dates = pl.DataFrame()
|
||||
|
||||
# Get Data from JSON
|
||||
gridData = []
|
||||
gridData = pl.DataFrame(schema=[("scrape_date", pl.String), ("sum_hor", pl.Int64), ("calendar_width", pl.Int64)])
|
||||
dayCounts = []
|
||||
for row in extractions.rows(named=True):
|
||||
# Return 0 for sum if calendar is null
|
||||
|
@ -30,24 +30,21 @@ def region_capacities(id: int):
|
|||
sum_hor = calDF.sum_horizontal()[0]
|
||||
else:
|
||||
sum_hor = 0
|
||||
gridData.append([row['ScrapeDate'], sum_hor, calDF.width])
|
||||
gridData = gridData.vstack(pl.DataFrame({"scrape_date" : row['ScrapeDate'], "sum_hor": sum_hor, "calendar_width": calDF.width}))
|
||||
|
||||
# Create Aggregates of values
|
||||
df = pl.DataFrame(gridData, strict=False)
|
||||
df_count = df.group_by("column_0").agg(pl.col("column_1").count())
|
||||
df_sum = df.group_by("column_0").agg(pl.col("column_1").sum())
|
||||
df_numDays = df.group_by("column_0").agg(pl.col("column_2").max())
|
||||
df_count = gridData.group_by("scrape_date").agg(pl.col("sum_hor").count())
|
||||
df_sum = gridData.group_by("scrape_date").agg(pl.col("sum_hor").sum())
|
||||
df_numDays = gridData.group_by("scrape_date").agg(pl.col("calendar_width").max())
|
||||
|
||||
# Join and rename DF's
|
||||
df = df_sum.join(df_count, on= 'column_0').join(df_numDays, on= 'column_0')
|
||||
df = df.rename({"column_0": "ScrapeDate", "column_1": "Sum", "column_1_right": "num_properties", "column_2": "max_value", })
|
||||
df = df_sum.join(df_count, on= 'scrape_date').join(df_numDays, on= 'scrape_date')
|
||||
|
||||
# Calculate normed capacities for each scrapeDate
|
||||
df = df.with_columns((pl.col("Sum") / pl.col("num_properties") / (pl.col("max_value")*2) * 100).alias("capacity"))
|
||||
df = df.with_columns((pl.col("sum_hor") / pl.col("sum_hor_right") / (pl.col("calendar_width")*2) * 100).alias("capacity"))
|
||||
|
||||
# Sort the date column
|
||||
df = df.cast({"ScrapeDate": date})
|
||||
df = df.sort('ScrapeDate')
|
||||
df = df.cast({"scrape_date": date}).sort('scrape_date')
|
||||
|
||||
result = {"capacities": df['capacity'].to_list(), "dates": df['ScrapeDate'].to_list()}
|
||||
result = {"capacities": df['capacity'].to_list(), "dates": df['scrape_date'].to_list()}
|
||||
return result
|
Loading…
Reference in New Issue