Betrifft #7. Möglicher Fix, bitte Resultat kontrollieren. Das Problem lag m. E. darin, dass durch das Hin und Her zwischen Listen und DataFrame die Typisierung der Werte verloren ging, weshalb es dann auch einen entsprechenden Fehler warf.

main
Giò Diani 2025-01-12 11:56:33 +01:00
parent e176d1e73f
commit 67c0d85213
1 changed file with 10 additions and 13 deletions

View File

@@ -1,5 +1,5 @@
-from io import StringIO
 from datetime import date
+from io import StringIO
 import polars as pl
@@ -21,7 +21,7 @@ def region_capacities(id: int):
     df_dates = pl.DataFrame()
     # Get Data from JSON
-    gridData = []
+    gridData = pl.DataFrame(schema=[("scrape_date", pl.String), ("sum_hor", pl.Int64), ("calendar_width", pl.Int64)])
     dayCounts = []
     for row in extractions.rows(named=True):
         # Return 0 for sum if calendar is null
@@ -30,24 +30,21 @@ def region_capacities(id: int):
             sum_hor = calDF.sum_horizontal()[0]
         else:
             sum_hor = 0
-        gridData.append([row['ScrapeDate'], sum_hor, calDF.width])
+        gridData = gridData.vstack(pl.DataFrame({"scrape_date" : row['ScrapeDate'], "sum_hor": sum_hor, "calendar_width": calDF.width}))
     # Create Aggregates of values
-    df = pl.DataFrame(gridData, strict=False)
-    df_count = df.group_by("column_0").agg(pl.col("column_1").count())
-    df_sum = df.group_by("column_0").agg(pl.col("column_1").sum())
-    df_numDays = df.group_by("column_0").agg(pl.col("column_2").max())
+    df_count = gridData.group_by("scrape_date").agg(pl.col("sum_hor").count())
+    df_sum = gridData.group_by("scrape_date").agg(pl.col("sum_hor").sum())
+    df_numDays = gridData.group_by("scrape_date").agg(pl.col("calendar_width").max())
     # Join and rename DF's
-    df = df_sum.join(df_count, on= 'column_0').join(df_numDays, on= 'column_0')
-    df = df.rename({"column_0": "ScrapeDate", "column_1": "Sum", "column_1_right": "num_properties", "column_2": "max_value", })
+    df = df_sum.join(df_count, on= 'scrape_date').join(df_numDays, on= 'scrape_date')
     # Calculate normed capacities for each scrapeDate
-    df = df.with_columns((pl.col("Sum") / pl.col("num_properties") / (pl.col("max_value")*2) * 100).alias("capacity"))
+    df = df.with_columns((pl.col("sum_hor") / pl.col("sum_hor_right") / (pl.col("calendar_width")*2) * 100).alias("capacity"))
     # Sort the date column
-    df = df.cast({"ScrapeDate": date})
-    df = df.sort('ScrapeDate')
+    df = df.cast({"scrape_date": date}).sort('scrape_date')
-    result = {"capacities": df['capacity'].to_list(), "dates": df['ScrapeDate'].to_list()}
+    result = {"capacities": df['capacity'].to_list(), "dates": df['scrape_date'].to_list()}
     return result