Betrifft #7. Möglicher Fix, bitte Resultat kontrollieren. Das Problem lag m.E. darin, dass durch das Hin und Her zwischen Listen und DataFrame die Typisierung der Werte verloren ging, weshalb dann auch ein entsprechender Fehler geworfen wurde.
parent
e176d1e73f
commit
67c0d85213
|
@ -1,5 +1,5 @@
|
||||||
from io import StringIO
|
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
import polars as pl
|
import polars as pl
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ def region_capacities(id: int):
|
||||||
df_dates = pl.DataFrame()
|
df_dates = pl.DataFrame()
|
||||||
|
|
||||||
# Get Data from JSON
|
# Get Data from JSON
|
||||||
gridData = []
|
gridData = pl.DataFrame(schema=[("scrape_date", pl.String), ("sum_hor", pl.Int64), ("calendar_width", pl.Int64)])
|
||||||
dayCounts = []
|
dayCounts = []
|
||||||
for row in extractions.rows(named=True):
|
for row in extractions.rows(named=True):
|
||||||
# Return 0 for sum if calendar is null
|
# Return 0 for sum if calendar is null
|
||||||
|
@ -30,24 +30,21 @@ def region_capacities(id: int):
|
||||||
sum_hor = calDF.sum_horizontal()[0]
|
sum_hor = calDF.sum_horizontal()[0]
|
||||||
else:
|
else:
|
||||||
sum_hor = 0
|
sum_hor = 0
|
||||||
gridData.append([row['ScrapeDate'], sum_hor, calDF.width])
|
gridData = gridData.vstack(pl.DataFrame({"scrape_date" : row['ScrapeDate'], "sum_hor": sum_hor, "calendar_width": calDF.width}))
|
||||||
|
|
||||||
# Create Aggregates of values
|
# Create Aggregates of values
|
||||||
df = pl.DataFrame(gridData, strict=False)
|
df_count = gridData.group_by("scrape_date").agg(pl.col("sum_hor").count())
|
||||||
df_count = df.group_by("column_0").agg(pl.col("column_1").count())
|
df_sum = gridData.group_by("scrape_date").agg(pl.col("sum_hor").sum())
|
||||||
df_sum = df.group_by("column_0").agg(pl.col("column_1").sum())
|
df_numDays = gridData.group_by("scrape_date").agg(pl.col("calendar_width").max())
|
||||||
df_numDays = df.group_by("column_0").agg(pl.col("column_2").max())
|
|
||||||
|
|
||||||
# Join and rename DF's
|
# Join and rename DF's
|
||||||
df = df_sum.join(df_count, on= 'column_0').join(df_numDays, on= 'column_0')
|
df = df_sum.join(df_count, on= 'scrape_date').join(df_numDays, on= 'scrape_date')
|
||||||
df = df.rename({"column_0": "ScrapeDate", "column_1": "Sum", "column_1_right": "num_properties", "column_2": "max_value", })
|
|
||||||
|
|
||||||
# Calculate normed capacities for each scrapeDate
|
# Calculate normed capacities for each scrapeDate
|
||||||
df = df.with_columns((pl.col("Sum") / pl.col("num_properties") / (pl.col("max_value")*2) * 100).alias("capacity"))
|
df = df.with_columns((pl.col("sum_hor") / pl.col("sum_hor_right") / (pl.col("calendar_width")*2) * 100).alias("capacity"))
|
||||||
|
|
||||||
# Sort the date column
|
# Sort the date column
|
||||||
df = df.cast({"ScrapeDate": date})
|
df = df.cast({"scrape_date": date}).sort('scrape_date')
|
||||||
df = df.sort('ScrapeDate')
|
|
||||||
|
|
||||||
result = {"capacities": df['capacity'].to_list(), "dates": df['ScrapeDate'].to_list()}
|
result = {"capacities": df['capacity'].to_list(), "dates": df['scrape_date'].to_list()}
|
||||||
return result
|
return result
|
Loading…
Reference in New Issue