ConsultancyProject_2_ETL/etl/src/data/etl_property_capacities.py
2025-01-14 19:56:15 +01:00

46 lines
1.3 KiB
Python

from io import StringIO
import polars as pl
import data
from data import etl_cache
# Module-level data handle created once at import time via the project's
# loader; provides `extractions_for(id)` below. NOTE(review): presumably a
# shared DB/dataset connection — confirm whether it is safe to share across
# callers/threads.
d = data.load()
def property_capacities(id: int) -> dict:
    """Compute per-extraction occupancy/capacity percentages for one property.

    For each extraction of the property, the calendar JSON is parsed and the
    row-wise sum of its values is compared against the theoretical maximum
    (2 * number of non-null dates) to yield a percentage.

    Results are memoized on disk via ``etl_cache`` keyed by the property id.

    Args:
        id: Property identifier (kept as ``id`` for caller compatibility,
            although it shadows the builtin).

    Returns:
        dict with keys:
            ``capacities``: list[float] — rounded percentages, oldest first.
            ``dates``: list — extraction ``created_at`` values cast to dates.
    """
    file = f"etl_property_capacities_{id}.obj"
    obj = etl_cache.openObj(file)
    if obj:
        return obj

    extractions = d.extractions_for(id).pl()

    # Stack every extraction's calendar into one frame; each concat prepends,
    # so rows end up newest-first (compensated by the reverse() below).
    df_dates = pl.DataFrame()
    for row in extractions.rows(named=True):
        df_calendar = pl.read_json(StringIO(row['calendar']))
        df_dates = pl.concat([df_calendar, df_dates], how="diagonal")

    # Row-wise sum of calendar values per extraction.
    sum_hor = df_dates.sum_horizontal()

    # Number of available (non-null) dates per extraction row.
    count_days = [
        sum(1 for value in dates if value is not None)
        for dates in df_dates.rows()
    ]

    counts = pl.DataFrame({"count_days": count_days, "sum": sum_hor})
    result = {
        "capacities": [],
        "dates": extractions['created_at'].cast(pl.Date).to_list(),
    }
    for row in counts.rows(named=True):
        # NOTE(review): assumes each date contributes at most 2 capacity
        # units — confirm against the calendar schema.
        max_capacity = row['count_days'] * 2
        if max_capacity == 0:
            # An extraction with no dates would otherwise raise
            # ZeroDivisionError; report 0% capacity instead.
            result['capacities'].append(0.0)
        else:
            result['capacities'].append(
                round(100 / max_capacity * row['sum'], 2))

    # Rows were stacked newest-first above; reverse so capacities align
    # chronologically with 'dates'.
    result['capacities'].reverse()

    etl_cache.saveObj(file, result)
    return result