Issue 3 fast fertig

#3

Der Issue ist so weit bereit, es gibt aber noch das Problem, dass das ScrapeDate in database.py nicht als Datum, sondern als Integer interpretiert wird. Deshalb ist es im Moment als Konstante implementiert.
main
mmaurostoffel 2025-01-05 21:23:10 +01:00
parent 8bcc1c57b5
commit d9cae3d0ab
3 changed files with 47 additions and 0 deletions

View File

@ -1,6 +1,7 @@
import data
import polars as pl
from data import etl_property_capacities as etl_pc
from data import etl_property_capacities_monthly as etl_pcm
from data import etl_region_capacities as etl_rc
from data import etl_region_capacities_comparison as etl_rcc
from fastapi import FastAPI, Response
@ -42,6 +43,11 @@ def property_capacities_data(id: int):
capacities = etl_pc.property_capacities(id)
return capacities
# Route: capacities of a single property aggregated per month.
# Both path parameters are forwarded unchanged to the monthly ETL function.
# NOTE(review): this handler shares its name with the handler of the
# non-monthly capacities route defined above; the route itself is still
# registered by the decorator, but a distinct name would avoid the
# redefinition - confirm before renaming.
@app.get("/property/{id}/capacities/monthly/{scrapeDate}")
def property_capacities_data(id: int, scrapeDate: str):
    return etl_pcm.property_capacities_monthly(id, scrapeDate)
# Route: base data of a single property, returned as a list of row dicts.
@app.get("/property/{id}/base")
def property_base_data(id: int):
    # Query result -> polars DataFrame -> list of dictionaries (one per row).
    base_relation = d.property_base_data(id)
    base_frame = base_relation.pl()
    return base_frame.to_dicts()

View File

@ -334,6 +334,23 @@ class Database:
created_at
""")
def extractions_propId_scrapeDate(self, property_id: int, scrape_date: str):
    """Return the earliest calendar extraction of a property from a date on.

    Args:
        property_id: Identifier of the property; coerced with ``int()``.
        scrape_date: Lower bound for ``created_at`` as an ISO date string
            (``YYYY-MM-DD``); ``datetime.date`` / ``datetime.datetime``
            objects are accepted as well.

    Returns:
        Whatever ``self.connection.sql`` returns for the query: the columns
        ``calendar`` and ``created_at`` of at most one row (``LIMIT 1``),
        ordered by ``created_at``.

    Raises:
        ValueError: If ``scrape_date`` is not a valid ISO date or
            ``property_id`` cannot be converted to an integer.
    """
    import datetime

    # Both values end up inside the SQL text, so they are normalised first:
    # only a plain integer and a canonical ISO date can reach the query.
    property_id = int(property_id)
    if isinstance(scrape_date, datetime.datetime):
        scrape_date = scrape_date.date()
    elif not isinstance(scrape_date, datetime.date):
        scrape_date = datetime.date.fromisoformat(str(scrape_date).strip())
    scrape_date_literal = scrape_date.isoformat()

    # The date must be a *quoted* literal: interpolated bare, 2024-04-16
    # would be read by the SQL engine as integer arithmetic, not as a date.
    return self.connection.sql(f"""
        SELECT
            JSON_EXTRACT(body, '$.content.days') as calendar,
            created_at
        FROM
            consultancy_d.extractions
        WHERE
            type == 'calendar' AND
            property_id = {property_id} AND
            calendar NOT NULL AND
            created_at >= '{scrape_date_literal}'
        ORDER BY
            created_at
        LIMIT 1
    """)
# Number of extracted properties per extraction run
def properties_per_extraction(self, property_id):
return self.connection.sql("""

View File

@ -0,0 +1,24 @@
from io import StringIO
import polars as pl
import data
# Module-level handle returned by data.load(); the function below runs its
# extraction query through it.
d = data.load()
def property_capacities_monthly(id: int, scrapeDate: str):
    """Sum the calendar values of one property per month.

    Args:
        id: Identifier of the property, passed on to the extraction query.
        scrapeDate: Date string passed on to the extraction query.

    Returns:
        A dict with the keys ``"scraping-date"``, ``"months"`` and
        ``"capacities"``. ``"scraping-date"`` is the ``created_at`` value of
        the extraction that was used, or the unchanged ``scrapeDate``
        argument when the query returned no row. ``"months"`` holds labels
        such as ``"Jan 2025"`` in chronological order and ``"capacities"``
        the summed calendar values, index-aligned with ``"months"``. Both
        lists are empty when no extraction was found.
    """
    extractions = d.extractions_propId_scrapeDate(id, scrapeDate).pl()

    # Defaults for the no-data case, so an empty query result yields an
    # empty answer instead of a failing column lookup on an empty frame.
    months = []
    capacities = []

    # If more than one row is returned, the last one wins.
    for row in extractions.rows(named=True):
        scrapeDate = row['created_at']
        df_calendar = pl.read_json(StringIO(row['calendar']))
        # The JSON keys (dates) become column names; after the transpose
        # they are re-attached as a regular "dates" column.
        columnTitles = df_calendar.columns
        df_calendar = (
            df_calendar.transpose()
            .with_columns(pl.Series(name="dates", values=columnTitles))
            .with_columns(pl.col("dates").str.to_date())
            # Sort while "dates" is still a real date, so the month labels
            # built next appear in chronological order.
            .sort("dates")
            .with_columns(
                pl.col("dates").dt.strftime("%b")
                + " "
                + pl.col("dates").dt.strftime("%Y")
            )
            # maintain_order keeps the sorted order; a plain group_by
            # returns its groups in an unspecified order.
            .group_by("dates", maintain_order=True)
            .agg(pl.col("column_0").sum())
        )
        months = df_calendar['dates'].to_list()
        capacities = df_calendar['column_0'].to_list()

    return {"scraping-date": scrapeDate, "months": months, 'capacities': capacities}