diff --git a/etl/src/api/main.py b/etl/src/api/main.py
index 66d18b5..13c5758 100644
--- a/etl/src/api/main.py
+++ b/etl/src/api/main.py
@@ -1,3 +1,6 @@
+import datetime
+from typing import List, Union
+
 import data
 import polars as pl
 from data import etl_property_capacities as etl_pc
@@ -10,106 +13,254 @@ from data import etl_region_capacities_daily as etl_rcd
 from data import etl_region_capacities_monthly as etl_rcm
 from data import etl_region_movAverage as etl_rmA
 from data import etl_region_properties_capacities as etl_rpc
-from fastapi import FastAPI, Response
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+
+class Regions(BaseModel):
+    name: str
+    id: str
+    count_properties: int
+
+class RegionBase(BaseModel):
+    name: str
+    id: str
+
+class RegionPropertiesCapacitiesValues(BaseModel):
+    date: str
+    property_id: str
+    capacity: float
+
+class RegionCapacities(BaseModel):
+    capacities: List[float]
+    dates: List
+
+class RegionCapacitiesMonthly(BaseModel):
+    months: List[str]
+    capacities: List[float]
+
+class RegionCapacitiesDaily(BaseModel):
+    weekdays: List[str]
+    capacities: List[float]
+
+class RegionPropertiesCapacities(BaseModel):
+    dates: List
+    property_ids: List
+    values: List[RegionPropertiesCapacitiesValues]
+
+class RegionMovingAverage(BaseModel):
+    dates: List
+    capacities_timeframe_before: List[Union[float, None]]
+    capacities_timeframe_after: List[Union[float, None]]
+    capacities_moving_average: List[Union[float, None]]
+
+class PropertiesGrowth(BaseModel):
+    dates: List
+    total_all: List[Union[int, None]]
+    total_heidiland: List[Union[int, None]]
+    total_engadin: List[Union[int, None]]
+    total_stmoritz: List[Union[int, None]]
+    total_davos: List[Union[int, None]]
+
+class PropertiesGeoList(BaseModel):
+    property_id: str
+    latlng: str
+    region_id: str
+
+class PropertiesGeo(BaseModel):
+    properties: List[PropertiesGeoList]
+
+class PropertyNeighboursList(BaseModel):
+    id: str
+    lat: float
+    lon: float
+
+class PropertyNeighbours(BaseModel):
+    neighbours: List[PropertyNeighboursList]
+
+class PropertyExtractionsList(BaseModel):
+    calendar: str
+    date: str
+
+class PropertyExtractions(BaseModel):
+    extractions: List[PropertyExtractionsList]
+
+class PropertyCapacities(BaseModel):
+    capacities: List[float]
+    dates: List[str]
+
+class PropertyCapacitiesMonthly(BaseModel):
+    months: List[str]
+    capacities: List[float]
+
+class PropertyCapacitiesDaily(BaseModel):
+    weekdays: List[str]
+    capacities: List[float]
+
+class PropertyBase(BaseModel):
+    property_platform_id: str
+    first_found: str
+    last_found: str
+    latlng: str
+    region_id: str
+    region_name: str
+
 
 d = data.load()
 
-app = FastAPI()
+tags_metadata = [
+    {
+        "name": "region",
+        "description": "Get data by region.",
+    },
+    {
+        "name": "property",
+        "description": "Get data by property.",
+    },
+]
+
+app = FastAPI(openapi_tags=tags_metadata)
 
 @app.get("/")
 def read_root():
     return {"Hi there!"}
 
-# regions overview: contains names, ids, and count of properties of regions
-@app.get("/regions")
-def properties_region():
+@app.get("/regions", response_model=List[Regions], tags=['region'])
+def regions():
+    """
+    Returns a list of all available regions.
+ """ return d.properties_per_region().pl().to_dicts() -# get capacities of properties, for every scraping, filterd by region; -1 = all regions -@app.get("/region/{id}/properties/capacities") -def region_property_capacities_data(id: int): - capacities = etl_rpc.region_properties_capacities(id) - return capacities +@app.get("/region/{id}/base", response_model=RegionBase, tags=['region']) +def region_base(id: int): + """ + Returns basic information about a region. + """ + base = d.region_base_data(id).pl().to_dicts() + return {"id": base[0]["id"], "name": base[0]["name"]} -# get the capacity of a region for every scraping; -1 = all regions -@app.get("/region/{id}/capacities") -def region_capacities_data(id: int): +@app.get("/region/{id}/capacities", response_model=RegionCapacities, tags=['region']) +def region_capacities(id: int): + """ + Returs the capacities of a region, for every scraping. Set id to -1 to obtain data for all regions. + """ capacities = etl_rc.region_capacities(id) return capacities -# get the capacity of a region for desired scraping date by months; -1 = all regions -@app.get("/region/{id}/capacities/monthly/{scrapeDate}") -def region_capacities_data(id: int, scrapeDate: str): - capacities = etl_rcm.region_capacities_monthly(id, scrapeDate) +@app.get("/region/{id}/capacities/monthly/{date}", response_model=RegionCapacitiesMonthly, tags=['region']) +def region_capacities_monthly(id: int, date: datetime.date): + """ + Returns the capacities of a region for specified date by months. set id to -1 to obtain data for all regions. + """ + capacities = etl_rcm.region_capacities_monthly(id, date) return capacities -# get the capacity of a region for desired scraping date by days; -1 = all regions -@app.get("/region/{id}/capacities/daily/{scrapeDate}") -def region_capacities_data(id: int, scrapeDate: str): - capacities = etl_rcd.region_capacities_daily(id, scrapeDate) +@app.get("/region/{id}/capacities/daily/{date}", response_model=RegionCapacitiesDaily, tags=['region']) +def region_capacities_daily(id: int, date: datetime.date): + """ + Returns the capacities of a region for specified date by days. set id to -1 to obtain data for all regions. + """ + capacities = etl_rcd.region_capacities_daily(id, date) return capacities -# compare the capacities of two regions -@app.get("/region/capacities/comparison/{id_1}/{id_2}") -def region_capacities_data(id_1: int, id_2: int): - capacities = etl_rcc.region_capacities_comparison(id_1, id_2) - return capacities - -# get the moving average for a region beginning from desired date -@app.get("/region/{id}/movingAverage/{startDate}") -def region_capacities_data(id: int, startDate: str): - result = etl_rmA.region_movingAverage(id, startDate) +@app.get("/region/{id}/moving-average/{date}", response_model=RegionMovingAverage, tags=['region']) +def region_capacities_data(id: int, date: datetime.date): + """ + Returns the moving average of a region for specified date. set id to -1 to obtain data for all regions. + """ + result = etl_rmA.region_movingAverage(id, date) return result -# get id and name of a region -@app.get("/region/{id}/base") -def region_base_data(id: int): - return d.region_base_data(id).pl().to_dicts() +@app.get("/region/{id}/properties/capacities", response_model=RegionPropertiesCapacities, tags=['region']) +def region_property_capacities(id: int): + """ + Returns the capacities of properties in region, for every scraping. set id to -1 to obtain data for all regions. 
+ """ + capacities = etl_rpc.region_properties_capacities(id) + return capacities -# get growth of properties categorized by regions -@app.get("/properties/growth") +@app.get("/properties/growth", response_model=PropertiesGrowth, tags=['property']) def properties_growth(): + """ + Returns the growth rate of found properties + """ options = {"dates" : d.properties_growth().pl()['date'].to_list(), "total_all" : d.properties_growth().pl()['total_all'].to_list(), "total_heidiland" : d.properties_growth().pl()['total_heidiland'].to_list(), "total_engadin" : d.properties_growth().pl()['total_engadin'].to_list(), "total_davos" : d.properties_growth().pl()['total_davos'].to_list(), "total_stmoritz" : d.properties_growth().pl()['total_stmoritz'].to_list()} return options -# get the geo coordinates for all properties -@app.get("/properties/geo") +@app.get("/properties/geo", response_model=PropertiesGeo, tags=['property']) def properties_geo(): - return d.properties_geo().pl().to_dicts() + """ + Returns the geocoordinates of properties + """ + return {"properties": d.properties_geo().pl().to_dicts()} -# get the 10 nearest properties from desired property -@app.get("/property/{id}/neighbours") +@app.get("/property/{id}/base", response_model=PropertyBase, tags=['property']) +def property_base_data(id: int): + """ + Returns basic information about a property. + """ + base = d.property_base_data(id).pl().to_dicts() + return { + "property_platform_id": base[0]['property_platform_id'], + "first_found": base[0]['property_platform_id'], + "last_found": base[0]['property_platform_id'], + "latlng": base[0]['property_platform_id'], + "region_id": base[0]['property_platform_id'], + "region_name": base[0]['property_platform_id']} + +@app.get("/property/{id}/neighbours", response_model=PropertyNeighbours, tags=['property']) def property_neighbours(id: int): - capacities = etl_pn.property_neighbours(id) - return capacities + """ + Returns the 10 nearest properties from given property. + """ + return {"neighbours" : etl_pn.property_neighbours(id)} -# get scraped data for all scrapings from desired property -@app.get("/property/{id}/extractions") +@app.get("/property/{id}/extractions", response_model=PropertyExtractions, tags=['property']) def property_extractions(id: int): - return d.extractions_for(property_id = id).pl().to_dicts() + """ + Returns extracted data from given property. + """ + return {"extractions" : d.extractions_for(property_id = id).pl().cast({"date": pl.String}).to_dicts()} -# get scraped data for all scrapings from desired property -@app.get("/property/{id}/capacities") +@app.get("/property/{id}/capacities", response_model=PropertyCapacities, tags=['property']) def property_capacities_data(id: int): + """ + Returns capacities for given property. + """ capacities = etl_pc.property_capacities(id) return capacities -# get the capacity of a property for desired scraping date by months -@app.get("/property/{id}/capacities/monthly/{scrapeDate}") -def property_capacities_data(id: int, scrapeDate: str): - capacities = etl_pcm.property_capacities_monthly(id, scrapeDate) +@app.get("/property/{id}/capacities/monthly/{date}", response_model=PropertyCapacitiesMonthly, tags=['property']) +def property_capacities_data_monthly(id: int, date: datetime.date): + """ + Returns capacities for given property and date by month. 
+ """ + capacities = etl_pcm.property_capacities_monthly(id, date) return capacities -# get the capacity of a property for desired scraping date by days -@app.get("/property/{id}/capacities/daily/{scrapeDate}") -def property_capacities_data(id: int, scrapeDate: str): - capacities = etl_pcd.property_capacities_daily(id, scrapeDate) +@app.get("/property/{id}/capacities/daily/{date}", response_model=PropertyCapacitiesDaily, tags=['property']) +def property_capacities_data_daily(id: int, date: datetime.date): + """ + Returns capacities for given property and date by day. + """ + capacities = etl_pcd.property_capacities_daily(id, date) return capacities -# get first / last time a property was fond, region id and name in which the property is located and the coordinates -@app.get("/property/{id}/base") -def property_base_data(id: int): - return d.property_base_data(id).pl().to_dicts() diff --git a/etl/src/data/etl_property_capacities.py b/etl/src/data/etl_property_capacities.py index a57c46e..edc94cc 100644 --- a/etl/src/data/etl_property_capacities.py +++ b/etl/src/data/etl_property_capacities.py @@ -34,7 +34,7 @@ def property_capacities(id: int): count_days.append(len(liste)) counts = pl.DataFrame({"count_days" : count_days, "sum" : sum_hor}) - result = {"capacities": [], "dates": extractions['date'].cast(pl.Date).to_list() } + result = {"capacities": [], "dates": extractions['date'].cast(pl.Date).cast(pl.String).to_list() } for row in counts.rows(named=True): max_capacity = row['count_days'] * 2 diff --git a/etl/src/data/etl_property_capacities_monthly.py b/etl/src/data/etl_property_capacities_monthly.py index 48e6bc5..1c21fa9 100644 --- a/etl/src/data/etl_property_capacities_monthly.py +++ b/etl/src/data/etl_property_capacities_monthly.py @@ -33,6 +33,6 @@ def property_capacities_monthly(id: int, scrapeDate: str): df_calendar = df_calendar.group_by(['dates', 'date_short', 'numDays']).agg(pl.col("column_0").sum()) df_calendar = df_calendar.with_columns((pl.col("column_0") / pl.col("numDays") / 2 * 100).alias("column_0")) df_calendar = df_calendar.sort('dates') - result = {"date": scrapeDate, "months": df_calendar['date_short'].to_list(), 'capacities': df_calendar['column_0'].to_list()} + result = {"months": df_calendar['date_short'].to_list(), 'capacities': df_calendar['column_0'].to_list()} etl_cache.saveObj(file, result) return result \ No newline at end of file diff --git a/etl/src/data/etl_region_capacities_daily.py b/etl/src/data/etl_region_capacities_daily.py index 401f874..555df54 100644 --- a/etl/src/data/etl_region_capacities_daily.py +++ b/etl/src/data/etl_region_capacities_daily.py @@ -15,9 +15,6 @@ def region_capacities_daily(id: int, scrapeDate_start: str): if obj: return obj - # String to Date - scrapeDate_start = datetime.strptime(scrapeDate_start, '%Y-%m-%d') - # Get end date of start search-window scrapeDate_end = scrapeDate_start + timedelta(days=1) @@ -62,6 +59,6 @@ def region_capacities_daily(id: int, scrapeDate_start: str): outDf = outDf.insert_column(1, means) outDf = outDf[['weekday', 'mean']] - result = {"date": scrapeDate, "weekdays": outDf['weekday'].to_list(),'capacities': outDf['mean'].to_list()} + result = {"weekdays": outDf['weekday'].to_list(),'capacities': outDf['mean'].to_list()} etl_cache.saveObj(file, result) return result \ No newline at end of file diff --git a/etl/src/data/etl_region_capacities_monthly.py b/etl/src/data/etl_region_capacities_monthly.py index 7060313..b0d165b 100644 --- a/etl/src/data/etl_region_capacities_monthly.py +++ 
diff --git a/etl/src/data/etl_region_capacities_monthly.py b/etl/src/data/etl_region_capacities_monthly.py
index 7060313..b0d165b 100644
--- a/etl/src/data/etl_region_capacities_monthly.py
+++ b/etl/src/data/etl_region_capacities_monthly.py
@@ -16,9 +16,6 @@ def region_capacities_monthly(id: int, scrapeDate_start: str):
     if obj:
         return obj
 
-    # String to Date
-    scrapeDate_start = datetime.strptime(scrapeDate_start, '%Y-%m-%d')
-
     # Get end date of start search-window
     scrapeDate_end = scrapeDate_start + timedelta(days=1)
 
diff --git a/etl/src/data/etl_region_movAverage.py b/etl/src/data/etl_region_movAverage.py
index c98025f..7457ea4 100644
--- a/etl/src/data/etl_region_movAverage.py
+++ b/etl/src/data/etl_region_movAverage.py
@@ -8,7 +8,7 @@ from data import etl_cache
 
 d = data.load()
 
-def region_movingAverage(id: int, scrape_date_start_min: str):
+def region_movingAverage(id: int, scrape_date_start_min: datetime.date):
     file = f"etl_region_movingAverage_{id}_{scrape_date_start_min}.obj"
 
     obj = etl_cache.openObj(file)
@@ -31,9 +31,6 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
     uniqueScrapeDates = uniqueScrapeDates.get_column('ScrapeDate').str.to_date()
     uniqueScrapeDates = uniqueScrapeDates.sort().to_list()
 
-    # String to Date
-    scrape_date_start_min = datetime.strptime(scrape_date_start_min, '%Y-%m-%d')
-
     # Get end date of start search-window
     scrape_date_start_max = scrape_date_start_min + timedelta(days=1)
 
@@ -41,7 +38,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
     scrape_date_end_min = scrape_date_start_min + timedelta(days=timeOffset)
 
     # Get closest ScrapeDate
-    scrape_date_end_min = min(uniqueScrapeDates, key=lambda x: abs(x - scrape_date_end_min.date()))
+    scrape_date_end_min = min(uniqueScrapeDates, key=lambda x: abs(x - scrape_date_end_min))
     scrape_date_end_max = scrape_date_end_min + timedelta(days=1)
 
     final_end_date = scrape_date_end_min + timedelta(days=calcFrame)
@@ -134,6 +131,6 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
     # Add moving_averages to df
     outDF = outDF.with_columns(moving_averages=pl.Series(moving_averages))
 
-    result = {'dates':outDF.get_column('dates').to_list(), 'cap_earlierTimeframe':outDF.get_column('sum_hor_predict').to_list(), 'cap_laterTimeframe':outDF.get_column('sum_hor_actual').to_list(), 'movAvg':outDF.get_column('moving_averages').to_list(),}
+    result = {'dates': outDF.get_column('dates').to_list(), 'capacities_timeframe_before': outDF.get_column('sum_hor_predict').to_list(), 'capacities_timeframe_after': outDF.get_column('sum_hor_actual').to_list(), 'capacities_moving_average': outDF.get_column('moving_averages').to_list()}
     etl_cache.saveObj(file, result)
     return result
\ No newline at end of file
diff --git a/etl/src/data/etl_region_properties_capacities.py b/etl/src/data/etl_region_properties_capacities.py
index 12a533d..746b857 100644
--- a/etl/src/data/etl_region_properties_capacities.py
+++ b/etl/src/data/etl_region_properties_capacities.py
@@ -52,7 +52,7 @@ def region_properties_capacities(id: int):
 
     for row in gridData.rows(named=True):
         capacity = (row['sum_hor']*100)/maxValue
-        values.append((row['scrape_date'], row['property_id'], capacity))
+        values.append({"date" : row['scrape_date'], "property_id": row['property_id'], "capacity": capacity})
 
     # Cast listOfDates to datetime
     listOfDates = listOfDates.cast(pl.Date).to_list()
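The final hunk swaps positional tuples for dicts so each row can validate against the nested RegionPropertiesCapacitiesValues model that /region/{id}/properties/capacities now declares. A standalone sketch of why that matters (model copied from main.py, sample row invented):

# Dicts validate against the nested Pydantic model; the old positional tuples do not.
from typing import List

from pydantic import BaseModel, ValidationError

class RegionPropertiesCapacitiesValues(BaseModel):
    date: str
    property_id: str
    capacity: float

class Payload(BaseModel):
    values: List[RegionPropertiesCapacitiesValues]

# New shape: accepted.
print(Payload(values=[{"date": "2024-03-01", "property_id": "42", "capacity": 73.5}]))

# Old shape: rejected before it ever reaches a client.
try:
    Payload(values=[("2024-03-01", "42", 73.5)])
except ValidationError as exc:
    print(len(exc.errors()), "validation error(s)")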