documentation for api

main
Giò Diani 2025-01-18 15:39:29 +01:00
parent e4e05b4788
commit cd2d211259
7 changed files with 219 additions and 77 deletions

View File

@@ -1,3 +1,6 @@
import datetime
from typing import List, Union
import data
import polars as pl
from data import etl_property_capacities as etl_pc
@@ -10,106 +13,254 @@ from data import etl_region_capacities_daily as etl_rcd
from data import etl_region_capacities_monthly as etl_rcm
from data import etl_region_movAverage as etl_rmA
from data import etl_region_properties_capacities as etl_rpc
from fastapi import FastAPI, Response
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
class Regions(BaseModel):
name: str
id: str
count_properties: int
class RegionBase(BaseModel):
name: str
id: str
class RegionPropertiesCapacitiesValues(BaseModel):
date: str
property_id: str
capacity: float
class RegionCapacities(BaseModel):
capacities: List[float]
dates: List
class RegionCapacitiesMonthly(BaseModel):
months: List[str]
capacities: List[float]
class RegionCapacitiesDaily(BaseModel):
weekdays: List[str]
capacities: List[float]
class RegionPropertiesCapacities(BaseModel):
dates: List
property_ids: List
values: List[RegionPropertiesCapacitiesValues]
class RegionMovingAverage(BaseModel):
dates: List
capacities_timeframe_before: List[Union[float, None]]
capacities_timeframe_after: List[Union[float, None]]
capacities_moving_average: List[Union[float, None]]
class PropertiesGrowth(BaseModel):
dates: List
total_all: List[Union[int, None]]
total_heidiland: List[Union[int, None]]
total_engadin: List[Union[int, None]]
total_stmoritz: List[Union[int, None]]
total_davos: List[Union[int, None]]
class PropertiesGeoList(BaseModel):
property_id: str
latlng: str
region_id: str
class PropertiesGeo(BaseModel):
properties: List[PropertiesGeoList]
class PropertyNeighboursList(BaseModel):
id: str
lat: float
lon: float
class PropertyNeighbours(BaseModel):
neighbours: List[PropertyNeighboursList]
class PropertyExtractionsList(BaseModel):
calendar: str
date: str
class PropertyExtractions(BaseModel):
extractions: List[PropertyExtractionsList]
class PropertyCapacities(BaseModel):
capacities: List[float]
dates: List[str]
class PropertyCapacitiesMonthly(BaseModel):
months: List[str]
capacities: List[float]
class PropertyCapacitiesDaily(BaseModel):
weekdays: List[str]
capacities: List[float]
class PropertyBaseDetail(BaseModel):
property_platform_id: str
first_found: str
last_found: str
latlng: str
region_id: str
region_name: str
class PropertyBase(BaseModel):
property_platform_id: str
first_found: str
last_found: str
latlng: str
region_id: str
region_name: str
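These Pydantic models back both response validation and the generated OpenAPI schema: FastAPI rejects any handler payload whose fields or types do not match the declaration. A minimal sketch of a conforming payload (not part of the commit; the values are invented):
sample = Regions(name="Engadin", id="2", count_properties=311)
print(sample)  # name='Engadin' id='2' count_properties=311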
d = data.load()
app = FastAPI()
tags_metadata = [
{
"name": "region",
"description": "Get data by region.",
},
{
"name": "property",
"description": "Get data by property",
},
]
app = FastAPI(openapi_tags=tags_metadata)
@app.get("/")
def read_root():
return {"message": "Hi there!"}
# regions overview: contains names, ids, and count of properties of regions
@app.get("/regions")
def properties_region():
@app.get("/regions", response_model=Regions, tags=['region'])
def regions():
"""
Returns a list of all available regions.
"""
return d.properties_per_region().pl().to_dicts()
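As a quick smoke test (a hedged sketch, not part of the commit), the endpoint can be exercised with FastAPI's TestClient; the response values shown are invented:
from fastapi.testclient import TestClient

client = TestClient(app)
resp = client.get("/regions")
assert resp.status_code == 200
# e.g. [{"name": "Davos", "id": "1", "count_properties": 311}, ...] -- values invented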
# get capacities of properties, for every scraping, filtered by region; -1 = all regions
@app.get("/region/{id}/properties/capacities")
def region_property_capacities_data(id: int):
capacities = etl_rpc.region_properties_capacities(id)
return capacities
@app.get("/region/{id}/base", response_model=RegionBase, tags=['region'])
def region_base(id: int):
"""
Returns basic information about a region.
"""
base = d.region_base_data(id).pl().to_dicts()
return {"id": base[0]["id"], "name": base[0]["name"]}
# get the capacity of a region for every scraping; -1 = all regions
@app.get("/region/{id}/capacities")
def region_capacities_data(id: int):
@app.get("/region/{id}/capacities", response_model=RegionCapacities, tags=['region'])
def region_capacities(id: int):
"""
Returns the capacities of a region, for every scraping. Set id to -1 to obtain data for all regions.
"""
capacities = etl_rc.region_capacities(id)
return capacities
# get the capacity of a region for desired scraping date by months; -1 = all regions
@app.get("/region/{id}/capacities/monthly/{scrapeDate}")
def region_capacities_data(id: int, scrapeDate: str):
capacities = etl_rcm.region_capacities_monthly(id, scrapeDate)
@app.get("/region/{id}/capacities/monthly/{date}", response_model=RegionCapacitiesMonthly, tags=['region'])
def region_capacities_monthly(id: int, date: datetime.date):
"""
Returns the capacities of a region for the specified date, by month. Set id to -1 to obtain data for all regions.
"""
capacities = etl_rcm.region_capacities_monthly(id, date)
return capacities
# get the capacity of a region for desired scraping date by days; -1 = all regions
@app.get("/region/{id}/capacities/daily/{scrapeDate}")
def region_capacities_data(id: int, scrapeDate: str):
capacities = etl_rcd.region_capacities_daily(id, scrapeDate)
@app.get("/region/{id}/capacities/daily/{date}", response_model=RegionCapacitiesDaily, tags=['region'])
def region_capacities_daily(id: int, date: datetime.date):
"""
Returns the capacities of a region for the specified date, by day. Set id to -1 to obtain data for all regions.
"""
capacities = etl_rcd.region_capacities_daily(id, date)
return capacities
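Typing the path parameter as datetime.date lets FastAPI parse and validate ISO dates before the handler runs, so malformed input never reaches the ETL layer. A hedged sketch, reusing the TestClient from the example above:
ok = client.get("/region/-1/capacities/daily/2024-06-01")   # parsed to datetime.date(2024, 6, 1)
bad = client.get("/region/-1/capacities/daily/not-a-date")  # rejected before the handler runs
assert bad.status_code == 422  # Unprocessable Entity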
# compare the capacities of two regions
@app.get("/region/capacities/comparison/{id_1}/{id_2}")
def region_capacities_data(id_1: int, id_2: int):
capacities = etl_rcc.region_capacities_comparison(id_1, id_2)
return capacities
# get the moving average for a region beginning from desired date
@app.get("/region/{id}/movingAverage/{startDate}")
def region_capacities_data(id: int, startDate: str):
result = etl_rmA.region_movingAverage(id, startDate)
@app.get("/region/{id}/moving-average/{date}", response_model=RegionMovingAverage, tags=['region'])
def region_moving_average(id: int, date: datetime.date):
"""
Returns the moving average of a region from the specified date. Set id to -1 to obtain data for all regions.
"""
result = etl_rmA.region_movingAverage(id, date)
return result
# get id and name of a region
@app.get("/region/{id}/base")
def region_base_data(id: int):
return d.region_base_data(id).pl().to_dicts()
@app.get("/region/{id}/properties/capacities", response_model=RegionPropertiesCapacities, tags=['region'])
def region_property_capacities(id: int):
"""
Returns the capacities of properties in a region, for every scraping. Set id to -1 to obtain data for all regions.
"""
capacities = etl_rpc.region_properties_capacities(id)
return capacities
# get growth of properties categorized by regions
@app.get("/properties/growth")
@app.get("/properties/growth", response_model=PropertiesGrowth, tags=['property'])
def properties_growth():
"""
Returns the growth of found properties over time, by region.
"""
options = {"dates" : d.properties_growth().pl()['date'].to_list(), "total_all" : d.properties_growth().pl()['total_all'].to_list(), "total_heidiland" : d.properties_growth().pl()['total_heidiland'].to_list(), "total_engadin" : d.properties_growth().pl()['total_engadin'].to_list(), "total_davos" : d.properties_growth().pl()['total_davos'].to_list(), "total_stmoritz" : d.properties_growth().pl()['total_stmoritz'].to_list()}
return options
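Note that the handler above evaluates d.properties_growth().pl() once per response field. If the underlying query is expensive, binding the DataFrame once is equivalent and avoids the repeated work; a sketch under that assumption:
growth = d.properties_growth().pl()  # evaluate the query a single time
options = {"dates": growth["date"].to_list()}
for col in ["total_all", "total_heidiland", "total_engadin", "total_davos", "total_stmoritz"]:
    options[col] = growth[col].to_list()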
# get the geo coordinates for all properties
@app.get("/properties/geo")
@app.get("/properties/geo", response_model=PropertiesGeo, tags=['property'])
def properties_geo():
return d.properties_geo().pl().to_dicts()
"""
Returns the geo coordinates of properties.
"""
return {"properties": d.properties_geo().pl().to_dicts()}
# get the 10 nearest properties from desired property
@app.get("/property/{id}/neighbours")
@app.get("/property/{id}/base", response_model=PropertyBase, tags=['property'])
def property_base_data(id: int):
"""
Returns basic information about a property.
"""
base = d.property_base_data(id).pl().to_dicts()
return {
"property_platform_id": base[0]['property_platform_id'],
"first_found": base[0]['property_platform_id'],
"last_found": base[0]['property_platform_id'],
"latlng": base[0]['property_platform_id'],
"region_id": base[0]['property_platform_id'],
"region_name": base[0]['property_platform_id']}
@app.get("/property/{id}/neighbours", response_model=PropertyNeighbours, tags=['property'])
def property_neighbours(id: int):
capacities = etl_pn.property_neighbours(id)
return capacities
"""
Returns the 10 nearest properties to the given property.
"""
return {"neighbours" : etl_pn.property_neighbours(id)}
# get scraped data for all scrapings from desired property
@app.get("/property/{id}/extractions")
@app.get("/property/{id}/extractions", response_model=PropertyExtractions, tags=['property'])
def property_extractions(id: int):
return d.extractions_for(property_id = id).pl().to_dicts()
"""
Returns the extracted data for the given property.
"""
return {"extractions" : d.extractions_for(property_id = id).pl().cast({"date": pl.String}).to_dicts()}
# get the capacities for every scraping of the desired property
@app.get("/property/{id}/capacities")
@app.get("/property/{id}/capacities", response_model=PropertyCapacities, tags=['property'])
def property_capacities_data(id: int):
"""
Returns the capacities for the given property.
"""
capacities = etl_pc.property_capacities(id)
return capacities
# get the capacity of a property for desired scraping date by months
@app.get("/property/{id}/capacities/monthly/{scrapeDate}")
def property_capacities_data(id: int, scrapeDate: str):
capacities = etl_pcm.property_capacities_monthly(id, scrapeDate)
@app.get("/property/{id}/capacities/monthly/{date}", response_model=PropertyCapacitiesMonthly, tags=['property'])
def property_capacities_data_monthly(id: int, date: datetime.date):
"""
Returns the capacities for the given property and date, by month.
"""
capacities = etl_pcm.property_capacities_monthly(id, date)
return capacities
# get the capacity of a property for desired scraping date by days
@app.get("/property/{id}/capacities/daily/{scrapeDate}")
def property_capacities_data(id: int, scrapeDate: str):
capacities = etl_pcd.property_capacities_daily(id, scrapeDate)
@app.get("/property/{id}/capacities/daily/{date}", response_model=PropertyCapacitiesDaily, tags=['property'])
def property_capacities_data_daily(id: int, date: datetime.date):
"""
Returns the capacities for the given property and date, by day.
"""
capacities = etl_pcd.property_capacities_daily(id, date)
return capacities
# get the first / last time a property was found, the id and name of the region in which it is located, and the coordinates
@app.get("/property/{id}/base")
def property_base_data(id: int):
return d.property_base_data(id).pl().to_dicts()

View File

@@ -34,7 +34,7 @@ def property_capacities(id: int):
count_days.append(len(liste))
counts = pl.DataFrame({"count_days" : count_days, "sum" : sum_hor})
result = {"capacities": [], "dates": extractions['date'].cast(pl.Date).to_list() }
result = {"capacities": [], "dates": extractions['date'].cast(pl.Date).cast(pl.String).to_list() }
for row in counts.rows(named=True):
max_capacity = row['count_days'] * 2

View File

@@ -33,6 +33,6 @@ def property_capacities_monthly(id: int, scrapeDate: str):
df_calendar = df_calendar.group_by(['dates', 'date_short', 'numDays']).agg(pl.col("column_0").sum())
df_calendar = df_calendar.with_columns((pl.col("column_0") / pl.col("numDays") / 2 * 100).alias("column_0"))
df_calendar = df_calendar.sort('dates')
result = {"date": scrapeDate, "months": df_calendar['date_short'].to_list(), 'capacities': df_calendar['column_0'].to_list()}
result = {"months": df_calendar['date_short'].to_list(), 'capacities': df_calendar['column_0'].to_list()}
etl_cache.saveObj(file, result)
return result

View File

@@ -15,9 +15,6 @@ def region_capacities_daily(id: int, scrapeDate_start: str):
if obj:
return obj
# String to Date
scrapeDate_start = datetime.strptime(scrapeDate_start, '%Y-%m-%d')
# Get end date of start search-window
scrapeDate_end = scrapeDate_start + timedelta(days=1)
@@ -62,6 +59,6 @@ def region_capacities_daily(id: int, scrapeDate_start: str):
outDf = outDf.insert_column(1, means)
outDf = outDf[['weekday', 'mean']]
result = {"date": scrapeDate, "weekdays": outDf['weekday'].to_list(),'capacities': outDf['mean'].to_list()}
result = {"weekdays": outDf['weekday'].to_list(),'capacities': outDf['mean'].to_list()}
etl_cache.saveObj(file, result)
return result

View File

@@ -16,9 +16,6 @@ def region_capacities_monthly(id: int, scrapeDate_start: str):
if obj:
return obj
# String to Date
scrapeDate_start = datetime.strptime(scrapeDate_start, '%Y-%m-%d')
# Get end date of start search-window
scrapeDate_end = scrapeDate_start + timedelta(days=1)

View File

@@ -8,7 +8,7 @@ from data import etl_cache
d = data.load()
def region_movingAverage(id: int, scrape_date_start_min: str):
def region_movingAverage(id: int, scrape_date_start_min: datetime.date):
file = f"etl_region_movingAverage_{id}_{scrape_date_start_min}.obj"
obj = etl_cache.openObj(file)
@@ -31,9 +31,6 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
uniqueScrapeDates = uniqueScrapeDates.get_column('ScrapeDate').str.to_date()
uniqueScrapeDates = uniqueScrapeDates.sort().to_list()
# String to Date
scrape_date_start_min = datetime.strptime(scrape_date_start_min, '%Y-%m-%d')
# Get end date of start search-window
scrape_date_start_max = scrape_date_start_min + timedelta(days=1)
@@ -41,7 +38,7 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
scrape_date_end_min = scrape_date_start_min + timedelta(days=timeOffset)
# Get closest ScrapeDate
scrape_date_end_min = min(uniqueScrapeDates, key=lambda x: abs(x - scrape_date_end_min.date()))
scrape_date_end_min = min(uniqueScrapeDates, key=lambda x: abs(x - scrape_date_end_min))
scrape_date_end_max = scrape_date_end_min + timedelta(days=1)
final_end_date = scrape_date_end_min + timedelta(days=calcFrame)
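With uniqueScrapeDates already converted to datetime.date values, x - scrape_date_end_min yields a timedelta directly, so the .date() call is no longer needed and min(..., key=...) picks the closest scrape date. A small standalone illustration with invented dates:
from datetime import date

scrape_dates = [date(2024, 5, 28), date(2024, 6, 4), date(2024, 6, 11)]
target = date(2024, 6, 3)
closest = min(scrape_dates, key=lambda x: abs(x - target))  # abs() of a timedelta orders the candidates
print(closest)  # 2024-06-04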
@@ -134,6 +131,6 @@ def region_movingAverage(id: int, scrape_date_start_min: str):
# Add moving_averages to df
outDF = outDF.with_columns(moving_averages=pl.Series(moving_averages))
result = {'dates':outDF.get_column('dates').to_list(), 'cap_earlierTimeframe':outDF.get_column('sum_hor_predict').to_list(), 'cap_laterTimeframe':outDF.get_column('sum_hor_actual').to_list(), 'movAvg':outDF.get_column('moving_averages').to_list(),}
result = {'dates': outDF.get_column('dates').to_list(), 'capacities_timeframe_before': outDF.get_column('sum_hor_predict').to_list(), 'capacities_timeframe_after':outDF.get_column('sum_hor_actual').to_list(), 'capacities_moving_average':outDF.get_column('moving_averages').to_list(),}
etl_cache.saveObj(file, result)
return result
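The renamed keys line up one-to-one with the RegionMovingAverage response model in the API module, so FastAPI can validate the cached payload directly; a quick check with invented values:
RegionMovingAverage(
    dates=["2025-01-01"],
    capacities_timeframe_before=[48.0],
    capacities_timeframe_after=[52.5],
    capacities_moving_average=[None],
)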

View File

@@ -52,7 +52,7 @@ def region_properties_capacities(id: int):
for row in gridData.rows(named=True):
capacity = (row['sum_hor']*100)/maxValue
values.append((row['scrape_date'], row['property_id'], capacity))
values.append({"date" : row['scrape_date'], "property_id": row['property_id'], "capacity": capacity})
# Cast listOfDates to datetime
listOfDates = listOfDates.cast(pl.Date).to_list()
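Emitting dicts instead of tuples makes each grid entry match RegionPropertiesCapacitiesValues field-for-field; a quick check with invented values:
RegionPropertiesCapacitiesValues(date="2024-06-01", property_id="42", capacity=73.5)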