parent 03e78a4105
commit 8bcc1c57b5
@@ -2,6 +2,7 @@ import data
 import polars as pl
 from data import etl_property_capacities as etl_pc
 from data import etl_region_capacities as etl_rc
+from data import etl_region_capacities_comparison as etl_rcc
 from fastapi import FastAPI, Response
 
 d = data.load()
@@ -50,5 +51,8 @@ def region_capacities_data(id: int):
     capacities = etl_rc.region_capacities(id)
     return capacities
 
 
+@app.get("/region/capacities/comparison/{id_1}/{id_2}")
+def region_capacities_data(id_1: int, id_2: int):
+    capacities = etl_rcc.region_capacities_comparison(id_1, id_2)
+    return capacities
 
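The new route can be smoke-tested without a running server. A minimal sketch using FastAPI's TestClient, assuming the application object in this module is named app (implied by the decorator, but not shown in the hunk):

    from fastapi.testclient import TestClient

    client = TestClient(app)
    resp = client.get("/region/capacities/comparison/1/2")  # ids 1 and 2 are illustrative
    assert resp.status_code == 200
    print(sorted(resp.json().keys()))  # expect ['region1', 'region2']

Note that the handler reuses the function name region_capacities_data from the route above it; FastAPI registers each route when its decorator runs, so both endpoints still work, but a distinct name would avoid the shadowing.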
@@ -430,5 +430,20 @@ class Database:
                 type == 'calendar'
             """)
 
+    def capacity_comparison_of_region(self, region_id_1, region_id_2):
+        return self.connection.sql(f"""
+            SELECT
+                JSON_EXTRACT(body, '$.content.days') as calendarBody,
+                strftime(extractions.created_at, '%Y-%m-%d') AS ScrapeDate,
+                extractions.property_id,
+                properties.seed_id
+            FROM
+                consultancy_d.extractions
+            LEFT JOIN
+                consultancy_d.properties ON properties.id = extractions.property_id
+            WHERE
+                type == 'calendar' AND
+                (properties.seed_id = {region_id_1} OR
+                 properties.seed_id = {region_id_2})
+            """)
 
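The two region ids are spliced into the query with an f-string. Since they arrive as request path parameters, a variant with bound parameters is the more defensive choice; a sketch assuming the DuckDB Python client, whose execute() accepts positional ? placeholders and whose result also exposes .pl():

    def capacity_comparison_of_region(self, region_id_1, region_id_2):
        return self.connection.execute("""
            SELECT
                JSON_EXTRACT(body, '$.content.days') AS calendarBody,
                strftime(extractions.created_at, '%Y-%m-%d') AS ScrapeDate,
                extractions.property_id,
                properties.seed_id
            FROM consultancy_d.extractions
            LEFT JOIN consultancy_d.properties
                   ON properties.id = extractions.property_id
            WHERE type = 'calendar'
              AND properties.seed_id IN (?, ?)
            """, [region_id_1, region_id_2])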
@@ -45,9 +45,7 @@ def region_capacities(id: int):
     gridData = np.array(gridData)
     # get all values to calculate Max
     allValues = gridData[:, 2].astype(int)
-    print(allValues)
     maxValue = np.max(allValues)
-    print(maxValue)
     gridData[:, 2] = (allValues*100)/maxValue
 
     # Return back to list
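With the debug prints removed, the surviving lines rescale every grid value to a percentage of the column maximum. A self-contained sketch of that normalization step:

    import numpy as np

    vals = np.array([3, 6, 12])
    print(vals * 100 / np.max(vals))  # [ 25.  50. 100.] -- each value as % of the max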
@@ -0,0 +1,68 @@
+import data
+import polars as pl
+from io import StringIO
+import numpy as np
+
+
+d = data.load()
+
+def region_capacities_comparison(id_1: int, id_2: int):
+    fulldf = d.capacity_comparison_of_region(id_1, id_2).pl()
+    # turn PropertyIDs and seedIDs to ints for sorting and filtering
+    fulldf = fulldf.cast({"property_id": int})
+    fulldf = fulldf.cast({"seed_id": int})
+    df_region1 = fulldf.filter(pl.col("seed_id") == id_1)
+    df_region2 = fulldf.filter(pl.col("seed_id") == id_2)
+    df_list = [df_region1, df_region2]
+    outDictList = []
+
+    for df in df_list:
+        # Get uniques for dates and propIDs and sort them
+        listOfDates = df.get_column("ScrapeDate").unique().sort()
+        listOfPropertyIDs = df.get_column("property_id").unique().sort()
+
+        # Create DFs from lists to merge later
+        datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
+        propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")
+
+        # Merge Dataframe to generate indices
+        df = df.join(datesDF, on='ScrapeDate')
+        df = df.join(propIdDF, on='property_id')
+        # Drop now useless columns ScrapeDate and property_id
+        df = df[['ScrapeDate', 'calendarBody', 'date_index', 'prop_index']]
+        # Calculate grid values
+        gridData = []
+        for row in df.rows(named=True):
+            # Return 0 for sum if calendar is null
+            if row['calendarBody']:
+                calDF = pl.read_json(StringIO(row['calendarBody']))
+                sum_hor = calDF.sum_horizontal()[0]
+            else:
+                sum_hor = 0
+            # With Index
+            # gridData.append([row['prop_index'], row['date_index'], sum_hor])
+            # With ScrapeDate
+            gridData.append([row['ScrapeDate'], row['date_index'], sum_hor])
+
+        gridData = np.array(gridData)
+        # get all values to calculate Max
+        allValues = gridData[:, 2].astype(int)
+        maxValue = np.max(allValues)
+        gridData[:, 2] = (allValues*100)/maxValue
+
+        # Return back to list
+        gridData = gridData.tolist()
+
+        # Cast listOfDates to datetime
+        listOfDates = listOfDates.cast(pl.Date).to_list()
+        listOfPropertyIDs = listOfPropertyIDs.to_list()
+
+        # Create JSON
+        tempDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}
+        outDictList.append(tempDict)
+
+    outDict = {'region1': outDictList[0], 'region2': outDictList[1],}
+    return outDict
+
+out = region_capacities_comparison(1,2)
+print(out)
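One caveat: the trailing smoke test runs on every import of this module, including when the FastAPI app imports it as etl_rcc, so the comparison for ids 1 and 2 (and its database query) executes at startup. The conventional guard, as a sketch:

    if __name__ == '__main__':
        out = region_capacities_comparison(1, 2)
        print(out)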