Compare commits
No commits in common. "818d6fb5ec18cd3c00d4ae8de61d88b430709457" and "0aa0f2345cb02e084b5b09d01a69c3c14c050bc7" have entirely different histories.
818d6fb5ec
...
0aa0f2345c
|
@ -44,10 +44,4 @@ def property_capacities_data(id: int):
|
||||||
def property_base_data(id: int):
|
def property_base_data(id: int):
|
||||||
return d.property_base_data(id).pl().to_dicts()
|
return d.property_base_data(id).pl().to_dicts()
|
||||||
|
|
||||||
@app.get("/region/{id}/capacities")
def region_capacities_data(id: int):
    """Serve the capacity data of the region identified by ``id``.

    The work is delegated to ``etl_pc.region_capacities``; its result is
    returned unchanged as the response body.
    """
    return etl_pc.region_capacities(id)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,8 @@ class Database:
|
||||||
if(spatial_installed and not spatial_installed[0]):
|
if(spatial_installed and not spatial_installed[0]):
|
||||||
self.connection.sql("INSTALL spatial")
|
self.connection.sql("INSTALL spatial")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def db_overview(self):
|
def db_overview(self):
|
||||||
return self.connection.sql("DESCRIBE;").show()
|
return self.connection.sql("DESCRIBE;").show()
|
||||||
|
|
||||||
|
@ -401,20 +403,5 @@ class Database:
|
||||||
consultancy_d.properties p
|
consultancy_d.properties p
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def capacity_of_region(self, region_id):
    """Query the calendar extractions of every property in one region.

    Selects, for each extraction of type 'calendar' whose property has
    ``seed_id`` equal to ``region_id``: the ``$.content.days`` part of the
    extraction body (``calendarBody``), the creation day formatted as
    ``YYYY-MM-DD`` (``ScrapeDate``) and the ``property_id``.

    Args:
        region_id: Seed/region identifier; must be convertible to ``int``.

    Returns:
        Whatever ``self.connection.sql`` returns for the query.

    Raises:
        ValueError / TypeError: if ``region_id`` is not an integer value.
    """
    # The value is spliced into the SQL text, so force it to an integer
    # first: anything that is not a plain number is rejected here instead
    # of being executed as part of the statement.
    seed_id = int(region_id)
    return self.connection.sql(f"""
        SELECT
            JSON_EXTRACT(body, '$.content.days') as calendarBody,
            strftime(extractions.created_at, '%Y-%m-%d') AS ScrapeDate,
            extractions.property_id,
        FROM
            consultancy_d.extractions
        LEFT JOIN
            consultancy_d.properties ON properties.id = extractions.property_id
        WHERE
            type == 'calendar' AND
            properties.seed_id = {seed_id}
        """)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,57 +0,0 @@
|
||||||
from etl.src import data
|
|
||||||
import polars as pl
|
|
||||||
from io import StringIO
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
d = data.load()
|
|
||||||
|
|
||||||
def region_capacities(id: int):
    """Build the capacity grid of one region.

    Loads the calendar extractions of the region via
    ``d.capacity_of_region(id)``, assigns every distinct scrape date and
    property id a position in its sorted order, sums each calendar body and
    scales the sums so that the largest one becomes 100.

    Args:
        id: Region identifier passed on to ``d.capacity_of_region``.

    Returns:
        A dict with the keys
        ``'scrapeDates'``  - sorted distinct scrape dates,
        ``'property_ids'`` - sorted distinct property ids,
        ``'values'``       - list of ``[prop_index, date_index, value]``.
        All three lists are empty when the region has no extractions.
    """
    # Get Data
    df = d.capacity_of_region(id).pl()

    # No extractions for this region: return an empty grid. Without this
    # guard the array built below is 1-D and ``gridData[:, 2]`` raises.
    if df.height == 0:
        return {'scrapeDates': [], 'property_ids': [], 'values': []}

    # turn PropertyIDs to ints for sorting
    df = df.cast({"property_id": int})

    # Get uniques for dates and propIDs and sort them
    listOfDates = df.get_column("ScrapeDate").unique().sort()
    listOfPropertyIDs = df.get_column("property_id").unique().sort()

    # Create DFs from lists to merge later
    datesDF = pl.DataFrame(listOfDates).with_row_index("date_index")
    propIdDF = pl.DataFrame(listOfPropertyIDs).with_row_index("prop_index")

    # Merge Dataframe to generate indices
    df = df.join(datesDF, on='ScrapeDate')
    df = df.join(propIdDF, on='property_id')

    # Drop now useless columns ScrapeDate and property_id
    df = df[['calendarBody', 'date_index', 'prop_index']]

    # Calculate grid values
    gridData = []
    for row in df.rows(named=True):
        # Return 0 for sum if calendar is null
        if row['calendarBody']:
            calDF = pl.read_json(StringIO(row['calendarBody']))
            sum_hor = calDF.sum_horizontal()[0]
        else:
            sum_hor = 0
        gridData.append([row['prop_index'], row['date_index'], sum_hor])
    gridData = np.array(gridData)

    # Scale the value column so that the maximum becomes 100. A maximum of
    # 0 means every value is already 0, so scaling is skipped rather than
    # dividing by zero.
    # NOTE(review): the in-place assignment keeps the array's dtype, so with
    # integer sums the percentages are truncated - confirm this is intended.
    maxValue = np.max(gridData[:, 2])
    if maxValue != 0:
        gridData[:, 2] = (gridData[:, 2] * 100) / maxValue

    # Return back to list
    gridData = gridData.tolist()

    # Cast listOfDates to datetime
    listOfDates = listOfDates.cast(pl.Date).to_list()
    listOfPropertyIDs = listOfPropertyIDs.to_list()

    # Create JSON
    outDict = {'scrapeDates': listOfDates, 'property_ids': listOfPropertyIDs, 'values': gridData}

    return outDict
|
|
|
@ -31,6 +31,8 @@ dfNew = pl.from_dicts(data)
|
||||||
dfNew.write_csv('results/data_quality.csv')
|
dfNew.write_csv('results/data_quality.csv')
|
||||||
print(dfNew)
|
print(dfNew)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
dfNew = pl.read_csv('results/data_quality.csv')
|
dfNew = pl.read_csv('results/data_quality.csv')
|
||||||
dfNew = dfNew.with_columns(pl.col("created_at").map_elements(lambda x: datetime.strptime(x, "%Y-%m-%d").date()))
|
dfNew = dfNew.with_columns(pl.col("created_at").map_elements(lambda x: datetime.strptime(x, "%Y-%m-%d").date()))
|
||||||
|
|
Loading…
Reference in New Issue