parent
							
								
									03e78a4105
								
							
						
					
					
						commit
						8bcc1c57b5
					
				@ -2,6 +2,7 @@ import data
 | 
				
			|||||||
import polars as pl
 | 
					import polars as pl
 | 
				
			||||||
from data import etl_property_capacities as etl_pc
 | 
					from data import etl_property_capacities as etl_pc
 | 
				
			||||||
from data import etl_region_capacities as etl_rc
 | 
					from data import etl_region_capacities as etl_rc
 | 
				
			||||||
 | 
					from data import etl_region_capacities_comparison as etl_rcc
 | 
				
			||||||
from fastapi import FastAPI, Response
 | 
					from fastapi import FastAPI, Response
 | 
				
			||||||
 | 
					
 | 
				
			||||||
d = data.load()
 | 
					d = data.load()
 | 
				
			||||||
@ -50,5 +51,8 @@ def region_capacities_data(id: int):
 | 
				
			|||||||
	capacities = etl_rc.region_capacities(id)
 | 
						capacities = etl_rc.region_capacities(id)
 | 
				
			||||||
	return capacities
 | 
						return capacities
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Endpoint: capacity comparison of two regions, both ids taken from the path.
					# NOTE(review): this handler shares its name with the single-region handler
					# shown in the preceding hunk header; the route is still registered by the
					# decorator, but the module-level name is rebound - consider renaming.
					@app.get("/region/capacities/comparison/{id_1}/{id_2}")
					def region_capacities_data(id_1: int, id_2: int):
						# Delegate straight to the ETL module and hand its result back unchanged.
						return etl_rcc.region_capacities_comparison(id_1, id_2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -430,5 +430,20 @@ class Database:
 | 
				
			|||||||
				type == 'calendar'
 | 
									type == 'calendar'
 | 
				
			||||||
			""")
 | 
								""")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						def capacity_comparison_of_region(self, region_id_1, region_id_2):
							"""Query the calendar extractions of all properties in two regions.

							Selects, for every extraction of type 'calendar' whose property
							belongs to one of the two seeds, the '$.content.days' JSON of the
							body (as calendarBody), the creation date formatted as
							'%Y-%m-%d' (as ScrapeDate), the property id and the seed id.

							Args:
								region_id_1: seed id of the first region; must be convertible to int.
								region_id_2: seed id of the second region; must be convertible to int.

							Returns:
								Whatever ``self.connection.sql`` returns for the query.

							Raises:
								ValueError, TypeError: if an id cannot be converted to int.
							"""
							# The ids are interpolated into the SQL text, so force them to plain
							# integers first; anything else is rejected before it reaches the query.
							region_id_1 = int(region_id_1)
							region_id_2 = int(region_id_2)
							return self.connection.sql(f"""
								SELECT 
									JSON_EXTRACT(body, '$.content.days') as calendarBody, 
									strftime(extractions.created_at, '%Y-%m-%d') AS ScrapeDate,
									extractions.property_id, 
									properties.seed_id
								FROM 
									consultancy_d.extractions 
								LEFT JOIN 
									consultancy_d.properties ON properties.id = extractions.property_id 
								WHERE
									type == 'calendar' AND
									(properties.seed_id = {region_id_1} OR 
									properties.seed_id = {region_id_2})		
								""")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -45,9 +45,7 @@ def region_capacities(id: int):
 | 
				
			|||||||
    gridData = np.array(gridData)
 | 
					    gridData = np.array(gridData)
 | 
				
			||||||
    # get all values to calculate Max
 | 
					    # get all values to calculate Max
 | 
				
			||||||
    allValues = gridData[:, 2].astype(int)
 | 
					    allValues = gridData[:, 2].astype(int)
 | 
				
			||||||
    print(allValues)
 | 
					 | 
				
			||||||
    maxValue = np.max(allValues)
 | 
					    maxValue = np.max(allValues)
 | 
				
			||||||
    print(maxValue)
 | 
					 | 
				
			||||||
    gridData[:, 2] = (allValues*100)/maxValue
 | 
					    gridData[:, 2] = (allValues*100)/maxValue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Return back to list
 | 
					    # Return back to list
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										68
									
								
								etl/src/data/etl_region_capacities_comparison.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								etl/src/data/etl_region_capacities_comparison.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,68 @@
 | 
				
			|||||||
 | 
					import data
 | 
				
			||||||
 | 
					import polars as pl
 | 
				
			||||||
 | 
					from io import StringIO
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					d = data.load()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _calendar_total(calendar_json):
					    """Return the horizontal sum of one calendar JSON document, or 0 if it is missing."""
					    if not calendar_json:
					        return 0
					    total = pl.read_json(StringIO(calendar_json)).sum_horizontal()[0]
					    # Guard against a null sum so the scaling below always sees a number.
					    return total or 0


					def _region_grid(df):
					    """Build the scrape-date grid for the rows of a single region.

					    Returns a dict with the sorted unique scrape dates ('scrapeDates'), the
					    sorted unique property ids ('property_ids') and one
					    [ScrapeDate, date_index, percent] entry per input row ('values'), where
					    percent is the row's calendar sum scaled so the region maximum is 100.
					    """
					    scrape_dates = df.get_column("ScrapeDate").unique().sort()
					    property_ids = df.get_column("property_id").unique().sort()

					    # Attach the position of each row's date within the sorted unique dates.
					    dates_df = pl.DataFrame(scrape_dates).with_row_index("date_index")
					    indexed = df.join(dates_df, on="ScrapeDate")

					    cells = [
					        (row["ScrapeDate"], row["date_index"], _calendar_total(row["calendarBody"]))
					        for row in indexed.rows(named=True)
					    ]

					    # Scale in plain Python: a numpy array holding both the date strings and
					    # the numbers would get a string dtype and turn every cell into text.
					    # default=0 / the zero check keep empty or all-zero regions from raising
					    # or dividing by zero.
					    max_total = max((total for _, _, total in cells), default=0)
					    values = [
					        [scrape_date, date_index, total * 100 / max_total if max_total else 0.0]
					        for scrape_date, date_index, total in cells
					    ]

					    return {
					        'scrapeDates': scrape_dates.cast(pl.Date).to_list(),
					        'property_ids': property_ids.to_list(),
					        'values': values,
					    }


					def region_capacities_comparison(id_1: int, id_2: int):
					    """Return the capacity grids of two regions side by side.

					    Args:
					        id_1: seed id of the first region.
					        id_2: seed id of the second region.

					    Returns:
					        {'region1': grid, 'region2': grid}, each grid being a dict with the keys
					        'scrapeDates', 'property_ids' and 'values'. The date index and
					        percentage inside 'values' are numbers. A region without any rows
					        yields empty lists instead of raising.
					    """
					    fulldf = d.capacity_comparison_of_region(id_1, id_2).pl()
					    # Turn property ids and seed ids into ints for sorting and filtering.
					    fulldf = fulldf.cast({"property_id": int, "seed_id": int})
					    return {
					        'region1': _region_grid(fulldf.filter(pl.col("seed_id") == id_1)),
					        'region2': _region_grid(fulldf.filter(pl.col("seed_id") == id_2)),
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Manual smoke test. Guarded so that importing this module (as the API
					# module does) no longer runs a query and prints at import time.
					if __name__ == "__main__":
					    out = region_capacities_comparison(1, 2)
					    print(out)
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user