73 lines
2.2 KiB
Python
73 lines
2.2 KiB
Python
from math import asin, atan2, cos, degrees, radians, sin, sqrt
|
|
|
|
import polars as pl
|
|
|
|
import data
|
|
from data import etl_cache
|
|
|
|
# Module-level data handle, loaded once at import time; property_neighbours()
# reads the geo/seed extraction table through it.
d = data.load()
|
|
|
|
|
|
def calcHaversinDistance(latMain, lonMain, lat, lon):
    """Return the great-circle distance between two points using the haversine formula.

    All four arguments are coordinates in decimal degrees. The result is
    expressed in the same unit as the sphere radius used below (6371, the
    mean Earth radius in kilometres).

    Args:
        latMain: Latitude of the reference point, in degrees.
        lonMain: Longitude of the reference point, in degrees.
        lat: Latitude of the other point, in degrees.
        lon: Longitude of the other point, in degrees.

    Returns:
        The distance along the sphere's surface as a float.
    """
    R = 6371

    # Convert decimal degrees to radians.
    phi_main, lam_main, phi, lam = map(radians, [latMain, lonMain, lat, lon])

    dlon = lam_main - lam
    dlat = phi_main - phi

    a = sin(dlat / 2) ** 2 + cos(phi) * cos(phi_main) * sin(dlon / 2) ** 2

    # Mathematically a lies in [0, 1], but rounding can push it a few ulps
    # outside (typically for near-antipodal points), which would make the
    # square roots below raise "math domain error". Comparisons are used
    # instead of min()/max() so that a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c
|
|
|
|
|
|
def property_neighbours(id: int):
    """Return the ten properties closest to property ``id`` within its region.

    The result is cached through ``etl_cache`` under a file name derived from
    ``id``; a truthy cached object is returned as-is without recomputation.

    Args:
        id: Identifier of the main property. It is compared against the
            ``id`` column as a string.

    Returns:
        A list of dicts (one per neighbouring property, nearest first, at
        most ten). Each dict holds the remaining columns of the extraction
        table plus ``lat`` and ``lon`` (cast to Float32); the ``seed_id``,
        ``coordinates`` and temporary distance columns are dropped.

    Raises:
        Whatever the column lookup raises when no row matches ``id``
        (the first element of an empty column is accessed), and
        ``ValueError`` if the coordinates string is not ``"<lat>,<lon>"``.
    """
    file = f"etl_property_neighbours_{id}.obj"

    # NOTE(review): this is a truthiness check, so an empty cached result
    # would be recomputed on every call. Confirm what openObj returns on a
    # cache miss before tightening it to an explicit comparison.
    obj = etl_cache.openObj(file)
    if obj:
        return obj

    extractions = d.properties_geo_seeds().pl()
    wanted_id = str(id)

    # Look the main property up once and read both its position and its
    # region (seed_id) from that single-row selection.
    main_row = extractions.filter(pl.col('id') == wanted_id)
    latMain, lonMain = map(float, main_row['coordinates'][0].split(','))
    region = main_row['seed_id'][0]

    candidates = (
        extractions
        # Keep only properties from the same region ...
        .filter(pl.col('seed_id') == str(region))
        .drop('seed_id')
        # ... excluding the main property itself.
        .filter(pl.col('id') != wanted_id)
        # Split "lat,lon" into two separate numeric columns.
        .with_columns(
            pl.col("coordinates")
            .str.split_exact(",", 1)
            .struct.rename_fields(["lat", "lon"])
            .alias("lat/lon")
        )
        .unnest("lat/lon")
        .drop('coordinates')
        .with_columns([
            pl.col("lat").cast(pl.Float32),
            pl.col("lon").cast(pl.Float32),
        ])
    )

    # Distance from the main property to every candidate, in row order.
    distances = [
        calcHaversinDistance(latMain, lonMain, row_lat, row_lon)
        for row_lat, row_lon in candidates.select(["lat", "lon"]).rows()
    ]

    # Attach the distances only to rank the rows, then drop them again.
    nearest = (
        candidates
        .with_columns(pl.Series(name="distances", values=distances))
        .sort("distances")
        .head(10)
        .drop('distances')
    )

    result = nearest.to_dicts()
    etl_cache.saveObj(file, result)

    return result