from math import asin, atan2, cos, degrees, radians, sin, sqrt

import polars as pl

import data
from data import etl_cache

d = data.load()


def calcHaversinDistance(latMain, lonMain, lat, lon):
    """Return the great-circle distance between two points in kilometres.

    Args:
        latMain: Latitude of the reference point, in decimal degrees.
        lonMain: Longitude of the reference point, in decimal degrees.
        lat: Latitude of the other point, in decimal degrees.
        lon: Longitude of the other point, in decimal degrees.

    Returns:
        The haversine distance on a sphere of radius 6371 km.
    """
    earth_radius_km = 6371

    # The trigonometric functions below work in radians.
    phi_main, lam_main, phi, lam = map(radians, (latMain, lonMain, lat, lon))

    delta_lam = lam_main - lam
    delta_phi = phi_main - phi
    a = sin(delta_phi / 2) ** 2 + cos(phi) * cos(phi_main) * sin(delta_lam / 2) ** 2

    # Mathematically a <= 1, but rounding can push it a hair above 1 for
    # (near-)antipodal points, which would make sqrt(1 - a) raise a
    # "math domain error". An explicit comparison (rather than min())
    # keeps NaN inputs propagating as NaN, exactly as before.
    if a > 1.0:
        a = 1.0

    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
    return earth_radius_km * central_angle


def property_neighbours(id: int):
    """Return the (up to) 10 properties closest to property ``id``.

    Only properties sharing the main property's ``seed_id`` are considered,
    and the main property itself is excluded. The result is stored through
    ``etl_cache`` under ``etl_property_neighbours_{id}.obj`` and a truthy
    cached value is returned directly on later calls.

    Args:
        id: Identifier of the main property. It is compared against the
            ``id`` column as a string.

    Returns:
        A list of row dicts ordered by ascending distance. Each dict holds
        the remaining columns of the geo-seed frame plus ``lat`` and ``lon``
        (the ``seed_id``, ``coordinates`` and distance columns are dropped).

    Raises:
        Whatever indexing an empty polars Series raises when no row matches
        ``id`` (the lookup is not guarded).
    """
    cache_file = f"etl_property_neighbours_{id}.obj"
    cached = etl_cache.openObj(cache_file)
    # NOTE(review): a cached *empty* result is falsy and is therefore
    # recomputed on every call. Confirm what openObj returns on a miss
    # before tightening this to an `is not None` check.
    if cached:
        return cached

    extractions = d.properties_geo_seeds().pl()
    property_id = str(id)

    # Look the main property up once; both its coordinates and its region
    # come from the same row.
    main_row = extractions.filter(pl.col("id") == property_id)
    latMain, lonMain = map(float, main_row["coordinates"][0].split(","))
    region = main_row["seed_id"][0]

    candidates = (
        # Restrict to the main property's region, then drop the property
        # itself so it is never reported as its own neighbour.
        extractions.filter(pl.col("seed_id") == str(region))
        .drop("seed_id")
        .filter(pl.col("id") != property_id)
        # Split "lat,lon" strings into two separate columns.
        .with_columns(
            pl.col("coordinates")
            .str.split_exact(",", 1)
            .struct.rename_fields(["lat", "lon"])
            .alias("lat/lon")
        )
        .unnest("lat/lon")
        .drop("coordinates")
        .with_columns(
            [
                pl.col("lat").cast(pl.Float32),
                pl.col("lon").cast(pl.Float32),
            ]
        )
    )

    # Distance from the main property to every remaining candidate, in the
    # same order as the candidate rows.
    distances = [
        calcHaversinDistance(latMain, lonMain, lat, lon)
        for lat, lon in zip(
            candidates["lat"].to_list(), candidates["lon"].to_list()
        )
    ]

    # The distance column is only a sort key; it is not part of the result.
    nearest = (
        candidates.with_columns(pl.Series(name="distances", values=distances))
        .sort("distances")
        .head(10)
        .drop("distances")
    )

    result = nearest.to_dicts()
    etl_cache.saveObj(cache_file, result)
    return result