issue 5 resolved

#3
Ausgabeformat:
{ids: [84, 43...44], lat: [...], lon: [...]}
main
mmaurostoffel 2025-01-07 20:20:48 +01:00
parent 42dc14021f
commit 7884febe53
2 changed files with 74 additions and 0 deletions

View File

@ -418,6 +418,16 @@ class Database:
consultancy_d.properties p consultancy_d.properties p
""") """)
def properties_geo_seeds(self):
    """Query id, seed_id and check_data (exposed as ``coordinates``) of the properties.

    The query reads from ``consultancy_d.properties`` without any filter and
    returns whatever ``self.connection.sql`` produces for it.
    """
    query = """
SELECT
p.id,
p.seed_id,
p.check_data as coordinates
FROM
consultancy_d.properties p
"""
    return self.connection.sql(query)
def capacity_of_region(self, region_id): def capacity_of_region(self, region_id):
return self.connection.sql(f""" return self.connection.sql(f"""
SELECT SELECT

View File

@ -0,0 +1,64 @@
import polars as pl
from math import radians, cos, sin, asin, sqrt, degrees, atan2
import data
# Module-level data handle, created once when this module is imported;
# property_neighbours reads from it via d.properties_geo_seeds().
d = data.load()
def calcHaversinDistance(latMain: float, lonMain: float, lat: float, lon: float) -> float:
    """Return the great-circle distance between two points (haversine formula).

    Args:
        latMain: Latitude of the first point in decimal degrees.
        lonMain: Longitude of the first point in decimal degrees.
        lat: Latitude of the second point in decimal degrees.
        lon: Longitude of the second point in decimal degrees.

    Returns:
        The central angle between the points scaled by a sphere radius of
        6371, i.e. the distance in kilometres on a spherical Earth.
    """
    earth_radius_km = 6371

    # Convert decimal degrees to radians.
    latMain, lonMain, lat, lon = map(radians, [latMain, lonMain, lat, lon])

    # Haversine formula.
    dlon = lonMain - lon
    dlat = latMain - lat
    a = sin(dlat / 2) ** 2 + cos(lat) * cos(latMain) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally outside [0, 1] (for
    # example for antipodal points), which would make sqrt/asin raise
    # ValueError. Clamp it to the mathematically valid range first.
    a = min(1.0, max(0.0, a))

    central_angle = 2 * asin(sqrt(a))
    return earth_radius_km * central_angle
def property_neighbours(id: int, limit: int = 10) -> dict:
    """Return the properties closest to property ``id`` that share its seed.

    The reference property is looked up in ``d.properties_geo_seeds()``; its
    ``coordinates`` value is parsed as a ``"lat,lon"`` string. All other
    properties with the same ``seed_id`` are ranked by haversine distance to
    it and the closest ones are returned.

    Args:
        id: Identifier of the reference property. It is compared against the
            ``id`` column as ``str(id)``. (The name shadows the builtin but
            is part of the public signature, so it is kept.)
        limit: Maximum number of neighbours to return (default 10).

    Returns:
        ``{"ids": [...], "lat": [...], "lon": [...]}`` with three parallel
        lists ordered from nearest to farthest.

    Raises:
        The ``[0]`` lookup on the reference property fails if no row matches
        ``id``; unpacking fails if its coordinates are not exactly two
        comma-separated values.
    """
    properties = d.properties_geo_seeds().pl()
    property_id = str(id)

    # Look the reference property up once and read both values from that row.
    main = properties.filter(pl.col('id') == property_id)
    latMain, lonMain = map(float, main['coordinates'][0].split(','))
    region = main['seed_id'][0]

    # Keep only the other properties of the same seed and split the
    # "lat,lon" string into two numeric columns. Float64 matches the
    # precision used for the reference point; Float32 would round the
    # coordinates before the distance is computed and in the returned lists.
    candidates = (
        properties
        .filter(pl.col('seed_id') == str(region))
        .drop('seed_id')
        .filter(pl.col('id') != property_id)
        .with_columns(
            pl.col("coordinates")
            .str.split_exact(",", 1)
            .struct.rename_fields(["lat", "lon"])
            .alias("lat/lon")
        )
        .unnest("lat/lon")
        .drop('coordinates')
        .with_columns(pl.col(["lat", "lon"]).cast(pl.Float64))
    )

    # Distance from the reference property to every candidate, in row order.
    distances = [
        calcHaversinDistance(latMain, lonMain, lat, lon)
        for lat, lon in candidates.select(["lat", "lon"]).rows()
    ]

    # Rank by distance and keep the closest `limit`; the helper column is
    # dropped again because it is not part of the result.
    nearest = (
        candidates
        .with_columns(pl.Series(name="distances", values=distances, dtype=pl.Float64))
        .sort("distances")
        .head(limit)
        .drop('distances')
    )

    return {
        "ids": nearest['id'].to_list(),
        "lat": nearest['lat'].to_list(),
        "lon": nearest['lon'].to_list(),
    }