diff --git a/src/data/database.py b/src/data/database.py
index ad520b1..f408455 100644
--- a/src/data/database.py
+++ b/src/data/database.py
@@ -2,8 +2,41 @@ import duckdb
 
 
 class Database:
+    def check_duckdb_extensions(self, extension):
+        """Return the install state of a DuckDB extension.
+
+        Args:
+            extension: Extension name to look up in
+                ``duckdb_extensions()``, e.g. ``'spatial'``.
+
+        Returns:
+            The matching row as a one-element tuple ``(installed,)``,
+            or ``None`` when no extension with that name is listed.
+        """
+        return self.connection.execute(
+            """
+            SELECT
+                installed
+            FROM
+                duckdb_extensions()
+            WHERE
+                extension_name = $extension
+            """,
+            {"extension": extension},
+        ).fetchone()
+
     def __init__(self, path):
-        self.connection = duckdb.connect(database = path, read_only = True)
+        """Open the DuckDB database at ``path``.
+
+        The connection is opened read-write and the spatial extension is
+        installed when DuckDB reports it as not yet installed.
+        """
+        self.connection = duckdb.connect(database=path)
+
+        # Install spatial extension if not already installed
+        spatial_installed = self.check_duckdb_extensions(extension='spatial')
+        if spatial_installed is not None and not spatial_installed[0]:
+            self.connection.sql("INSTALL spatial")
 
     def db_overview(self):
         return self.connection.sql("DESCRIBE;").show()
@@ -78,3 +111,99 @@ class Database:
             date
         """).show()
 
+    def properties_distance(self):
+        """Count geolocation changes per distance bucket.
+
+        Loads the spatial extension, (re)creates the view
+        ``geolocation_changes`` with one row per changed geolocation and its
+        ``ST_Distance_Sphere`` distance (meters, per the DuckDB spatial docs)
+        between old and new point, then counts rows in four buckets of 25
+        from 0 to 100. Distances of 100 or more are not counted.
+
+        The new coordinates are read from the exception text starting at
+        character 28, i.e. after a 27-character prefix (presumably
+        'geoLocation was different' plus a separator; confirm against the
+        data).
+
+        Returns:
+            The relation of the bucket query with the columns ``category``
+            and ``count_properties``, sorted by ``category``. Empty buckets
+            are reported with a count of 0.
+        """
+        # sql() runs every statement but the last one immediately and returns
+        # the last one as a relation, so only the bucket query is returned.
+        return self.connection.sql("""
+            LOAD spatial;
+
+            CREATE OR REPLACE VIEW geolocation_changes AS
+            SELECT
+                exceptions.entity_id,
+                properties.check_data AS geolocation_original,
+                SUBSTRING(exceptions.exception, 28) AS geolocation_new,
+                ST_Distance_Sphere(
+                    ST_GeomFromText(
+                        CONCAT(
+                            'POINT(',
+                            REPLACE(properties.check_data, ',', ' '),
+                            ')'
+                        )
+                    ),
+                    ST_GeomFromText(
+                        CONCAT(
+                            'POINT(',
+                            REPLACE(SUBSTRING(exceptions.exception, 28), ',', ' '),
+                            ')'
+                        )
+                    )
+                ) AS distance
+            FROM
+                consultancy_d.exceptions
+            LEFT JOIN
+                consultancy_d.properties ON exceptions.entity_id = properties.id
+            WHERE
+                exception LIKE 'geoLocation was different%'
+            GROUP BY
+                entity_id,
+                check_data,
+                geolocation_new
+            ORDER BY
+                distance;
+
+            SELECT
+                '0 bis 25' AS category,
+                COUNT(*) AS count_properties
+            FROM
+                geolocation_changes
+            WHERE
+                distance >= 0
+                AND distance < 25
+            UNION ALL
+            SELECT
+                '25 bis 50' AS category,
+                COUNT(*) AS count_properties
+            FROM
+                geolocation_changes
+            WHERE
+                distance >= 25
+                AND distance < 50
+            UNION ALL
+            SELECT
+                '50 bis 75' AS category,
+                COUNT(*) AS count_properties
+            FROM
+                geolocation_changes
+            WHERE
+                distance >= 50
+                AND distance < 75
+            UNION ALL
+            SELECT
+                '75 bis 100' AS category,
+                COUNT(*) AS count_properties
+            FROM
+                geolocation_changes
+            WHERE
+                distance >= 75
+                AND distance < 100
+            ORDER BY
+                category;
+        """)
diff --git a/src/gio/test_duckdb.py b/src/gio/test_duckdb.py
index a8562ed..c3cbdce 100644
--- a/src/gio/test_duckdb.py
+++ b/src/gio/test_duckdb.py
@@ -1,6 +1,4 @@
 import data
 
 inst = data.load()
-inst.seeds()
-inst.properties_growth()
-inst.properties_per_region()
+inst.properties_distance().show()