From 523e338b1babfc14440aafa2f901c4c7f83b967b Mon Sep 17 00:00:00 2001 From: mpeltriaux Date: Tue, 16 Jan 2024 07:57:29 +0100 Subject: [PATCH] # WIP: Performance boost parcel calculation * improves handling of parcel calculation (speed up by ~30%) * ToDo: Clean up code --- konova/models/geometry.py | 199 ++++++++++++++++++++----- konova/settings.py | 4 +- konova/sub_settings/django_settings.py | 1 + konova/tasks.py | 15 +- konova/views/geometry.py | 35 +++-- 5 files changed, 193 insertions(+), 61 deletions(-) diff --git a/konova/models/geometry.py b/konova/models/geometry.py index 0d348c3..b08def6 100644 --- a/konova/models/geometry.py +++ b/konova/models/geometry.py @@ -6,6 +6,7 @@ Created on: 15.11.21 """ import json +from time import process_time from django.contrib.gis.db.models import MultiPolygonField from django.db import models, transaction @@ -140,7 +141,10 @@ class Geometry(BaseResource): return self._set_parcel_update_start_time() - self._perform_parcel_update() + + t1 = process_time() + self._perform_parcel_update_fast() + print(f"Parcel processing: {process_time() - t1}") self._set_parcel_update_end_time() def _perform_parcel_update(self): @@ -155,61 +159,151 @@ class Geometry(BaseResource): fetched_parcels = parcel_fetcher.get_parcels() _now = timezone.now() underlying_parcels = [] + i = 0 + len_fetched_parcels = len(fetched_parcels) + print("Process fetched parcels:") + for result in fetched_parcels: + # There could be parcels which include the word 'Flur', + # which needs to be deleted and just keep the numerical values + ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE! 
+ flr_val = result["flur"].replace("Flur ", "") + district = District.objects.get_or_create( + key=result["kreisschl"], + name=result["kreis"], + )[0] + municipal = Municipal.objects.get_or_create( + key=result["gmdschl"], + name=result["gemeinde"], + district=district, + )[0] + parcel_group = ParcelGroup.objects.get_or_create( + key=result["gemaschl"], + name=result["gemarkung"], + municipal=municipal, + )[0] + flrstck_nnr = result['flstnrnen'] + if not flrstck_nnr: + flrstck_nnr = None + flrstck_zhlr = result['flstnrzae'] + if not flrstck_zhlr: + flrstck_zhlr = None + parcel_obj = Parcel.objects.get_or_create( + district=district, + municipal=municipal, + parcel_group=parcel_group, + flr=flr_val, + flrstck_nnr=flrstck_nnr, + flrstck_zhlr=flrstck_zhlr, + )[0] + parcel_obj.district = district + parcel_obj.updated_on = _now + parcel_obj.save() + underlying_parcels.append(parcel_obj) + i += 1 + if i % 100 == 0: + print(f" {i}/{len_fetched_parcels}") + + # Update the linked parcels + #self.parcels.clear() + self.parcels.set(underlying_parcels) + + # Set the calculated_on intermediate field, so this related data will be found on lookups + #intersections_without_ts = self.parcelintersection_set.filter( + # parcel__in=self.parcels.all(), + # calculated_on__isnull=True, + #) + #for entry in intersections_without_ts: + # entry.calculated_on = _now + #ParcelIntersection.objects.bulk_update( + # intersections_without_ts, + # ["calculated_on"] + #) + + def _perform_parcel_update_fast(self): + """ + Performs the main logic of parcel updating. 
+ """ + from konova.models import Parcel, District, Municipal, ParcelGroup + + parcel_fetcher = ParcelFetcher( + geometry=self + ) + fetched_parcels = parcel_fetcher.get_parcels() + _now = timezone.now() + underlying_parcels = [] + + i = 0 + len_fetched_parcels = len(fetched_parcels) + print("Process fetched parcels:") + + districts = {} + municipals = {} + parcel_groups = {} + for result in fetched_parcels: - with transaction.atomic(): - # There could be parcels which include the word 'Flur', - # which needs to be deleted and just keep the numerical values - ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE! - flr_val = result["flur"].replace("Flur ", "") + # There could be parcels which include the word 'Flur', + # which needs to be deleted and just keep the numerical values + ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE! + flr_val = result["flur"].replace("Flur ", "") + + # Get district (cache in dict) + try: + district = districts[result["kreisschl"]] + except KeyError: district = District.objects.get_or_create( key=result["kreisschl"], name=result["kreis"], )[0] + districts[district.key] = district + + # Get municipal (cache in dict) + try: + municipal = municipals[result["gmdschl"]] + except KeyError: municipal = Municipal.objects.get_or_create( key=result["gmdschl"], name=result["gemeinde"], district=district, )[0] + municipals[municipal.key] = municipal + + # Get parcel group (cache in dict) + try: + parcel_group = parcel_groups[result["gemaschl"]] + except KeyError: parcel_group = ParcelGroup.objects.get_or_create( key=result["gemaschl"], name=result["gemarkung"], municipal=municipal, )[0] - flrstck_nnr = result['flstnrnen'] - if not flrstck_nnr: - flrstck_nnr = None - flrstck_zhlr = result['flstnrzae'] - if not flrstck_zhlr: - flrstck_zhlr = None - parcel_obj = Parcel.objects.get_or_create( - district=district, - municipal=municipal, - parcel_group=parcel_group, - flr=flr_val, - flrstck_nnr=flrstck_nnr, - flrstck_zhlr=flrstck_zhlr, -
)[0] - parcel_obj.district = district - parcel_obj.updated_on = _now - parcel_obj.save() + parcel_groups[parcel_group.key] = parcel_group + + # Preprocess parcel data + flrstck_nnr = result['flstnrnen'] + if not flrstck_nnr: + flrstck_nnr = None + flrstck_zhlr = result['flstnrzae'] + if not flrstck_zhlr: + flrstck_zhlr = None + + parcel_obj = Parcel.objects.get_or_create( + district=district, + municipal=municipal, + parcel_group=parcel_group, + flr=flr_val, + flrstck_nnr=flrstck_nnr, + flrstck_zhlr=flrstck_zhlr, + )[0] + parcel_obj.updated_on = _now + parcel_obj.save() underlying_parcels.append(parcel_obj) + i += 1 + if i % 100 == 0: + print(f" {i}/{len_fetched_parcels}") - # Update the linked parcels - self.parcels.clear() + # Update linked parcels self.parcels.set(underlying_parcels) - # Set the calculated_on intermediate field, so this related data will be found on lookups - intersections_without_ts = self.parcelintersection_set.filter( - parcel__in=self.parcels.all(), - calculated_on__isnull=True, - ) - for entry in intersections_without_ts: - entry.calculated_on = _now - ParcelIntersection.objects.bulk_update( - intersections_without_ts, - ["calculated_on"] - ) - @transaction.atomic def _set_parcel_update_start_time(self): """ @@ -233,9 +327,7 @@ class Geometry(BaseResource): Returns: parcels (QuerySet): The related parcels as queryset """ - parcels = self.parcels.filter( - parcelintersection__calculated_on__isnull=False, - ).prefetch_related( + parcels = self.parcels.prefetch_related( "district", "municipal", ).order_by( @@ -305,6 +397,33 @@ class Geometry(BaseResource): } return geojson + @property + def complexity_factor(self) -> float: + """ Calculates a factor to estimate the complexity of a Geometry + + 0 = very low complexity + 1 = very high complexity + + ASSUMPTION: + The envelope is the bounding box of a geometry. 
If the geometry's area is similar to the area of its bounding + box, it is considered as rather simple, since it seems to be a closer shape like a simple box. + If the geometry has a very big bounding box, but the geometry's own area is rather small, + compared to the one of the bounding box, the complexity can be higher. + + Example: + geometry area similar to bounding box --> geometry / bounding_box ~ 1 + geometry area far smaller than bb --> geometry / bounding_box ~ 0 + + Result is being inverted for better understanding of 'low' and 'high' complexity. + + Returns: + complexity_factor (float): The estimated complexity (0 if there is no geometry or its bounding box has no area) + """ + geom = self.geom + envelope_area = geom.envelope.area if geom is not None else 0 + complexity_factor = (1 - geom.area / envelope_area) if envelope_area > 0 else 0.0 + return complexity_factor + class GeometryConflict(UuidModel): """ diff --git a/konova/settings.py b/konova/settings.py index 3e5c9ae..e1e0e46 100644 --- a/konova/settings.py +++ b/konova/settings.py @@ -49,5 +49,5 @@ ETS_GROUP = "Conservation office" # GEOMETRY ## Max number of allowed vertices.
Geometries larger will be simplified until they reach this threshold GEOM_MAX_VERTICES = 10000 -## Max seconds to wait for a parcel calculation result before a new request will be started (default: 5 minutes) -GEOM_THRESHOLD_RECALCULATION_SECONDS = 300 +## Max seconds to wait for a parcel calculation result before a new request will be started (default: 30 minutes) +GEOM_THRESHOLD_RECALCULATION_SECONDS = 60 * 30 diff --git a/konova/sub_settings/django_settings.py b/konova/sub_settings/django_settings.py index 7eefa7e..8962a81 100644 --- a/konova/sub_settings/django_settings.py +++ b/konova/sub_settings/django_settings.py @@ -135,6 +135,7 @@ DATABASES = { 'USER': 'postgres', 'HOST': '127.0.0.1', 'PORT': '5432', + 'CONN_MAX_AGE': 120, } } DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/konova/tasks.py b/konova/tasks.py index aa8a65b..f333bb2 100644 --- a/konova/tasks.py +++ b/konova/tasks.py @@ -10,13 +10,14 @@ def celery_update_parcels(geometry_id: str, recheck: bool = True): from konova.models import Geometry, ParcelIntersection try: geom = Geometry.objects.get(id=geometry_id) - objs = geom.parcelintersection_set.all() - for obj in objs: - obj.calculated_on = None - ParcelIntersection.objects.bulk_update( - objs, - ["calculated_on"] - ) + geom.parcels.clear() + #objs = geom.parcelintersection_set.all() + #for obj in objs: + # obj.calculated_on = None + #ParcelIntersection.objects.bulk_update( + # objs, + # ["calculated_on"] + #) geom.update_parcels() except ObjectDoesNotExist: diff --git a/konova/views/geometry.py b/konova/views/geometry.py index d0ab642..ead32b9 100644 --- a/konova/views/geometry.py +++ b/konova/views/geometry.py @@ -37,30 +37,38 @@ class GeomParcelsView(LoginRequiredMixin, View): # https://htmx.org/docs/#polling status_code = 286 template = "konova/includes/parcels/parcel_table_frame.html" + geom = get_object_or_404(Geometry, id=id) - parcels = geom.get_underlying_parcels() geos_geom = geom.geom or 
MultiPolygon(srid=DEFAULT_SRID_RLP) + geometry_exists = not geos_geom.empty and geos_geom.area > 0 + geom_parcel_update_started = geom.parcel_update_start is not None + geom_parcel_update_finished = geom.parcel_update_end is not None + + parcels = geom.get_underlying_parcels() + parcels_exist = len(parcels) > 0 waiting_too_long = self._check_waiting_too_long(geom) - geometry_exists = not geos_geom.empty and geos_geom.area > 0 parcels_are_currently_calculated = ( geometry_exists and - geom.parcel_update_start and - not geom.parcel_update_end + not parcels_exist and + geom_parcel_update_started and + not geom_parcel_update_finished ) - parcels_available = len(parcels) > 0 + + if not parcels_exist and waiting_too_long: + # Trigger calculation again - process may have failed in the background + celery_update_parcels.delay(geom.id) + parcels_are_currently_calculated = True if parcels_are_currently_calculated: # Parcels are being calculated right now. Change the status code, so polling stays active for fetching # results after the calculation status_code = 200 - if waiting_too_long: - # Trigger calculation again - celery_update_parcels.delay(geom.id) - - if parcels_available or not geometry_exists: + if parcels_exist or not geometry_exists: + # Default case: Parcels are calculated or there is no geometry at all + # (so there will be no parcels to expect) municipals = geom.get_underlying_municipals(parcels) rpp = 100 @@ -88,13 +96,16 @@ class GeomParcelsView(LoginRequiredMixin, View): Depending on the geometry's modified attribute """ + # Scale time to wait longer with increasing geometry complexity + complexity_factor = geom.complexity_factor + 1 + wait_for_seconds = int(GEOM_THRESHOLD_RECALCULATION_SECONDS * complexity_factor) try: - pcs_diff = (timezone.now() - geom.parcel_update_start).seconds + pcs_diff = int((timezone.now() - geom.parcel_update_start).total_seconds()) except TypeError: - pcs_diff = GEOM_THRESHOLD_RECALCULATION_SECONDS + pcs_diff = wait_for_seconds calculation_not_finished = geom.parcel_update_end is None - waiting_too_long =
(pcs_diff >= GEOM_THRESHOLD_RECALCULATION_SECONDS) and calculation_not_finished + waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished return waiting_too_long