# WIP: Performance boost for parcel calculation
* Improves handling of parcel calculation (speed-up of ~30%)
* ToDo: Clean up code
This commit is contained in:
		
							parent
							
								
									5922d5ce06
								
							
						
					
					
						commit
						523e338b1b
					
				@ -6,6 +6,7 @@ Created on: 15.11.21
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import json
 | 
			
		||||
from time import process_time
 | 
			
		||||
 | 
			
		||||
from django.contrib.gis.db.models import MultiPolygonField
 | 
			
		||||
from django.db import models, transaction
 | 
			
		||||
@ -140,7 +141,10 @@ class Geometry(BaseResource):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self._set_parcel_update_start_time()
 | 
			
		||||
        self._perform_parcel_update()
 | 
			
		||||
 | 
			
		||||
        t1 = process_time()
 | 
			
		||||
        self._perform_parcel_update_fast()
 | 
			
		||||
        print(f"Parcel processing: {process_time() - t1}")
 | 
			
		||||
        self._set_parcel_update_end_time()
 | 
			
		||||
 | 
			
		||||
    def _perform_parcel_update(self):
 | 
			
		||||
@ -155,61 +159,151 @@ class Geometry(BaseResource):
 | 
			
		||||
        fetched_parcels = parcel_fetcher.get_parcels()
 | 
			
		||||
        _now = timezone.now()
 | 
			
		||||
        underlying_parcels = []
 | 
			
		||||
        i = 0
 | 
			
		||||
        len_fetched_parcels = len(fetched_parcels)
 | 
			
		||||
        print("Process fetched parcels:")
 | 
			
		||||
        for result in fetched_parcels:
 | 
			
		||||
            with transaction.atomic():
 | 
			
		||||
                # There could be parcels which include the word 'Flur',
 | 
			
		||||
                # which needs to be deleted and just keep the numerical values
 | 
			
		||||
                ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
 | 
			
		||||
                flr_val = result["flur"].replace("Flur ", "")
 | 
			
		||||
            # There could be parcels which include the word 'Flur',
 | 
			
		||||
            # which needs to be deleted and just keep the numerical values
 | 
			
		||||
            ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
 | 
			
		||||
            flr_val = result["flur"].replace("Flur ", "")
 | 
			
		||||
            district = District.objects.get_or_create(
 | 
			
		||||
                key=result["kreisschl"],
 | 
			
		||||
                name=result["kreis"],
 | 
			
		||||
            )[0]
 | 
			
		||||
            municipal = Municipal.objects.get_or_create(
 | 
			
		||||
                key=result["gmdschl"],
 | 
			
		||||
                name=result["gemeinde"],
 | 
			
		||||
                district=district,
 | 
			
		||||
            )[0]
 | 
			
		||||
            parcel_group = ParcelGroup.objects.get_or_create(
 | 
			
		||||
                key=result["gemaschl"],
 | 
			
		||||
                name=result["gemarkung"],
 | 
			
		||||
                municipal=municipal,
 | 
			
		||||
            )[0]
 | 
			
		||||
            flrstck_nnr = result['flstnrnen']
 | 
			
		||||
            if not flrstck_nnr:
 | 
			
		||||
                flrstck_nnr = None
 | 
			
		||||
            flrstck_zhlr = result['flstnrzae']
 | 
			
		||||
            if not flrstck_zhlr:
 | 
			
		||||
                flrstck_zhlr = None
 | 
			
		||||
            parcel_obj = Parcel.objects.get_or_create(
 | 
			
		||||
                district=district,
 | 
			
		||||
                municipal=municipal,
 | 
			
		||||
                parcel_group=parcel_group,
 | 
			
		||||
                flr=flr_val,
 | 
			
		||||
                flrstck_nnr=flrstck_nnr,
 | 
			
		||||
                flrstck_zhlr=flrstck_zhlr,
 | 
			
		||||
            )[0]
 | 
			
		||||
            parcel_obj.district = district
 | 
			
		||||
            parcel_obj.updated_on = _now
 | 
			
		||||
            parcel_obj.save()
 | 
			
		||||
            underlying_parcels.append(parcel_obj)
 | 
			
		||||
            i += 1
 | 
			
		||||
            if i % 100 == 0:
 | 
			
		||||
                print(f"    {i}/{len_fetched_parcels}")
 | 
			
		||||
 | 
			
		||||
        # Update the linked parcels
 | 
			
		||||
        #self.parcels.clear()
 | 
			
		||||
        self.parcels.set(underlying_parcels)
 | 
			
		||||
 | 
			
		||||
        # Set the calculated_on intermediate field, so this related data will be found on lookups
 | 
			
		||||
        #intersections_without_ts = self.parcelintersection_set.filter(
 | 
			
		||||
        #    parcel__in=self.parcels.all(),
 | 
			
		||||
        #    calculated_on__isnull=True,
 | 
			
		||||
        #)
 | 
			
		||||
        #for entry in intersections_without_ts:
 | 
			
		||||
        #    entry.calculated_on = _now
 | 
			
		||||
        #ParcelIntersection.objects.bulk_update(
 | 
			
		||||
        #    intersections_without_ts,
 | 
			
		||||
        #    ["calculated_on"]
 | 
			
		||||
        #)
 | 
			
		||||
 | 
			
		||||
    def _perform_parcel_update_fast(self):
 | 
			
		||||
        """
 | 
			
		||||
        Performs the main logic of parcel updating.
 | 
			
		||||
        """
 | 
			
		||||
        from konova.models import Parcel, District, Municipal, ParcelGroup
 | 
			
		||||
 | 
			
		||||
        parcel_fetcher = ParcelFetcher(
 | 
			
		||||
            geometry=self
 | 
			
		||||
        )
 | 
			
		||||
        fetched_parcels = parcel_fetcher.get_parcels()
 | 
			
		||||
        _now = timezone.now()
 | 
			
		||||
        underlying_parcels = []
 | 
			
		||||
 | 
			
		||||
        i = 0
 | 
			
		||||
        len_fetched_parcels = len(fetched_parcels)
 | 
			
		||||
        print("Process fetched parcels:")
 | 
			
		||||
 | 
			
		||||
        districts = {}
 | 
			
		||||
        municipals = {}
 | 
			
		||||
        parcel_groups = {}
 | 
			
		||||
 | 
			
		||||
        for result in fetched_parcels:
 | 
			
		||||
            # There could be parcels which include the word 'Flur',
 | 
			
		||||
            # which needs to be deleted and just keep the numerical values
 | 
			
		||||
            ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
 | 
			
		||||
            flr_val = result["flur"].replace("Flur ", "")
 | 
			
		||||
 | 
			
		||||
            # Get district (cache in dict)
 | 
			
		||||
            try:
 | 
			
		||||
                district = districts["kreisschl"]
 | 
			
		||||
            except KeyError:
 | 
			
		||||
                district = District.objects.get_or_create(
 | 
			
		||||
                    key=result["kreisschl"],
 | 
			
		||||
                    name=result["kreis"],
 | 
			
		||||
                )[0]
 | 
			
		||||
                districts[district.key] = district
 | 
			
		||||
 | 
			
		||||
            # Get municipal (cache in dict)
 | 
			
		||||
            try:
 | 
			
		||||
                municipal = municipals["gmdschl"]
 | 
			
		||||
            except KeyError:
 | 
			
		||||
                municipal = Municipal.objects.get_or_create(
 | 
			
		||||
                    key=result["gmdschl"],
 | 
			
		||||
                    name=result["gemeinde"],
 | 
			
		||||
                    district=district,
 | 
			
		||||
                )[0]
 | 
			
		||||
                municipals[municipal.key] = municipal
 | 
			
		||||
 | 
			
		||||
            # Get parcel group (cache in dict)
 | 
			
		||||
            try:
 | 
			
		||||
                parcel_group = parcel_groups["gemaschl"]
 | 
			
		||||
            except KeyError:
 | 
			
		||||
                parcel_group = ParcelGroup.objects.get_or_create(
 | 
			
		||||
                    key=result["gemaschl"],
 | 
			
		||||
                    name=result["gemarkung"],
 | 
			
		||||
                    municipal=municipal,
 | 
			
		||||
                )[0]
 | 
			
		||||
                flrstck_nnr = result['flstnrnen']
 | 
			
		||||
                if not flrstck_nnr:
 | 
			
		||||
                    flrstck_nnr = None
 | 
			
		||||
                flrstck_zhlr = result['flstnrzae']
 | 
			
		||||
                if not flrstck_zhlr:
 | 
			
		||||
                    flrstck_zhlr = None
 | 
			
		||||
                parcel_obj = Parcel.objects.get_or_create(
 | 
			
		||||
                    district=district,
 | 
			
		||||
                    municipal=municipal,
 | 
			
		||||
                    parcel_group=parcel_group,
 | 
			
		||||
                    flr=flr_val,
 | 
			
		||||
                    flrstck_nnr=flrstck_nnr,
 | 
			
		||||
                    flrstck_zhlr=flrstck_zhlr,
 | 
			
		||||
                )[0]
 | 
			
		||||
                parcel_obj.district = district
 | 
			
		||||
                parcel_obj.updated_on = _now
 | 
			
		||||
                parcel_obj.save()
 | 
			
		||||
                parcel_groups[parcel_group.key] = parcel_group
 | 
			
		||||
 | 
			
		||||
            # Preprocess parcel data
 | 
			
		||||
            flrstck_nnr = result['flstnrnen']
 | 
			
		||||
            if not flrstck_nnr:
 | 
			
		||||
                flrstck_nnr = None
 | 
			
		||||
            flrstck_zhlr = result['flstnrzae']
 | 
			
		||||
            if not flrstck_zhlr:
 | 
			
		||||
                flrstck_zhlr = None
 | 
			
		||||
 | 
			
		||||
            parcel_obj = Parcel.objects.get_or_create(
 | 
			
		||||
                district=district,
 | 
			
		||||
                municipal=municipal,
 | 
			
		||||
                parcel_group=parcel_group,
 | 
			
		||||
                flr=flr_val,
 | 
			
		||||
                flrstck_nnr=flrstck_nnr,
 | 
			
		||||
                flrstck_zhlr=flrstck_zhlr,
 | 
			
		||||
            )[0]
 | 
			
		||||
            parcel_obj.updated_on = _now
 | 
			
		||||
            parcel_obj.save()
 | 
			
		||||
            underlying_parcels.append(parcel_obj)
 | 
			
		||||
            i += 1
 | 
			
		||||
            if i % 100 == 0:
 | 
			
		||||
                print(f"    {i}/{len_fetched_parcels}")
 | 
			
		||||
 | 
			
		||||
        # Update the linked parcels
 | 
			
		||||
        self.parcels.clear()
 | 
			
		||||
        # Update linked parcels
 | 
			
		||||
        self.parcels.set(underlying_parcels)
 | 
			
		||||
 | 
			
		||||
        # Set the calculated_on intermediate field, so this related data will be found on lookups
 | 
			
		||||
        intersections_without_ts = self.parcelintersection_set.filter(
 | 
			
		||||
            parcel__in=self.parcels.all(),
 | 
			
		||||
            calculated_on__isnull=True,
 | 
			
		||||
        )
 | 
			
		||||
        for entry in intersections_without_ts:
 | 
			
		||||
            entry.calculated_on = _now
 | 
			
		||||
        ParcelIntersection.objects.bulk_update(
 | 
			
		||||
            intersections_without_ts,
 | 
			
		||||
            ["calculated_on"]
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @transaction.atomic
 | 
			
		||||
    def _set_parcel_update_start_time(self):
 | 
			
		||||
        """
 | 
			
		||||
@ -233,9 +327,7 @@ class Geometry(BaseResource):
 | 
			
		||||
        Returns:
 | 
			
		||||
            parcels (QuerySet): The related parcels as queryset
 | 
			
		||||
        """
 | 
			
		||||
        parcels = self.parcels.filter(
 | 
			
		||||
            parcelintersection__calculated_on__isnull=False,
 | 
			
		||||
        ).prefetch_related(
 | 
			
		||||
        parcels = self.parcels.prefetch_related(
 | 
			
		||||
            "district",
 | 
			
		||||
            "municipal",
 | 
			
		||||
        ).order_by(
 | 
			
		||||
@ -305,6 +397,33 @@ class Geometry(BaseResource):
 | 
			
		||||
        }
 | 
			
		||||
        return geojson
 | 
			
		||||
 | 
			
		||||
    @property
    def complexity_factor(self) -> float:
        """ Estimates the complexity of a Geometry as a factor between 0 and 1

        0 = very low complexity
        1 = very high complexity

        ASSUMPTION:
        The envelope is the bounding box of a geometry. If the geometry's area is similar to the area of its
        bounding box, it is considered as rather simple, since it seems to be a compact shape like a simple box.
        If the geometry has a very big bounding box, but the geometry's own area is rather small,
        compared to the one of the bounding box, the complexity can be higher.

        Example:
            geometry area similar to bounding box --> geometry / bounding_box ~ 1
            geometry area far smaller than bb     --> geometry / bounding_box ~ 0

        The ratio is inverted (1 - ratio) for better understanding of 'low' and 'high' complexity.

        Returns:
            complexity_factor (float): The estimated complexity, always within [0, 1]
        """
        geom = self.geom
        if geom is None:
            # No geometry stored at all: nothing to process, treat as lowest complexity
            return 0.0

        envelope_area = geom.envelope.area
        if envelope_area <= 0:
            # Empty or degenerate geometry (no area): avoid a division by zero
            return 0.0

        # Share of the bounding box that is actually covered by the geometry.
        # Dividing by the envelope area (not by the area of 'envelope - geometry') keeps the ratio
        # within [0, 1] and stays defined for geometries which fill their bounding box completely.
        fill_ratio = geom.area / envelope_area

        # Clamp to guard against floating point noise slightly outside of [0, 1]
        return min(1.0, max(0.0, 1.0 - fill_ratio))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GeometryConflict(UuidModel):
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
@ -49,5 +49,5 @@ ETS_GROUP = "Conservation office"
 | 
			
		||||
# GEOMETRY
 | 
			
		||||
## Max number of allowed vertices. Geometries larger will be simplified until they reach this threshold
 | 
			
		||||
GEOM_MAX_VERTICES = 10000
 | 
			
		||||
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 5 minutes)
 | 
			
		||||
GEOM_THRESHOLD_RECALCULATION_SECONDS = 300
 | 
			
		||||
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 30 minutes)
 | 
			
		||||
GEOM_THRESHOLD_RECALCULATION_SECONDS = 60 * 30
 | 
			
		||||
 | 
			
		||||
@ -135,6 +135,7 @@ DATABASES = {
 | 
			
		||||
        'USER': 'postgres',
 | 
			
		||||
        'HOST': '127.0.0.1',
 | 
			
		||||
        'PORT': '5432',
 | 
			
		||||
        'CONN_MAX_AGE': 120,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
 | 
			
		||||
 | 
			
		||||
@ -10,13 +10,14 @@ def celery_update_parcels(geometry_id: str, recheck: bool = True):
 | 
			
		||||
    from konova.models import Geometry, ParcelIntersection
 | 
			
		||||
    try:
 | 
			
		||||
        geom = Geometry.objects.get(id=geometry_id)
 | 
			
		||||
        objs = geom.parcelintersection_set.all()
 | 
			
		||||
        for obj in objs:
 | 
			
		||||
            obj.calculated_on = None
 | 
			
		||||
        ParcelIntersection.objects.bulk_update(
 | 
			
		||||
            objs,
 | 
			
		||||
            ["calculated_on"]
 | 
			
		||||
        )
 | 
			
		||||
        geom.parcels.clear()
 | 
			
		||||
        #objs = geom.parcelintersection_set.all()
 | 
			
		||||
        #for obj in objs:
 | 
			
		||||
        #    obj.calculated_on = None
 | 
			
		||||
        #ParcelIntersection.objects.bulk_update(
 | 
			
		||||
        #    objs,
 | 
			
		||||
        #    ["calculated_on"]
 | 
			
		||||
        #)
 | 
			
		||||
 | 
			
		||||
        geom.update_parcels()
 | 
			
		||||
    except ObjectDoesNotExist:
 | 
			
		||||
 | 
			
		||||
@ -37,30 +37,38 @@ class GeomParcelsView(LoginRequiredMixin, View):
 | 
			
		||||
        # https://htmx.org/docs/#polling
 | 
			
		||||
        status_code = 286
 | 
			
		||||
        template = "konova/includes/parcels/parcel_table_frame.html"
 | 
			
		||||
 | 
			
		||||
        geom = get_object_or_404(Geometry, id=id)
 | 
			
		||||
        parcels = geom.get_underlying_parcels()
 | 
			
		||||
        geos_geom = geom.geom or MultiPolygon(srid=DEFAULT_SRID_RLP)
 | 
			
		||||
        geometry_exists = not geos_geom.empty and geos_geom.area > 0
 | 
			
		||||
        geom_parcel_update_started = geom.parcel_update_start is not None
 | 
			
		||||
        geom_parcel_update_finished = geom.parcel_update_end is not None
 | 
			
		||||
 | 
			
		||||
        parcels = geom.get_underlying_parcels()
 | 
			
		||||
        parcels_exist = len(parcels) > 0
 | 
			
		||||
 | 
			
		||||
        waiting_too_long = self._check_waiting_too_long(geom)
 | 
			
		||||
 | 
			
		||||
        geometry_exists = not geos_geom.empty and geos_geom.area > 0
 | 
			
		||||
        parcels_are_currently_calculated = (
 | 
			
		||||
                geometry_exists and
 | 
			
		||||
                geom.parcel_update_start and
 | 
			
		||||
                not geom.parcel_update_end
 | 
			
		||||
                not parcels_exist and
 | 
			
		||||
                geom_parcel_update_started and
 | 
			
		||||
                not geom_parcel_update_finished
 | 
			
		||||
        )
 | 
			
		||||
        parcels_available = len(parcels) > 0
 | 
			
		||||
 | 
			
		||||
        if not parcels_exist and waiting_too_long:
 | 
			
		||||
            # Trigger calculation again - process may have failed in the background
 | 
			
		||||
            celery_update_parcels.delay(geom.id)
 | 
			
		||||
            parcels_are_currently_calculated = True
 | 
			
		||||
 | 
			
		||||
        if parcels_are_currently_calculated:
 | 
			
		||||
            # Parcels are being calculated right now. Change the status code, so polling stays active for fetching
 | 
			
		||||
            # results after the calculation
 | 
			
		||||
            status_code = 200
 | 
			
		||||
 | 
			
		||||
        if waiting_too_long:
 | 
			
		||||
            # Trigger calculation again
 | 
			
		||||
            celery_update_parcels.delay(geom.id)
 | 
			
		||||
 | 
			
		||||
        if parcels_available or not geometry_exists:
 | 
			
		||||
        if parcels_exist or not geometry_exists:
 | 
			
		||||
            # Default case: Parcels are calculated or there is no geometry at all
 | 
			
		||||
            # (so there will be no parcels to expect)
 | 
			
		||||
            municipals = geom.get_underlying_municipals(parcels)
 | 
			
		||||
 | 
			
		||||
            rpp = 100
 | 
			
		||||
@ -88,13 +96,16 @@ class GeomParcelsView(LoginRequiredMixin, View):
 | 
			
		||||
        Depending on the geometry's modified attribute
 | 
			
		||||
 | 
			
		||||
        """
 | 
			
		||||
        # Scale time to wait longer with increasing geometry complexity
 | 
			
		||||
        complexity_factor = geom.complexity_factor + 1
 | 
			
		||||
        wait_for_seconds = int(GEOM_THRESHOLD_RECALCULATION_SECONDS * complexity_factor)
 | 
			
		||||
        try:
 | 
			
		||||
            pcs_diff = (timezone.now() - geom.parcel_update_start).seconds
 | 
			
		||||
        except TypeError:
 | 
			
		||||
            pcs_diff = GEOM_THRESHOLD_RECALCULATION_SECONDS
 | 
			
		||||
            pcs_diff = wait_for_seconds
 | 
			
		||||
 | 
			
		||||
        calculation_not_finished = geom.parcel_update_end is None
 | 
			
		||||
        waiting_too_long = (pcs_diff >= GEOM_THRESHOLD_RECALCULATION_SECONDS) and calculation_not_finished
 | 
			
		||||
        waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
 | 
			
		||||
        return waiting_too_long
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user