# Geom parcel performance improvement

* refactors parcel calculation, resulting in 1.3-1.6x better performance
* optimizes the parcel fetching view
This commit is contained in:
mpeltriaux 2024-01-17 11:22:21 +01:00
parent 8bcccb4685
commit cf90f9710c
2 changed files with 57 additions and 107 deletions

View File

@ -6,9 +6,9 @@ Created on: 15.11.21
""" """
import json import json
from time import process_time
from django.contrib.gis.db.models import MultiPolygonField from django.contrib.gis.db.models import MultiPolygonField
from django.core.exceptions import ObjectDoesNotExist
from django.db import models, transaction from django.db import models, transaction
from django.utils import timezone from django.utils import timezone
@ -141,85 +141,10 @@ class Geometry(BaseResource):
return return
self._set_parcel_update_start_time() self._set_parcel_update_start_time()
self._perform_parcel_update()
t1 = process_time()
self._perform_parcel_update_fast()
print(f"Parcel processing: {process_time() - t1}")
self._set_parcel_update_end_time() self._set_parcel_update_end_time()
def _perform_parcel_update(self): def _perform_parcel_update(self):
"""
Performs the main logic of parcel updating.
"""
from konova.models import Parcel, District, ParcelIntersection, Municipal, ParcelGroup
parcel_fetcher = ParcelFetcher(
geometry=self
)
fetched_parcels = parcel_fetcher.get_parcels()
_now = timezone.now()
underlying_parcels = []
i = 0
len_fetched_parcels = len(fetched_parcels)
print("Process fetched parcels:")
for result in fetched_parcels:
# There could be parcels which include the word 'Flur',
# which needs to be deleted and just keep the numerical values
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
flr_val = result["flur"].replace("Flur ", "")
district = District.objects.get_or_create(
key=result["kreisschl"],
name=result["kreis"],
)[0]
municipal = Municipal.objects.get_or_create(
key=result["gmdschl"],
name=result["gemeinde"],
district=district,
)[0]
parcel_group = ParcelGroup.objects.get_or_create(
key=result["gemaschl"],
name=result["gemarkung"],
municipal=municipal,
)[0]
flrstck_nnr = result['flstnrnen']
if not flrstck_nnr:
flrstck_nnr = None
flrstck_zhlr = result['flstnrzae']
if not flrstck_zhlr:
flrstck_zhlr = None
parcel_obj = Parcel.objects.get_or_create(
district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
)[0]
parcel_obj.district = district
parcel_obj.updated_on = _now
parcel_obj.save()
underlying_parcels.append(parcel_obj)
i += 1
if i % 100 == 0:
print(f" {i}/{len_fetched_parcels}")
# Update the linked parcels
#self.parcels.clear()
self.parcels.set(underlying_parcels)
# Set the calculated_on intermediate field, so this related data will be found on lookups
#intersections_without_ts = self.parcelintersection_set.filter(
# parcel__in=self.parcels.all(),
# calculated_on__isnull=True,
#)
#for entry in intersections_without_ts:
# entry.calculated_on = _now
#ParcelIntersection.objects.bulk_update(
# intersections_without_ts,
# ["calculated_on"]
#)
def _perform_parcel_update_fast(self):
""" """
Performs the main logic of parcel updating. Performs the main logic of parcel updating.
""" """
@ -230,16 +155,13 @@ class Geometry(BaseResource):
) )
fetched_parcels = parcel_fetcher.get_parcels() fetched_parcels = parcel_fetcher.get_parcels()
_now = timezone.now() _now = timezone.now()
underlying_parcels = []
i = 0
len_fetched_parcels = len(fetched_parcels)
print("Process fetched parcels:")
districts = {} districts = {}
municipals = {} municipals = {}
parcel_groups = {} parcel_groups = {}
parcels_to_update = []
parcels_to_create = []
for result in fetched_parcels: for result in fetched_parcels:
# There could be parcels which include the word 'Flur', # There could be parcels which include the word 'Flur',
# which needs to be deleted and just keep the numerical values # which needs to be deleted and just keep the numerical values
@ -280,28 +202,57 @@ class Geometry(BaseResource):
# Preprocess parcel data # Preprocess parcel data
flrstck_nnr = result['flstnrnen'] flrstck_nnr = result['flstnrnen']
if not flrstck_nnr: match flrstck_nnr:
case "":
flrstck_nnr = None flrstck_nnr = None
flrstck_zhlr = result['flstnrzae'] flrstck_zhlr = result['flstnrzae']
if not flrstck_zhlr: match flrstck_zhlr:
case "":
flrstck_zhlr = None flrstck_zhlr = None
parcel_obj = Parcel.objects.get_or_create( try:
# Try to fetch parcel from db. If it already exists, just update timestamp.
parcel_obj = Parcel.objects.get(
district=district, district=district,
municipal=municipal, municipal=municipal,
parcel_group=parcel_group, parcel_group=parcel_group,
flr=flr_val, flr=flr_val,
flrstck_nnr=flrstck_nnr, flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr, flrstck_zhlr=flrstck_zhlr,
)[0] )
parcel_obj.updated_on = _now parcel_obj.updated_on = _now
parcel_obj.save() parcels_to_update.append(parcel_obj)
underlying_parcels.append(parcel_obj) except ObjectDoesNotExist:
i += 1 # If not existing, create object but do not commit, yet
if i % 100 == 0: parcel_obj = Parcel(
print(f" {i}/{len_fetched_parcels}") district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
updated_on=_now,
)
parcels_to_create.append(parcel_obj)
# Update linked parcels # Create new parcels
Parcel.objects.bulk_create(
parcels_to_create,
batch_size=500
)
# Update existing parcels
Parcel.objects.bulk_update(
parcels_to_update,
[
"updated_on"
],
batch_size=500
)
# Update linking to geometry
parcel_ids = [x.id for x in parcels_to_update] + [x.id for x in parcels_to_create]
underlying_parcels = Parcel.objects.filter(id__in=parcel_ids)
self.parcels.set(underlying_parcels) self.parcels.set(underlying_parcels)
@transaction.atomic @transaction.atomic

View File

@ -45,18 +45,18 @@ class GeomParcelsView(LoginRequiredMixin, View):
geom_parcel_update_finished = geom.parcel_update_end is not None geom_parcel_update_finished = geom.parcel_update_end is not None
parcels = geom.get_underlying_parcels() parcels = geom.get_underlying_parcels()
parcels_exist = len(parcels) > 0 parcels_are_available = len(parcels) > 0
waiting_too_long = self._check_waiting_too_long(geom) waiting_too_long = self._check_waiting_too_long(geom)
parcels_are_currently_calculated = ( parcels_are_currently_calculated = (
geometry_exists and geometry_exists and
not parcels_exist and not parcels_are_available and
geom_parcel_update_started and geom_parcel_update_started and
not geom_parcel_update_finished not geom_parcel_update_finished
) )
if not parcels_exist and waiting_too_long: if not parcels_are_available and waiting_too_long:
# Trigger calculation again - process may have failed in the background # Trigger calculation again - process may have failed in the background
celery_update_parcels.delay(geom.id) celery_update_parcels.delay(geom.id)
parcels_are_currently_calculated = True parcels_are_currently_calculated = True
@ -66,7 +66,7 @@ class GeomParcelsView(LoginRequiredMixin, View):
# results after the calculation # results after the calculation
status_code = 200 status_code = 200
if parcels_exist or not geometry_exists: if parcels_are_available or not geometry_exists:
# Default case: Parcels are calculated or there is no geometry at all # Default case: Parcels are calculated or there is no geometry at all
# (so there will be no parcels to expect) # (so there will be no parcels to expect)
municipals = geom.get_underlying_municipals(parcels) municipals = geom.get_underlying_municipals(parcels)
@ -104,8 +104,7 @@ class GeomParcelsView(LoginRequiredMixin, View):
except TypeError: except TypeError:
pcs_diff = wait_for_seconds pcs_diff = wait_for_seconds
calculation_not_finished = geom.parcel_update_end is None waiting_too_long = (pcs_diff >= wait_for_seconds)
waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
return waiting_too_long return waiting_too_long