# Geom parcel performance improvement

* refactors the parcel calculation, resulting in 1.3x–1.6x better performance
* optimizes the parcel-fetching view
pull/384/head
mpeltriaux 9 months ago
parent 523e338b1b
commit d639a4e530

@ -6,9 +6,9 @@ Created on: 15.11.21
"""
import json
from time import process_time
from django.contrib.gis.db.models import MultiPolygonField
from django.core.exceptions import ObjectDoesNotExist
from django.db import models, transaction
from django.utils import timezone
@ -141,85 +141,10 @@ class Geometry(BaseResource):
return
self._set_parcel_update_start_time()
t1 = process_time()
self._perform_parcel_update_fast()
print(f"Parcel processing: {process_time() - t1}")
self._perform_parcel_update()
self._set_parcel_update_end_time()
def _perform_parcel_update(self):
    """
    Fetch the parcels for this geometry and link them to it.

    Each entry delivered by the ParcelFetcher is resolved to a District,
    Municipal, ParcelGroup and Parcel record (fetched or created on demand).
    The parcel's ``updated_on`` is set to the time of this run and the parcel
    is saved. Afterwards the collected parcels replace the current content
    of ``self.parcels``.
    """
    from konova.models import Parcel, District, ParcelIntersection, Municipal, ParcelGroup

    fetched_parcels = ParcelFetcher(geometry=self).get_parcels()
    timestamp = timezone.now()
    linked_parcels = []
    total = len(fetched_parcels)

    print("Process fetched parcels:")
    for count, entry in enumerate(fetched_parcels, start=1):
        # The 'flur' value may still carry the literal prefix 'Flur '; strip it
        # so only the remainder is stored. This can be dropped once 'Flur' no
        # longer occurs in the fetched data.
        flr_val = entry["flur"].replace("Flur ", "")

        # Resolve the administrative hierarchy, creating missing records.
        district, _ = District.objects.get_or_create(
            key=entry["kreisschl"],
            name=entry["kreis"],
        )
        municipal, _ = Municipal.objects.get_or_create(
            key=entry["gmdschl"],
            name=entry["gemeinde"],
            district=district,
        )
        parcel_group, _ = ParcelGroup.objects.get_or_create(
            key=entry["gemaschl"],
            name=entry["gemarkung"],
            municipal=municipal,
        )

        # Empty (falsy) parcel number parts are stored as None.
        nnr = entry['flstnrnen'] or None
        zhlr = entry['flstnrzae'] or None

        parcel, _ = Parcel.objects.get_or_create(
            district=district,
            municipal=municipal,
            parcel_group=parcel_group,
            flr=flr_val,
            flrstck_nnr=nnr,
            flrstck_zhlr=zhlr,
        )
        parcel.district = district
        parcel.updated_on = timestamp
        parcel.save()
        linked_parcels.append(parcel)

        # Progress output after every 100 processed entries.
        if count % 100 == 0:
            print(f" {count}/{total}")

    # Replace the parcels linked to this geometry with the freshly collected ones.
    # NOTE: a follow-up step that stamped `calculated_on` on ParcelIntersection
    # rows lacking it is disabled here; ParcelIntersection is imported but only
    # that disabled step referenced it.
    self.parcels.set(linked_parcels)
def _perform_parcel_update_fast(self):
"""
Performs the main logic of parcel updating.
"""
@ -230,16 +155,13 @@ class Geometry(BaseResource):
)
fetched_parcels = parcel_fetcher.get_parcels()
_now = timezone.now()
underlying_parcels = []
i = 0
len_fetched_parcels = len(fetched_parcels)
print("Process fetched parcels:")
districts = {}
municipals = {}
parcel_groups = {}
parcels_to_update = []
parcels_to_create = []
for result in fetched_parcels:
# There could be parcels which include the word 'Flur',
# which needs to be deleted and just keep the numerical values
@ -280,28 +202,57 @@ class Geometry(BaseResource):
# Preprocess parcel data
flrstck_nnr = result['flstnrnen']
if not flrstck_nnr:
flrstck_nnr = None
match flrstck_nnr:
case "":
flrstck_nnr = None
flrstck_zhlr = result['flstnrzae']
if not flrstck_zhlr:
flrstck_zhlr = None
parcel_obj = Parcel.objects.get_or_create(
district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
)[0]
parcel_obj.updated_on = _now
parcel_obj.save()
underlying_parcels.append(parcel_obj)
i += 1
if i % 100 == 0:
print(f" {i}/{len_fetched_parcels}")
# Update linked parcels
match flrstck_zhlr:
case "":
flrstck_zhlr = None
try:
# Try to fetch parcel from db. If it already exists, just update timestamp.
parcel_obj = Parcel.objects.get(
district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
)
parcel_obj.updated_on = _now
parcels_to_update.append(parcel_obj)
except ObjectDoesNotExist:
# If not existing, create object but do not commit, yet
parcel_obj = Parcel(
district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
updated_on=_now,
)
parcels_to_create.append(parcel_obj)
# Create new parcels
Parcel.objects.bulk_create(
parcels_to_create,
batch_size=500
)
# Update existing parcels
Parcel.objects.bulk_update(
parcels_to_update,
[
"updated_on"
],
batch_size=500
)
# Update linking to geometry
parcel_ids = [x.id for x in parcels_to_update] + [x.id for x in parcels_to_create]
underlying_parcels = Parcel.objects.filter(id__in=parcel_ids)
self.parcels.set(underlying_parcels)
@transaction.atomic

@ -45,18 +45,18 @@ class GeomParcelsView(LoginRequiredMixin, View):
geom_parcel_update_finished = geom.parcel_update_end is not None
parcels = geom.get_underlying_parcels()
parcels_exist = len(parcels) > 0
parcels_are_available = len(parcels) > 0
waiting_too_long = self._check_waiting_too_long(geom)
parcels_are_currently_calculated = (
geometry_exists and
not parcels_exist and
not parcels_are_available and
geom_parcel_update_started and
not geom_parcel_update_finished
)
if not parcels_exist and waiting_too_long:
if not parcels_are_available and waiting_too_long:
# Trigger calculation again - process may have failed in the background
celery_update_parcels.delay(geom.id)
parcels_are_currently_calculated = True
@ -66,7 +66,7 @@ class GeomParcelsView(LoginRequiredMixin, View):
# results after the calculation
status_code = 200
if parcels_exist or not geometry_exists:
if parcels_are_available or not geometry_exists:
# Default case: Parcels are calculated or there is no geometry at all
# (so there will be no parcels to expect)
municipals = geom.get_underlying_municipals(parcels)
@ -104,8 +104,7 @@ class GeomParcelsView(LoginRequiredMixin, View):
except TypeError:
pcs_diff = wait_for_seconds
calculation_not_finished = geom.parcel_update_end is None
waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
waiting_too_long = (pcs_diff >= wait_for_seconds)
return waiting_too_long

Loading…
Cancel
Save