# Geom parcel performance improvement
* refactors the parcel calculation, resulting in 1.3-1.6x better performance
* optimizes the parcel fetching view
This commit is contained in:
parent
523e338b1b
commit
d639a4e530
@ -6,9 +6,9 @@ Created on: 15.11.21
|
||||
|
||||
"""
|
||||
import json
|
||||
from time import process_time
|
||||
|
||||
from django.contrib.gis.db.models import MultiPolygonField
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.db import models, transaction
|
||||
from django.utils import timezone
|
||||
|
||||
@ -141,85 +141,10 @@ class Geometry(BaseResource):
|
||||
return
|
||||
|
||||
self._set_parcel_update_start_time()
|
||||
|
||||
t1 = process_time()
|
||||
self._perform_parcel_update_fast()
|
||||
print(f"Parcel processing: {process_time() - t1}")
|
||||
self._perform_parcel_update()
|
||||
self._set_parcel_update_end_time()
|
||||
|
||||
def _perform_parcel_update(self):
    """
    Performs the main logic of parcel updating.

    Fetches the parcels for this geometry using a ParcelFetcher and, for each
    fetched entry, makes sure the matching District, Municipal, ParcelGroup
    and Parcel records exist (created on demand via get_or_create). Every
    processed parcel gets the same ``updated_on`` timestamp and is saved.
    Finally the geometry's linked parcels are replaced by the processed ones.

    Progress is printed to stdout every 100 processed parcels.

    Returns:
        None
    """
    # Imported locally, as in the original implementation
    # (presumably to avoid a circular import - TODO confirm)
    from konova.models import Parcel, District, Municipal, ParcelGroup

    parcel_fetcher = ParcelFetcher(
        geometry=self
    )
    fetched_parcels = parcel_fetcher.get_parcels()

    # One shared timestamp, so all parcels touched by this run carry the same value
    _now = timezone.now()
    underlying_parcels = []
    len_fetched_parcels = len(fetched_parcels)

    print("Process fetched parcels:")
    for i, result in enumerate(fetched_parcels, start=1):
        # There could be parcels which include the word 'Flur',
        # which needs to be deleted and just keep the numerical values
        ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
        flr_val = result["flur"].replace("Flur ", "")

        district, _ = District.objects.get_or_create(
            key=result["kreisschl"],
            name=result["kreis"],
        )
        municipal, _ = Municipal.objects.get_or_create(
            key=result["gmdschl"],
            name=result["gemeinde"],
            district=district,
        )
        parcel_group, _ = ParcelGroup.objects.get_or_create(
            key=result["gemaschl"],
            name=result["gemarkung"],
            municipal=municipal,
        )

        # Falsy values (e.g. empty strings) are normalised to None
        flrstck_nnr = result['flstnrnen'] or None
        flrstck_zhlr = result['flstnrzae'] or None

        # The lookup already filters on district, so the returned object
        # does not need its district assigned again afterwards
        parcel_obj, _ = Parcel.objects.get_or_create(
            district=district,
            municipal=municipal,
            parcel_group=parcel_group,
            flr=flr_val,
            flrstck_nnr=flrstck_nnr,
            flrstck_zhlr=flrstck_zhlr,
        )
        parcel_obj.updated_on = _now
        parcel_obj.save()
        underlying_parcels.append(parcel_obj)

        if i % 100 == 0:
            print(f" {i}/{len_fetched_parcels}")

    # Update the linked parcels: set() replaces the current relation content
    self.parcels.set(underlying_parcels)
|
||||
|
||||
def _perform_parcel_update_fast(self):
|
||||
"""
|
||||
Performs the main logic of parcel updating.
|
||||
"""
|
||||
@ -230,16 +155,13 @@ class Geometry(BaseResource):
|
||||
)
|
||||
fetched_parcels = parcel_fetcher.get_parcels()
|
||||
_now = timezone.now()
|
||||
underlying_parcels = []
|
||||
|
||||
i = 0
|
||||
len_fetched_parcels = len(fetched_parcels)
|
||||
print("Process fetched parcels:")
|
||||
|
||||
districts = {}
|
||||
municipals = {}
|
||||
parcel_groups = {}
|
||||
|
||||
parcels_to_update = []
|
||||
parcels_to_create = []
|
||||
for result in fetched_parcels:
|
||||
# There could be parcels which include the word 'Flur',
|
||||
# which needs to be deleted and just keep the numerical values
|
||||
@ -280,28 +202,57 @@ class Geometry(BaseResource):
|
||||
|
||||
# Preprocess parcel data
|
||||
flrstck_nnr = result['flstnrnen']
|
||||
if not flrstck_nnr:
|
||||
flrstck_nnr = None
|
||||
match flrstck_nnr:
|
||||
case "":
|
||||
flrstck_nnr = None
|
||||
|
||||
flrstck_zhlr = result['flstnrzae']
|
||||
if not flrstck_zhlr:
|
||||
flrstck_zhlr = None
|
||||
match flrstck_zhlr:
|
||||
case "":
|
||||
flrstck_zhlr = None
|
||||
|
||||
parcel_obj = Parcel.objects.get_or_create(
|
||||
district=district,
|
||||
municipal=municipal,
|
||||
parcel_group=parcel_group,
|
||||
flr=flr_val,
|
||||
flrstck_nnr=flrstck_nnr,
|
||||
flrstck_zhlr=flrstck_zhlr,
|
||||
)[0]
|
||||
parcel_obj.updated_on = _now
|
||||
parcel_obj.save()
|
||||
underlying_parcels.append(parcel_obj)
|
||||
i += 1
|
||||
if i % 100 == 0:
|
||||
print(f" {i}/{len_fetched_parcels}")
|
||||
try:
|
||||
# Try to fetch parcel from db. If it already exists, just update timestamp.
|
||||
parcel_obj = Parcel.objects.get(
|
||||
district=district,
|
||||
municipal=municipal,
|
||||
parcel_group=parcel_group,
|
||||
flr=flr_val,
|
||||
flrstck_nnr=flrstck_nnr,
|
||||
flrstck_zhlr=flrstck_zhlr,
|
||||
)
|
||||
parcel_obj.updated_on = _now
|
||||
parcels_to_update.append(parcel_obj)
|
||||
except ObjectDoesNotExist:
|
||||
# If not existing, create object but do not commit, yet
|
||||
parcel_obj = Parcel(
|
||||
district=district,
|
||||
municipal=municipal,
|
||||
parcel_group=parcel_group,
|
||||
flr=flr_val,
|
||||
flrstck_nnr=flrstck_nnr,
|
||||
flrstck_zhlr=flrstck_zhlr,
|
||||
updated_on=_now,
|
||||
)
|
||||
parcels_to_create.append(parcel_obj)
|
||||
|
||||
# Update linked parcels
|
||||
# Create new parcels
|
||||
Parcel.objects.bulk_create(
|
||||
parcels_to_create,
|
||||
batch_size=500
|
||||
)
|
||||
# Update existing parcels
|
||||
Parcel.objects.bulk_update(
|
||||
parcels_to_update,
|
||||
[
|
||||
"updated_on"
|
||||
],
|
||||
batch_size=500
|
||||
)
|
||||
|
||||
# Update linking to geometry
|
||||
parcel_ids = [x.id for x in parcels_to_update] + [x.id for x in parcels_to_create]
|
||||
underlying_parcels = Parcel.objects.filter(id__in=parcel_ids)
|
||||
self.parcels.set(underlying_parcels)
|
||||
|
||||
@transaction.atomic
|
||||
|
@ -45,18 +45,18 @@ class GeomParcelsView(LoginRequiredMixin, View):
|
||||
geom_parcel_update_finished = geom.parcel_update_end is not None
|
||||
|
||||
parcels = geom.get_underlying_parcels()
|
||||
parcels_exist = len(parcels) > 0
|
||||
parcels_are_available = len(parcels) > 0
|
||||
|
||||
waiting_too_long = self._check_waiting_too_long(geom)
|
||||
|
||||
parcels_are_currently_calculated = (
|
||||
geometry_exists and
|
||||
not parcels_exist and
|
||||
not parcels_are_available and
|
||||
geom_parcel_update_started and
|
||||
not geom_parcel_update_finished
|
||||
)
|
||||
|
||||
if not parcels_exist and waiting_too_long:
|
||||
if not parcels_are_available and waiting_too_long:
|
||||
# Trigger calculation again - process may have failed in the background
|
||||
celery_update_parcels.delay(geom.id)
|
||||
parcels_are_currently_calculated = True
|
||||
@ -66,7 +66,7 @@ class GeomParcelsView(LoginRequiredMixin, View):
|
||||
# results after the calculation
|
||||
status_code = 200
|
||||
|
||||
if parcels_exist or not geometry_exists:
|
||||
if parcels_are_available or not geometry_exists:
|
||||
# Default case: Parcels are calculated or there is no geometry at all
|
||||
# (so there will be no parcels to expect)
|
||||
municipals = geom.get_underlying_municipals(parcels)
|
||||
@ -104,8 +104,7 @@ class GeomParcelsView(LoginRequiredMixin, View):
|
||||
except TypeError:
|
||||
pcs_diff = wait_for_seconds
|
||||
|
||||
calculation_not_finished = geom.parcel_update_end is None
|
||||
waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
|
||||
waiting_too_long = (pcs_diff >= wait_for_seconds)
|
||||
return waiting_too_long
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user