# WIP: Performance boost parcel calculation

* Improves the handling of the parcel calculation (speeds it up by ~30%)
* ToDo: Clean up code
pull/384/head
mpeltriaux 9 months ago
parent 5922d5ce06
commit 523e338b1b

@ -6,6 +6,7 @@ Created on: 15.11.21
""" """
import json import json
from time import process_time
from django.contrib.gis.db.models import MultiPolygonField from django.contrib.gis.db.models import MultiPolygonField
from django.db import models, transaction from django.db import models, transaction
@ -140,7 +141,10 @@ class Geometry(BaseResource):
return return
self._set_parcel_update_start_time() self._set_parcel_update_start_time()
self._perform_parcel_update()
t1 = process_time()
self._perform_parcel_update_fast()
print(f"Parcel processing: {process_time() - t1}")
self._set_parcel_update_end_time() self._set_parcel_update_end_time()
def _perform_parcel_update(self): def _perform_parcel_update(self):
@ -155,61 +159,151 @@ class Geometry(BaseResource):
fetched_parcels = parcel_fetcher.get_parcels() fetched_parcels = parcel_fetcher.get_parcels()
_now = timezone.now() _now = timezone.now()
underlying_parcels = [] underlying_parcels = []
i = 0
len_fetched_parcels = len(fetched_parcels)
print("Process fetched parcels:")
for result in fetched_parcels:
# There could be parcels which include the word 'Flur',
# which needs to be deleted and just keep the numerical values
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
flr_val = result["flur"].replace("Flur ", "")
district = District.objects.get_or_create(
key=result["kreisschl"],
name=result["kreis"],
)[0]
municipal = Municipal.objects.get_or_create(
key=result["gmdschl"],
name=result["gemeinde"],
district=district,
)[0]
parcel_group = ParcelGroup.objects.get_or_create(
key=result["gemaschl"],
name=result["gemarkung"],
municipal=municipal,
)[0]
flrstck_nnr = result['flstnrnen']
if not flrstck_nnr:
flrstck_nnr = None
flrstck_zhlr = result['flstnrzae']
if not flrstck_zhlr:
flrstck_zhlr = None
parcel_obj = Parcel.objects.get_or_create(
district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
)[0]
parcel_obj.district = district
parcel_obj.updated_on = _now
parcel_obj.save()
underlying_parcels.append(parcel_obj)
i += 1
if i % 100 == 0:
print(f" {i}/{len_fetched_parcels}")
# Update the linked parcels
#self.parcels.clear()
self.parcels.set(underlying_parcels)
# Set the calculated_on intermediate field, so this related data will be found on lookups
#intersections_without_ts = self.parcelintersection_set.filter(
# parcel__in=self.parcels.all(),
# calculated_on__isnull=True,
#)
#for entry in intersections_without_ts:
# entry.calculated_on = _now
#ParcelIntersection.objects.bulk_update(
# intersections_without_ts,
# ["calculated_on"]
#)
def _perform_parcel_update_fast(self):
    """ Performs the main logic of parcel updating.

    Fetches the parcels for this geometry via ParcelFetcher, resolves (or creates) the related
    District, Municipal, ParcelGroup and Parcel entries for each fetched result and finally
    links the resolved parcels to this geometry.

    District, Municipal and ParcelGroup objects are cached in local dicts for the duration of
    this call, so each distinct entry triggers only a single get_or_create() query.

    Returns:

    """
    from konova.models import Parcel, District, Municipal, ParcelGroup

    parcel_fetcher = ParcelFetcher(
        geometry=self
    )
    fetched_parcels = parcel_fetcher.get_parcels()
    _now = timezone.now()
    underlying_parcels = []
    len_fetched_parcels = len(fetched_parcels)
    print("Process fetched parcels:")

    # Per-call caches. The cache keys consist of exactly the values used for the
    # get_or_create() lookup, so a cache hit always returns the object the query would return.
    districts = {}
    municipals = {}
    parcel_groups = {}

    for i, result in enumerate(fetched_parcels, start=1):
        # There could be parcels which include the word 'Flur',
        # which needs to be deleted and just keep the numerical values
        ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
        flr_val = result["flur"].replace("Flur ", "")

        # Get district (cache in dict)
        district_cache_key = (result["kreisschl"], result["kreis"])
        district = districts.get(district_cache_key)
        if district is None:
            district = District.objects.get_or_create(
                key=result["kreisschl"],
                name=result["kreis"],
            )[0]
            districts[district_cache_key] = district

        # Get municipal (cache in dict)
        municipal_cache_key = (result["gmdschl"], result["gemeinde"], district_cache_key)
        municipal = municipals.get(municipal_cache_key)
        if municipal is None:
            municipal = Municipal.objects.get_or_create(
                key=result["gmdschl"],
                name=result["gemeinde"],
                district=district,
            )[0]
            municipals[municipal_cache_key] = municipal

        # Get parcel group (cache in dict)
        parcel_group_cache_key = (result["gemaschl"], result["gemarkung"], municipal_cache_key)
        parcel_group = parcel_groups.get(parcel_group_cache_key)
        if parcel_group is None:
            parcel_group = ParcelGroup.objects.get_or_create(
                key=result["gemaschl"],
                name=result["gemarkung"],
                municipal=municipal,
            )[0]
            parcel_groups[parcel_group_cache_key] = parcel_group

        # Preprocess parcel data: falsy values (e.g. empty strings) are stored as None
        flrstck_nnr = result['flstnrnen'] or None
        flrstck_zhlr = result['flstnrzae'] or None

        parcel_obj = Parcel.objects.get_or_create(
            district=district,
            municipal=municipal,
            parcel_group=parcel_group,
            flr=flr_val,
            flrstck_nnr=flrstck_nnr,
            flrstck_zhlr=flrstck_zhlr,
        )[0]
        parcel_obj.updated_on = _now
        parcel_obj.save()
        underlying_parcels.append(parcel_obj)

        # Progress output for every 100th processed parcel
        if i % 100 == 0:
            print(f" {i}/{len_fetched_parcels}")

    # Update linked parcels
    self.parcels.set(underlying_parcels)
@transaction.atomic @transaction.atomic
def _set_parcel_update_start_time(self): def _set_parcel_update_start_time(self):
""" """
@ -233,9 +327,7 @@ class Geometry(BaseResource):
Returns: Returns:
parcels (QuerySet): The related parcels as queryset parcels (QuerySet): The related parcels as queryset
""" """
parcels = self.parcels.filter( parcels = self.parcels.prefetch_related(
parcelintersection__calculated_on__isnull=False,
).prefetch_related(
"district", "district",
"municipal", "municipal",
).order_by( ).order_by(
@ -305,6 +397,33 @@ class Geometry(BaseResource):
} }
return geojson return geojson
@property
def complexity_factor(self) -> float:
    """ Calculates a factor to estimate the complexity of a Geometry

    0 = very low complexity
    1 = very high complexity

    ASSUMPTION:
    The envelope is the bounding box of a geometry. If the geometry's area is similar to the area of its bounding
    box, it is considered as rather simple, since it seems to be a closer shape like a simple box.
    If the geometry has a very big bounding box, but the geometry's own area is rather small,
    compared to the one of the bounding box, the complexity can be higher.

    Example:
        geometry area similar to bounding box --> geometry / bounding_box ~ 1
        geometry area far smaller than bb --> geometry / bounding_box ~ 0

    Result is being inverted for better understanding of 'low' and 'high' complexity.

    A missing geometry or a bounding box without any area results in 0 (lowest complexity),
    since there is nothing to compare.

    Returns:
        complexity_factor (float): The estimated complexity
    """
    geom = self.geom
    if geom is None:
        # No geometry stored: nothing to estimate
        return 0.0

    envelope_area = geom.envelope.area
    if envelope_area <= 0:
        # Empty or degenerate geometry: avoid a division by zero
        return 0.0

    # Relate the geometry's area to the area of the bounding box itself, as described above
    complexity_factor = 1 - geom.area / envelope_area
    return complexity_factor
class GeometryConflict(UuidModel): class GeometryConflict(UuidModel):
""" """

@ -49,5 +49,5 @@ ETS_GROUP = "Conservation office"
# GEOMETRY # GEOMETRY
## Max number of allowed vertices. Geometries larger will be simplified until they reach this threshold ## Max number of allowed vertices. Geometries larger will be simplified until they reach this threshold
GEOM_MAX_VERTICES = 10000 GEOM_MAX_VERTICES = 10000
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 5 minutes) ## Max seconds to wait for a parcel calculation result before a new request will be started (default: 30 minutes)
GEOM_THRESHOLD_RECALCULATION_SECONDS = 300 GEOM_THRESHOLD_RECALCULATION_SECONDS = 60 * 30

@ -135,6 +135,7 @@ DATABASES = {
'USER': 'postgres', 'USER': 'postgres',
'HOST': '127.0.0.1', 'HOST': '127.0.0.1',
'PORT': '5432', 'PORT': '5432',
'CONN_MAX_AGE': 120,
} }
} }
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

@ -10,13 +10,14 @@ def celery_update_parcels(geometry_id: str, recheck: bool = True):
from konova.models import Geometry, ParcelIntersection from konova.models import Geometry, ParcelIntersection
try: try:
geom = Geometry.objects.get(id=geometry_id) geom = Geometry.objects.get(id=geometry_id)
objs = geom.parcelintersection_set.all() geom.parcels.clear()
for obj in objs: #objs = geom.parcelintersection_set.all()
obj.calculated_on = None #for obj in objs:
ParcelIntersection.objects.bulk_update( # obj.calculated_on = None
objs, #ParcelIntersection.objects.bulk_update(
["calculated_on"] # objs,
) # ["calculated_on"]
#)
geom.update_parcels() geom.update_parcels()
except ObjectDoesNotExist: except ObjectDoesNotExist:

@ -37,30 +37,38 @@ class GeomParcelsView(LoginRequiredMixin, View):
# https://htmx.org/docs/#polling # https://htmx.org/docs/#polling
status_code = 286 status_code = 286
template = "konova/includes/parcels/parcel_table_frame.html" template = "konova/includes/parcels/parcel_table_frame.html"
geom = get_object_or_404(Geometry, id=id) geom = get_object_or_404(Geometry, id=id)
parcels = geom.get_underlying_parcels()
geos_geom = geom.geom or MultiPolygon(srid=DEFAULT_SRID_RLP) geos_geom = geom.geom or MultiPolygon(srid=DEFAULT_SRID_RLP)
geometry_exists = not geos_geom.empty and geos_geom.area > 0
geom_parcel_update_started = geom.parcel_update_start is not None
geom_parcel_update_finished = geom.parcel_update_end is not None
parcels = geom.get_underlying_parcels()
parcels_exist = len(parcels) > 0
waiting_too_long = self._check_waiting_too_long(geom) waiting_too_long = self._check_waiting_too_long(geom)
geometry_exists = not geos_geom.empty and geos_geom.area > 0
parcels_are_currently_calculated = ( parcels_are_currently_calculated = (
geometry_exists and geometry_exists and
geom.parcel_update_start and not parcels_exist and
not geom.parcel_update_end geom_parcel_update_started and
not geom_parcel_update_finished
) )
parcels_available = len(parcels) > 0
if not parcels_exist and waiting_too_long:
# Trigger calculation again - process may have failed in the background
celery_update_parcels.delay(geom.id)
parcels_are_currently_calculated = True
if parcels_are_currently_calculated: if parcels_are_currently_calculated:
# Parcels are being calculated right now. Change the status code, so polling stays active for fetching # Parcels are being calculated right now. Change the status code, so polling stays active for fetching
# results after the calculation # results after the calculation
status_code = 200 status_code = 200
if waiting_too_long: if parcels_exist or not geometry_exists:
# Trigger calculation again # Default case: Parcels are calculated or there is no geometry at all
celery_update_parcels.delay(geom.id) # (so there will be no parcels to expect)
if parcels_available or not geometry_exists:
municipals = geom.get_underlying_municipals(parcels) municipals = geom.get_underlying_municipals(parcels)
rpp = 100 rpp = 100
@ -88,13 +96,16 @@ class GeomParcelsView(LoginRequiredMixin, View):
Depending on the geometry's modified attribute Depending on the geometry's modified attribute
""" """
# Scale time to wait longer with increasing geometry complexity
complexity_factor = geom.complexity_factor + 1
wait_for_seconds = int(GEOM_THRESHOLD_RECALCULATION_SECONDS * complexity_factor)
try: try:
pcs_diff = (timezone.now() - geom.parcel_update_start).seconds pcs_diff = (timezone.now() - geom.parcel_update_start).seconds
except TypeError: except TypeError:
pcs_diff = GEOM_THRESHOLD_RECALCULATION_SECONDS pcs_diff = wait_for_seconds
calculation_not_finished = geom.parcel_update_end is None calculation_not_finished = geom.parcel_update_end is None
waiting_too_long = (pcs_diff >= GEOM_THRESHOLD_RECALCULATION_SECONDS) and calculation_not_finished waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
return waiting_too_long return waiting_too_long

Loading…
Cancel
Save