# WIP: Performance boost parcel calculation

* improves handling of parcel calculation (speed up by ~30%)
* ToDo: Clean up code
pull/384/head
mpeltriaux 9 months ago
parent 5922d5ce06
commit 523e338b1b

@ -6,6 +6,7 @@ Created on: 15.11.21
"""
import json
from time import process_time
from django.contrib.gis.db.models import MultiPolygonField
from django.db import models, transaction
@ -140,7 +141,10 @@ class Geometry(BaseResource):
return
self._set_parcel_update_start_time()
self._perform_parcel_update()
t1 = process_time()
self._perform_parcel_update_fast()
print(f"Parcel processing: {process_time() - t1}")
self._set_parcel_update_end_time()
def _perform_parcel_update(self):
@ -155,8 +159,10 @@ class Geometry(BaseResource):
fetched_parcels = parcel_fetcher.get_parcels()
_now = timezone.now()
underlying_parcels = []
i = 0
len_fetched_parcels = len(fetched_parcels)
print("Process fetched parcels:")
for result in fetched_parcels:
with transaction.atomic():
# Some parcels still carry the word 'Flur' in this value.
# Strip it, so only the numerical value is kept
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
@ -193,22 +199,110 @@ class Geometry(BaseResource):
parcel_obj.updated_on = _now
parcel_obj.save()
underlying_parcels.append(parcel_obj)
i += 1
if i % 100 == 0:
print(f" {i}/{len_fetched_parcels}")
# Update the linked parcels
self.parcels.clear()
#self.parcels.clear()
self.parcels.set(underlying_parcels)
# Set the calculated_on intermediate field, so this related data will be found on lookups
intersections_without_ts = self.parcelintersection_set.filter(
parcel__in=self.parcels.all(),
calculated_on__isnull=True,
)
for entry in intersections_without_ts:
entry.calculated_on = _now
ParcelIntersection.objects.bulk_update(
intersections_without_ts,
["calculated_on"]
#intersections_without_ts = self.parcelintersection_set.filter(
# parcel__in=self.parcels.all(),
# calculated_on__isnull=True,
#)
#for entry in intersections_without_ts:
# entry.calculated_on = _now
#ParcelIntersection.objects.bulk_update(
# intersections_without_ts,
# ["calculated_on"]
#)
def _perform_parcel_update_fast(self):
    """
    Performs the main logic of parcel updating.

    Fetches the parcels for this geometry using the ParcelFetcher, resolves the
    District, Municipal, ParcelGroup and Parcel record for each fetched entry
    and finally links all resolved parcels to this geometry.

    District, Municipal and ParcelGroup records are cached in dicts for the
    duration of this call, so entries sharing the same values do not trigger
    another get_or_create() round trip.

    Returns:

    """
    from konova.models import Parcel, District, Municipal, ParcelGroup

    parcel_fetcher = ParcelFetcher(
        geometry=self
    )
    fetched_parcels = parcel_fetcher.get_parcels()
    _now = timezone.now()
    underlying_parcels = []
    len_fetched_parcels = len(fetched_parcels)
    print("Process fetched parcels:")

    # Caches are keyed on the exact values used for the get_or_create() lookup,
    # so a cache hit always returns the record the database call would return.
    # NOTE(review): assumes the fetched values are hashable (e.g. str/int) - confirm
    districts = {}
    municipals = {}
    parcel_groups = {}

    for i, result in enumerate(fetched_parcels, start=1):
        # There could be parcels which include the word 'Flur'.
        # It is removed, so only the numerical value is kept
        ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
        flr_val = result["flur"].replace("Flur ", "")

        # Get district (cache in dict)
        district_cache_key = (result["kreisschl"], result["kreis"])
        district = districts.get(district_cache_key)
        if district is None:
            district = District.objects.get_or_create(
                key=result["kreisschl"],
                name=result["kreis"],
            )[0]
            districts[district_cache_key] = district

        # Get municipal (cache in dict)
        municipal_cache_key = (result["gmdschl"], result["gemeinde"], district_cache_key)
        municipal = municipals.get(municipal_cache_key)
        if municipal is None:
            municipal = Municipal.objects.get_or_create(
                key=result["gmdschl"],
                name=result["gemeinde"],
                district=district,
            )[0]
            municipals[municipal_cache_key] = municipal

        # Get parcel group (cache in dict)
        parcel_group_cache_key = (result["gemaschl"], result["gemarkung"], municipal_cache_key)
        parcel_group = parcel_groups.get(parcel_group_cache_key)
        if parcel_group is None:
            parcel_group = ParcelGroup.objects.get_or_create(
                key=result["gemaschl"],
                name=result["gemarkung"],
                municipal=municipal,
            )[0]
            parcel_groups[parcel_group_cache_key] = parcel_group

        # Preprocess parcel data: falsy values (e.g. empty strings) are stored as None
        flrstck_nnr = result['flstnrnen'] or None
        flrstck_zhlr = result['flstnrzae'] or None

        parcel_obj = Parcel.objects.get_or_create(
            district=district,
            municipal=municipal,
            parcel_group=parcel_group,
            flr=flr_val,
            flrstck_nnr=flrstck_nnr,
            flrstck_zhlr=flrstck_zhlr,
        )[0]
        parcel_obj.updated_on = _now
        parcel_obj.save()
        underlying_parcels.append(parcel_obj)

        # Progress output every 100 processed entries
        if i % 100 == 0:
            print(f" {i}/{len_fetched_parcels}")

    # Update linked parcels
    self.parcels.set(underlying_parcels)
@transaction.atomic
def _set_parcel_update_start_time(self):
@ -233,9 +327,7 @@ class Geometry(BaseResource):
Returns:
parcels (QuerySet): The related parcels as queryset
"""
parcels = self.parcels.filter(
parcelintersection__calculated_on__isnull=False,
).prefetch_related(
parcels = self.parcels.prefetch_related(
"district",
"municipal",
).order_by(
@ -305,6 +397,33 @@ class Geometry(BaseResource):
}
return geojson
@property
def complexity_factor(self) -> float:
    """ Calculates a factor to estimate the complexity of a Geometry

    0 = very low complexity
    1 = very high complexity

    ASSUMPTION:
    The envelope is the bounding box of a geometry. If the geometry's area is similar to the area of its bounding
    box, it is considered as rather simple, since it seems to be a closed shape like a simple box.
    If the geometry has a very big bounding box, but the geometry's own area is rather small,
    compared to the one of the bounding box, the complexity can be higher.

    Example:
        geometry area similar to bounding box --> geometry / bounding_box ~ 1
        geometry area far smaller than bb --> geometry / bounding_box ~ 0

    Result is being inverted for better understanding of 'low' and 'high' complexity.

    A missing geometry or a bounding box without any area results in 0 (lowest complexity).

    Returns:
        complexity_factor (float): The estimated complexity, always within [0, 1]
    """
    geom = self.geom
    if geom is None:
        # No geometry, nothing which could be complex
        return 0.0

    envelope_area = geom.envelope.area
    if not envelope_area:
        # Empty or degenerated geometry: avoid division by zero
        return 0.0

    # Share of the bounding box which is covered by the geometry itself
    coverage = geom.area / envelope_area

    # Invert and clamp, so float inaccuracies can not push the factor out of [0, 1]
    complexity_factor = min(1.0, max(0.0, 1 - coverage))
    return complexity_factor
class GeometryConflict(UuidModel):
"""

@ -49,5 +49,5 @@ ETS_GROUP = "Conservation office"
# GEOMETRY
## Max number of allowed vertices. Geometries with more vertices will be simplified until they no longer exceed this threshold
GEOM_MAX_VERTICES = 10000
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 5 minutes)
GEOM_THRESHOLD_RECALCULATION_SECONDS = 300
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 30 minutes)
GEOM_THRESHOLD_RECALCULATION_SECONDS = 60 * 30

@ -135,6 +135,7 @@ DATABASES = {
'USER': 'postgres',
'HOST': '127.0.0.1',
'PORT': '5432',
'CONN_MAX_AGE': 120,
}
}
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

@ -10,13 +10,14 @@ def celery_update_parcels(geometry_id: str, recheck: bool = True):
from konova.models import Geometry, ParcelIntersection
try:
geom = Geometry.objects.get(id=geometry_id)
objs = geom.parcelintersection_set.all()
for obj in objs:
obj.calculated_on = None
ParcelIntersection.objects.bulk_update(
objs,
["calculated_on"]
)
geom.parcels.clear()
#objs = geom.parcelintersection_set.all()
#for obj in objs:
# obj.calculated_on = None
#ParcelIntersection.objects.bulk_update(
# objs,
# ["calculated_on"]
#)
geom.update_parcels()
except ObjectDoesNotExist:

@ -37,30 +37,38 @@ class GeomParcelsView(LoginRequiredMixin, View):
# https://htmx.org/docs/#polling
status_code = 286
template = "konova/includes/parcels/parcel_table_frame.html"
geom = get_object_or_404(Geometry, id=id)
parcels = geom.get_underlying_parcels()
geos_geom = geom.geom or MultiPolygon(srid=DEFAULT_SRID_RLP)
geometry_exists = not geos_geom.empty and geos_geom.area > 0
geom_parcel_update_started = geom.parcel_update_start is not None
geom_parcel_update_finished = geom.parcel_update_end is not None
parcels = geom.get_underlying_parcels()
parcels_exist = len(parcels) > 0
waiting_too_long = self._check_waiting_too_long(geom)
geometry_exists = not geos_geom.empty and geos_geom.area > 0
parcels_are_currently_calculated = (
geometry_exists and
geom.parcel_update_start and
not geom.parcel_update_end
not parcels_exist and
geom_parcel_update_started and
not geom_parcel_update_finished
)
parcels_available = len(parcels) > 0
if not parcels_exist and waiting_too_long:
# Trigger calculation again - process may have failed in the background
celery_update_parcels.delay(geom.id)
parcels_are_currently_calculated = True
if parcels_are_currently_calculated:
# Parcels are being calculated right now. Change the status code, so polling stays active for fetching
# results after the calculation
status_code = 200
if waiting_too_long:
# Trigger calculation again
celery_update_parcels.delay(geom.id)
if parcels_available or not geometry_exists:
if parcels_exist or not geometry_exists:
# Default case: Parcels are calculated or there is no geometry at all
# (so there will be no parcels to expect)
municipals = geom.get_underlying_municipals(parcels)
rpp = 100
@ -88,13 +96,16 @@ class GeomParcelsView(LoginRequiredMixin, View):
Depending on the geometry's modified attribute
"""
# Scale time to wait longer with increasing geometry complexity
complexity_factor = geom.complexity_factor + 1
wait_for_seconds = int(GEOM_THRESHOLD_RECALCULATION_SECONDS * complexity_factor)
try:
pcs_diff = (timezone.now() - geom.parcel_update_start).seconds
except TypeError:
pcs_diff = GEOM_THRESHOLD_RECALCULATION_SECONDS
pcs_diff = wait_for_seconds
calculation_not_finished = geom.parcel_update_end is None
waiting_too_long = (pcs_diff >= GEOM_THRESHOLD_RECALCULATION_SECONDS) and calculation_not_finished
waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
return waiting_too_long

Loading…
Cancel
Save