# WIP: Performance boost parcel calculation

* Improves handling of parcel calculation (speed-up of ~30%)
* TODO: Clean up code
This commit is contained in:
2024-01-16 07:57:29 +01:00
parent 50bd6feb89
commit 8bcccb4685
5 changed files with 193 additions and 61 deletions

View File

@@ -6,6 +6,7 @@ Created on: 15.11.21
"""
import json
from time import process_time
from django.contrib.gis.db.models import MultiPolygonField
from django.db import models, transaction
@@ -140,7 +141,10 @@ class Geometry(BaseResource):
return
self._set_parcel_update_start_time()
self._perform_parcel_update()
t1 = process_time()
self._perform_parcel_update_fast()
print(f"Parcel processing: {process_time() - t1}")
self._set_parcel_update_end_time()
def _perform_parcel_update(self):
@@ -155,61 +159,151 @@ class Geometry(BaseResource):
fetched_parcels = parcel_fetcher.get_parcels()
_now = timezone.now()
underlying_parcels = []
i = 0
len_fetched_parcels = len(fetched_parcels)
print("Process fetched parcels:")
for result in fetched_parcels:
with transaction.atomic():
# There could be parcels which include the word 'Flur',
# which needs to be deleted and just keep the numerical values
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
flr_val = result["flur"].replace("Flur ", "")
# There could be parcels which include the word 'Flur',
# which needs to be deleted and just keep the numerical values
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
flr_val = result["flur"].replace("Flur ", "")
district = District.objects.get_or_create(
key=result["kreisschl"],
name=result["kreis"],
)[0]
municipal = Municipal.objects.get_or_create(
key=result["gmdschl"],
name=result["gemeinde"],
district=district,
)[0]
parcel_group = ParcelGroup.objects.get_or_create(
key=result["gemaschl"],
name=result["gemarkung"],
municipal=municipal,
)[0]
flrstck_nnr = result['flstnrnen']
if not flrstck_nnr:
flrstck_nnr = None
flrstck_zhlr = result['flstnrzae']
if not flrstck_zhlr:
flrstck_zhlr = None
parcel_obj = Parcel.objects.get_or_create(
district=district,
municipal=municipal,
parcel_group=parcel_group,
flr=flr_val,
flrstck_nnr=flrstck_nnr,
flrstck_zhlr=flrstck_zhlr,
)[0]
parcel_obj.district = district
parcel_obj.updated_on = _now
parcel_obj.save()
underlying_parcels.append(parcel_obj)
i += 1
if i % 100 == 0:
print(f" {i}/{len_fetched_parcels}")
# Update the linked parcels
#self.parcels.clear()
self.parcels.set(underlying_parcels)
# Set the calculated_on intermediate field, so this related data will be found on lookups
#intersections_without_ts = self.parcelintersection_set.filter(
# parcel__in=self.parcels.all(),
# calculated_on__isnull=True,
#)
#for entry in intersections_without_ts:
# entry.calculated_on = _now
#ParcelIntersection.objects.bulk_update(
# intersections_without_ts,
# ["calculated_on"]
#)
def _perform_parcel_update_fast(self):
    """
    Performs the main logic of parcel updating.

    Faster variant of the parcel update: District, Municipal and ParcelGroup
    entries are memoised for the duration of one run, so each distinct
    combination is resolved with a single ``get_or_create`` call instead of
    one call per fetched parcel.

    Steps:
        1. Fetch the parcel records for this geometry via ``ParcelFetcher``.
        2. For every record resolve district, municipal and parcel group
           (cached), get or create the matching ``Parcel`` and stamp its
           ``updated_on`` with the start time of this run.
        3. Replace the parcels linked to this geometry with the resolved ones.
        4. Set ``calculated_on`` on linked intersections without a timestamp.

    Returns:
        None
    """
    from konova.models import Parcel, District, Municipal, ParcelGroup

    parcel_fetcher = ParcelFetcher(
        geometry=self
    )
    fetched_parcels = parcel_fetcher.get_parcels()
    _now = timezone.now()
    underlying_parcels = []

    len_fetched_parcels = len(fetched_parcels)
    print("Process fetched parcels:")

    # Per-run caches. Each one is keyed by the complete set of lookup
    # arguments handed to get_or_create, so a cache hit returns exactly the
    # object the database call would have returned for the same record.
    districts = {}
    municipals = {}
    parcel_groups = {}

    for i, result in enumerate(fetched_parcels, start=1):
        # There could be parcels which include the word 'Flur',
        # which needs to be deleted and just keep the numerical values
        ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
        flr_val = result["flur"].replace("Flur ", "")

        # Get district (cache in dict)
        district_key = (result["kreisschl"], result["kreis"])
        district = districts.get(district_key)
        if district is None:
            district = District.objects.get_or_create(
                key=result["kreisschl"],
                name=result["kreis"],
            )[0]
            districts[district_key] = district

        # Get municipal (cache in dict)
        municipal_key = (district_key, result["gmdschl"], result["gemeinde"])
        municipal = municipals.get(municipal_key)
        if municipal is None:
            municipal = Municipal.objects.get_or_create(
                key=result["gmdschl"],
                name=result["gemeinde"],
                district=district,
            )[0]
            municipals[municipal_key] = municipal

        # Get parcel group (cache in dict)
        parcel_group_key = (municipal_key, result["gemaschl"], result["gemarkung"])
        parcel_group = parcel_groups.get(parcel_group_key)
        if parcel_group is None:
            parcel_group = ParcelGroup.objects.get_or_create(
                key=result["gemaschl"],
                name=result["gemarkung"],
                municipal=municipal,
            )[0]
            parcel_groups[parcel_group_key] = parcel_group

        # Preprocess parcel data: empty values are stored as NULL
        flrstck_nnr = result['flstnrnen'] or None
        flrstck_zhlr = result['flstnrzae'] or None

        parcel_obj = Parcel.objects.get_or_create(
            district=district,
            municipal=municipal,
            parcel_group=parcel_group,
            flr=flr_val,
            flrstck_nnr=flrstck_nnr,
            flrstck_zhlr=flrstck_zhlr,
        )[0]
        parcel_obj.updated_on = _now
        parcel_obj.save()
        underlying_parcels.append(parcel_obj)

        # Progress output every 100 processed parcels
        if i % 100 == 0:
            print(f" {i}/{len_fetched_parcels}")

    # Update linked parcels: drop the existing links first, then attach the
    # freshly resolved parcels
    self.parcels.clear()
    self.parcels.set(underlying_parcels)

    # Set the calculated_on intermediate field, so this related data will be found on lookups
    # NOTE(review): ParcelIntersection is not part of the local import above;
    # presumably it is imported at module level - confirm.
    intersections_without_ts = self.parcelintersection_set.filter(
        parcel__in=self.parcels.all(),
        calculated_on__isnull=True,
    )
    for entry in intersections_without_ts:
        entry.calculated_on = _now
    ParcelIntersection.objects.bulk_update(
        intersections_without_ts,
        ["calculated_on"]
    )
@transaction.atomic
def _set_parcel_update_start_time(self):
"""
@@ -233,9 +327,7 @@ class Geometry(BaseResource):
Returns:
parcels (QuerySet): The related parcels as queryset
"""
parcels = self.parcels.filter(
parcelintersection__calculated_on__isnull=False,
).prefetch_related(
parcels = self.parcels.prefetch_related(
"district",
"municipal",
).order_by(
@@ -305,6 +397,33 @@ class Geometry(BaseResource):
}
return geojson
@property
def complexity_factor(self) -> float:
    """ Calculates a factor to estimate the complexity of a Geometry

    0 = very low complexity
    1 = very high complexity

    ASSUMPTION:
        The envelope is the bounding box of a geometry. If the geometry's area
        is similar to the area of its bounding box, it is considered as rather
        simple, since it seems to be a closed shape like a simple box.
        If the geometry has a very big bounding box, but the geometry's own
        area is rather small compared to the one of the bounding box, the
        complexity can be higher.

    Example:
        geometry area similar to bounding box --> geometry / bounding_box ~ 1
        geometry area far smaller than bb     --> geometry / bounding_box ~ 0

    Result is being inverted for better understanding of 'low' and 'high' complexity.

    Returns:
        complexity_factor (float): The estimated complexity
    """
    envelope_area = self.geom.envelope.area
    if not envelope_area:
        # A bounding box without area (e.g. an empty or degenerate geometry)
        # leaves nothing to compare against; report the lowest complexity
        # instead of failing on a division by zero.
        return 0.0
    # Divide by the bounding box area itself, as described above. Dividing by
    # the area of (bounding box - geometry) is undefined for geometries that
    # fill their bounding box and leaves the 0..1 range for any geometry that
    # covers more than half of it.
    return 1 - self.geom.area / envelope_area
class GeometryConflict(UuidModel):
"""