# WIP: Performance boost parcel calculation
* Improves handling of parcel calculation (speed-up of ~30%)
* ToDo: Clean up code
This commit is contained in:
parent
50bd6feb89
commit
8bcccb4685
@ -6,6 +6,7 @@ Created on: 15.11.21
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
|
from time import process_time
|
||||||
|
|
||||||
from django.contrib.gis.db.models import MultiPolygonField
|
from django.contrib.gis.db.models import MultiPolygonField
|
||||||
from django.db import models, transaction
|
from django.db import models, transaction
|
||||||
@ -140,7 +141,10 @@ class Geometry(BaseResource):
|
|||||||
return
|
return
|
||||||
|
|
||||||
self._set_parcel_update_start_time()
|
self._set_parcel_update_start_time()
|
||||||
self._perform_parcel_update()
|
|
||||||
|
t1 = process_time()
|
||||||
|
self._perform_parcel_update_fast()
|
||||||
|
print(f"Parcel processing: {process_time() - t1}")
|
||||||
self._set_parcel_update_end_time()
|
self._set_parcel_update_end_time()
|
||||||
|
|
||||||
def _perform_parcel_update(self):
|
def _perform_parcel_update(self):
|
||||||
@ -155,61 +159,151 @@ class Geometry(BaseResource):
|
|||||||
fetched_parcels = parcel_fetcher.get_parcels()
|
fetched_parcels = parcel_fetcher.get_parcels()
|
||||||
_now = timezone.now()
|
_now = timezone.now()
|
||||||
underlying_parcels = []
|
underlying_parcels = []
|
||||||
|
i = 0
|
||||||
|
len_fetched_parcels = len(fetched_parcels)
|
||||||
|
print("Process fetched parcels:")
|
||||||
for result in fetched_parcels:
|
for result in fetched_parcels:
|
||||||
with transaction.atomic():
|
# There could be parcels which include the word 'Flur',
|
||||||
# There could be parcels which include the word 'Flur',
|
# which needs to be deleted and just keep the numerical values
|
||||||
# which needs to be deleted and just keep the numerical values
|
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
|
||||||
## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
|
flr_val = result["flur"].replace("Flur ", "")
|
||||||
flr_val = result["flur"].replace("Flur ", "")
|
district = District.objects.get_or_create(
|
||||||
|
key=result["kreisschl"],
|
||||||
|
name=result["kreis"],
|
||||||
|
)[0]
|
||||||
|
municipal = Municipal.objects.get_or_create(
|
||||||
|
key=result["gmdschl"],
|
||||||
|
name=result["gemeinde"],
|
||||||
|
district=district,
|
||||||
|
)[0]
|
||||||
|
parcel_group = ParcelGroup.objects.get_or_create(
|
||||||
|
key=result["gemaschl"],
|
||||||
|
name=result["gemarkung"],
|
||||||
|
municipal=municipal,
|
||||||
|
)[0]
|
||||||
|
flrstck_nnr = result['flstnrnen']
|
||||||
|
if not flrstck_nnr:
|
||||||
|
flrstck_nnr = None
|
||||||
|
flrstck_zhlr = result['flstnrzae']
|
||||||
|
if not flrstck_zhlr:
|
||||||
|
flrstck_zhlr = None
|
||||||
|
parcel_obj = Parcel.objects.get_or_create(
|
||||||
|
district=district,
|
||||||
|
municipal=municipal,
|
||||||
|
parcel_group=parcel_group,
|
||||||
|
flr=flr_val,
|
||||||
|
flrstck_nnr=flrstck_nnr,
|
||||||
|
flrstck_zhlr=flrstck_zhlr,
|
||||||
|
)[0]
|
||||||
|
parcel_obj.district = district
|
||||||
|
parcel_obj.updated_on = _now
|
||||||
|
parcel_obj.save()
|
||||||
|
underlying_parcels.append(parcel_obj)
|
||||||
|
i += 1
|
||||||
|
if i % 100 == 0:
|
||||||
|
print(f" {i}/{len_fetched_parcels}")
|
||||||
|
|
||||||
|
# Update the linked parcels
|
||||||
|
#self.parcels.clear()
|
||||||
|
self.parcels.set(underlying_parcels)
|
||||||
|
|
||||||
|
# Set the calculated_on intermediate field, so this related data will be found on lookups
|
||||||
|
#intersections_without_ts = self.parcelintersection_set.filter(
|
||||||
|
# parcel__in=self.parcels.all(),
|
||||||
|
# calculated_on__isnull=True,
|
||||||
|
#)
|
||||||
|
#for entry in intersections_without_ts:
|
||||||
|
# entry.calculated_on = _now
|
||||||
|
#ParcelIntersection.objects.bulk_update(
|
||||||
|
# intersections_without_ts,
|
||||||
|
# ["calculated_on"]
|
||||||
|
#)
|
||||||
|
|
||||||
|
def _perform_parcel_update_fast(self):
    """
    Performs the main logic of parcel updating.

    Fetches the parcel records for this geometry through ParcelFetcher,
    resolves (or creates) the District, Municipal, ParcelGroup and Parcel
    rows for every record and finally replaces the parcels linked to this
    geometry with the resolved set.

    District, Municipal and ParcelGroup objects are cached in dicts keyed by
    the record's own key values ('kreisschl', 'gmdschl', 'gemaschl'), so each
    of them is looked up in the database only once per run.

    NOTE(review): the cache assumes that records sharing the same key also
    share the same name/parent - confirm against the fetched data.

    Returns:
        None
    """
    from konova.models import Parcel, District, Municipal, ParcelGroup

    parcel_fetcher = ParcelFetcher(
        geometry=self
    )
    fetched_parcels = parcel_fetcher.get_parcels()
    _now = timezone.now()
    underlying_parcels = []

    len_fetched_parcels = len(fetched_parcels)
    print("Process fetched parcels:")

    # Per-run caches: record key value -> already resolved model instance
    districts = {}
    municipals = {}
    parcel_groups = {}

    for i, result in enumerate(fetched_parcels, start=1):
        # There could be parcels which include the word 'Flur',
        # which needs to be deleted and just keep the numerical values
        ## THIS CAN BE REMOVED IN THE FUTURE, WHEN 'Flur' WON'T OCCUR ANYMORE!
        flr_val = result["flur"].replace("Flur ", "")

        # Get district (cache in dict).
        # The lookup must use the record's key VALUE; looking up the literal
        # field name would miss on every iteration and defeat the cache.
        district_key = result["kreisschl"]
        try:
            district = districts[district_key]
        except KeyError:
            district = District.objects.get_or_create(
                key=district_key,
                name=result["kreis"],
            )[0]
            districts[district_key] = district

        # Get municipal (cache in dict)
        municipal_key = result["gmdschl"]
        try:
            municipal = municipals[municipal_key]
        except KeyError:
            municipal = Municipal.objects.get_or_create(
                key=municipal_key,
                name=result["gemeinde"],
                district=district,
            )[0]
            municipals[municipal_key] = municipal

        # Get parcel group (cache in dict)
        parcel_group_key = result["gemaschl"]
        try:
            parcel_group = parcel_groups[parcel_group_key]
        except KeyError:
            parcel_group = ParcelGroup.objects.get_or_create(
                key=parcel_group_key,
                name=result["gemarkung"],
                municipal=municipal,
            )[0]
            parcel_groups[parcel_group_key] = parcel_group

        # Preprocess parcel data: falsy values (e.g. empty strings) are
        # stored as None
        flrstck_nnr = result['flstnrnen'] or None
        flrstck_zhlr = result['flstnrzae'] or None

        parcel_obj = Parcel.objects.get_or_create(
            district=district,
            municipal=municipal,
            parcel_group=parcel_group,
            flr=flr_val,
            flrstck_nnr=flrstck_nnr,
            flrstck_zhlr=flrstck_zhlr,
        )[0]
        parcel_obj.updated_on = _now
        parcel_obj.save()
        underlying_parcels.append(parcel_obj)

        # Progress output every 100 processed records
        if i % 100 == 0:
            print(f" {i}/{len_fetched_parcels}")

    # Update linked parcels
    self.parcels.set(underlying_parcels)
|
||||||
|
|
||||||
# Set the calculated_on intermediate field, so this related data will be found on lookups
|
|
||||||
intersections_without_ts = self.parcelintersection_set.filter(
|
|
||||||
parcel__in=self.parcels.all(),
|
|
||||||
calculated_on__isnull=True,
|
|
||||||
)
|
|
||||||
for entry in intersections_without_ts:
|
|
||||||
entry.calculated_on = _now
|
|
||||||
ParcelIntersection.objects.bulk_update(
|
|
||||||
intersections_without_ts,
|
|
||||||
["calculated_on"]
|
|
||||||
)
|
|
||||||
|
|
||||||
@transaction.atomic
|
@transaction.atomic
|
||||||
def _set_parcel_update_start_time(self):
|
def _set_parcel_update_start_time(self):
|
||||||
"""
|
"""
|
||||||
@ -233,9 +327,7 @@ class Geometry(BaseResource):
|
|||||||
Returns:
|
Returns:
|
||||||
parcels (QuerySet): The related parcels as queryset
|
parcels (QuerySet): The related parcels as queryset
|
||||||
"""
|
"""
|
||||||
parcels = self.parcels.filter(
|
parcels = self.parcels.prefetch_related(
|
||||||
parcelintersection__calculated_on__isnull=False,
|
|
||||||
).prefetch_related(
|
|
||||||
"district",
|
"district",
|
||||||
"municipal",
|
"municipal",
|
||||||
).order_by(
|
).order_by(
|
||||||
@ -305,6 +397,33 @@ class Geometry(BaseResource):
|
|||||||
}
|
}
|
||||||
return geojson
|
return geojson
|
||||||
|
|
||||||
|
@property
def complexity_factor(self) -> float:
    """ Calculates a factor to estimate the complexity of a Geometry

    0 = very low complexity
    1 = very high complexity

    ASSUMPTION:
    The envelope is the bounding box of a geometry. If the geometry's area is similar to the area of its bounding
    box, it is considered as rather simple, since it seems to be a closer shape like a simple box.
    If the geometry has a very big bounding box, but the geometry's own area is rather small,
    compared to the one of the bounding box, the complexity can be higher.

    Example:
        geometry area similar to bounding box --> geometry / bounding_box ~ 1
        geometry area far smaller than bb --> geometry / bounding_box ~ 0

    Result is being inverted for better understanding of 'low' and 'high' complexity.

    If there is no geometry, or its bounding box has no area, 0.0 is returned.

    Returns:
        complexity_factor (float): The estimated complexity, within [0, 1]
    """
    geom = self.geom
    if geom is None:
        # No geometry at all --> nothing complex to process
        return 0.0

    envelope_area = geom.envelope.area
    if not envelope_area:
        # Empty or degenerate geometry: avoid dividing by zero
        return 0.0

    # Divide by the bounding box area itself (not by the area of
    # 'bounding box minus geometry'): that keeps the ratio within [0, 1]
    # and cannot hit a zero divisor when the geometry fills its bounding box.
    complexity_factor = 1 - geom.area / envelope_area

    # Guard against tiny floating point overshoots
    return max(0.0, min(1.0, complexity_factor))
|
||||||
|
|
||||||
|
|
||||||
class GeometryConflict(UuidModel):
|
class GeometryConflict(UuidModel):
|
||||||
"""
|
"""
|
||||||
|
@ -49,5 +49,5 @@ ETS_GROUP = "Conservation office"
|
|||||||
# GEOMETRY
|
# GEOMETRY
|
||||||
## Max number of allowed vertices. Geometries larger will be simplified until they reach this threshold
|
## Max number of allowed vertices. Geometries larger will be simplified until they reach this threshold
|
||||||
GEOM_MAX_VERTICES = 10000
|
GEOM_MAX_VERTICES = 10000
|
||||||
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 5 minutes)
|
## Max seconds to wait for a parcel calculation result before a new request will be started (default: 30 minutes)
|
||||||
GEOM_THRESHOLD_RECALCULATION_SECONDS = 300
|
GEOM_THRESHOLD_RECALCULATION_SECONDS = 60 * 30
|
||||||
|
@ -135,6 +135,7 @@ DATABASES = {
|
|||||||
'USER': 'postgres',
|
'USER': 'postgres',
|
||||||
'HOST': '127.0.0.1',
|
'HOST': '127.0.0.1',
|
||||||
'PORT': '5432',
|
'PORT': '5432',
|
||||||
|
'CONN_MAX_AGE': 120,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||||
|
@ -10,13 +10,14 @@ def celery_update_parcels(geometry_id: str, recheck: bool = True):
|
|||||||
from konova.models import Geometry, ParcelIntersection
|
from konova.models import Geometry, ParcelIntersection
|
||||||
try:
|
try:
|
||||||
geom = Geometry.objects.get(id=geometry_id)
|
geom = Geometry.objects.get(id=geometry_id)
|
||||||
objs = geom.parcelintersection_set.all()
|
geom.parcels.clear()
|
||||||
for obj in objs:
|
#objs = geom.parcelintersection_set.all()
|
||||||
obj.calculated_on = None
|
#for obj in objs:
|
||||||
ParcelIntersection.objects.bulk_update(
|
# obj.calculated_on = None
|
||||||
objs,
|
#ParcelIntersection.objects.bulk_update(
|
||||||
["calculated_on"]
|
# objs,
|
||||||
)
|
# ["calculated_on"]
|
||||||
|
#)
|
||||||
|
|
||||||
geom.update_parcels()
|
geom.update_parcels()
|
||||||
except ObjectDoesNotExist:
|
except ObjectDoesNotExist:
|
||||||
|
@ -37,30 +37,38 @@ class GeomParcelsView(LoginRequiredMixin, View):
|
|||||||
# https://htmx.org/docs/#polling
|
# https://htmx.org/docs/#polling
|
||||||
status_code = 286
|
status_code = 286
|
||||||
template = "konova/includes/parcels/parcel_table_frame.html"
|
template = "konova/includes/parcels/parcel_table_frame.html"
|
||||||
|
|
||||||
geom = get_object_or_404(Geometry, id=id)
|
geom = get_object_or_404(Geometry, id=id)
|
||||||
parcels = geom.get_underlying_parcels()
|
|
||||||
geos_geom = geom.geom or MultiPolygon(srid=DEFAULT_SRID_RLP)
|
geos_geom = geom.geom or MultiPolygon(srid=DEFAULT_SRID_RLP)
|
||||||
|
geometry_exists = not geos_geom.empty and geos_geom.area > 0
|
||||||
|
geom_parcel_update_started = geom.parcel_update_start is not None
|
||||||
|
geom_parcel_update_finished = geom.parcel_update_end is not None
|
||||||
|
|
||||||
|
parcels = geom.get_underlying_parcels()
|
||||||
|
parcels_exist = len(parcels) > 0
|
||||||
|
|
||||||
waiting_too_long = self._check_waiting_too_long(geom)
|
waiting_too_long = self._check_waiting_too_long(geom)
|
||||||
|
|
||||||
geometry_exists = not geos_geom.empty and geos_geom.area > 0
|
|
||||||
parcels_are_currently_calculated = (
|
parcels_are_currently_calculated = (
|
||||||
geometry_exists and
|
geometry_exists and
|
||||||
geom.parcel_update_start and
|
not parcels_exist and
|
||||||
not geom.parcel_update_end
|
geom_parcel_update_started and
|
||||||
|
not geom_parcel_update_finished
|
||||||
)
|
)
|
||||||
parcels_available = len(parcels) > 0
|
|
||||||
|
if not parcels_exist and waiting_too_long:
|
||||||
|
# Trigger calculation again - process may have failed in the background
|
||||||
|
celery_update_parcels.delay(geom.id)
|
||||||
|
parcels_are_currently_calculated = True
|
||||||
|
|
||||||
if parcels_are_currently_calculated:
|
if parcels_are_currently_calculated:
|
||||||
# Parcels are being calculated right now. Change the status code, so polling stays active for fetching
|
# Parcels are being calculated right now. Change the status code, so polling stays active for fetching
|
||||||
# results after the calculation
|
# results after the calculation
|
||||||
status_code = 200
|
status_code = 200
|
||||||
|
|
||||||
if waiting_too_long:
|
if parcels_exist or not geometry_exists:
|
||||||
# Trigger calculation again
|
# Default case: Parcels are calculated or there is no geometry at all
|
||||||
celery_update_parcels.delay(geom.id)
|
# (so there will be no parcels to expect)
|
||||||
|
|
||||||
if parcels_available or not geometry_exists:
|
|
||||||
municipals = geom.get_underlying_municipals(parcels)
|
municipals = geom.get_underlying_municipals(parcels)
|
||||||
|
|
||||||
rpp = 100
|
rpp = 100
|
||||||
@ -88,13 +96,16 @@ class GeomParcelsView(LoginRequiredMixin, View):
|
|||||||
Depending on the geometry's modified attribute
|
Depending on the geometry's modified attribute
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# Scale time to wait longer with increasing geometry complexity
|
||||||
|
complexity_factor = geom.complexity_factor + 1
|
||||||
|
wait_for_seconds = int(GEOM_THRESHOLD_RECALCULATION_SECONDS * complexity_factor)
|
||||||
try:
|
try:
|
||||||
pcs_diff = (timezone.now() - geom.parcel_update_start).seconds
|
pcs_diff = (timezone.now() - geom.parcel_update_start).seconds
|
||||||
except TypeError:
|
except TypeError:
|
||||||
pcs_diff = GEOM_THRESHOLD_RECALCULATION_SECONDS
|
pcs_diff = wait_for_seconds
|
||||||
|
|
||||||
calculation_not_finished = geom.parcel_update_end is None
|
calculation_not_finished = geom.parcel_update_end is None
|
||||||
waiting_too_long = (pcs_diff >= GEOM_THRESHOLD_RECALCULATION_SECONDS) and calculation_not_finished
|
waiting_too_long = (pcs_diff >= wait_for_seconds) and calculation_not_finished
|
||||||
return waiting_too_long
|
return waiting_too_long
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user