Skip to content

Commit d8cf0b8

Browse files
bagababaogluberke.agababaogluddabble
authored
Improve performance of the latest_of_each method of the manager (#1360)
* Improve performance of the latest_of_each method of the manager * Polished docs and code * Improved as_of() test case * Moved assertRecordValues() to new HistoricalTestCase This makes the method available for use in other test cases. * Added LatestOfEachTestCase --------- Co-authored-by: berke.agababaoglu <berke.agababaoglu@scale.eu> Co-authored-by: Anders <6058745+ddabble@users.noreply.github.com>
1 parent 2571899 commit d8cf0b8

File tree

6 files changed

+164
-85
lines changed

6 files changed

+164
-85
lines changed

AUTHORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Authors
1919
- Anton Kulikov (`bigtimecriminal <https://github.com/bigtimecriminal>`_)
2020
- Ben Lawson (`blawson <https://github.com/blawson>`_)
2121
- Benjamin Mampaey (`bmampaey <https://github.com/bmampaey>`_)
22+
- Berke Agababaoglu (`bagababaoglu <https://github.com/bagababaoglu>`_)
2223
- Bheesham Persaud (`bheesham <https://github.com/bheesham>`_)
2324
- `bradford281 <https://github.com/bradford281>`_
2425
- Brian Armstrong (`barm <https://github.com/barm>`_)

CHANGES.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Unreleased
66

77
- Made ``skip_history_when_saving`` work when creating an object - not just when
88
updating an object (gh-1262)
9+
- Improved performance of the ``latest_of_each()`` history manager method (gh-1360)
910

1011
3.7.0 (2024-05-29)
1112
------------------

simple_history/manager.py

Lines changed: 23 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from django.conf import settings
2-
from django.db import connection, models
3-
from django.db.models import OuterRef, QuerySet, Subquery
2+
from django.db import models
3+
from django.db.models import Exists, OuterRef, Q, QuerySet
44
from django.utils import timezone
55

66
from simple_history.utils import (
@@ -29,13 +29,11 @@ def __init__(self, *args, **kwargs):
2929
self._as_of = None
3030
self._pk_attr = self.model.instance_type._meta.pk.attname
3131

32-
def as_instances(self):
32+
def as_instances(self) -> "HistoricalQuerySet":
3333
"""
3434
Return a queryset that generates instances instead of historical records.
3535
Queries against the resulting queryset will translate `pk` into the
3636
primary key field of the original type.
37-
38-
Returns a queryset.
3937
"""
4038
if not self._as_instances:
4139
result = self.exclude(history_type="-")
@@ -44,7 +42,7 @@ def as_instances(self):
4442
result = self._clone()
4543
return result
4644

47-
def filter(self, *args, **kwargs):
45+
def filter(self, *args, **kwargs) -> "HistoricalQuerySet":
4846
"""
4947
If a `pk` filter arrives and the queryset is returning instances
5048
then the caller actually wants to filter based on the original
@@ -55,43 +53,26 @@ def filter(self, *args, **kwargs):
5553
kwargs[self._pk_attr] = kwargs.pop("pk")
5654
return super().filter(*args, **kwargs)
5755

58-
def latest_of_each(self):
56+
def latest_of_each(self) -> "HistoricalQuerySet":
5957
"""
6058
Ensures results in the queryset are the latest historical record for each
61-
primary key. Deletions are not removed.
62-
63-
Returns a queryset.
59+
primary key. This includes deletion records.
6460
"""
65-
# If using MySQL, need to get a list of IDs in memory and then use them for the
66-
# second query.
67-
# Does mean two loops through the DB to get the full set, but still a speed
68-
# improvement.
69-
backend = connection.vendor
70-
if backend == "mysql":
71-
history_ids = {}
72-
for item in self.order_by("-history_date", "-pk"):
73-
if getattr(item, self._pk_attr) not in history_ids:
74-
history_ids[getattr(item, self._pk_attr)] = item.pk
75-
latest_historics = self.filter(history_id__in=history_ids.values())
76-
elif backend == "postgresql":
77-
latest_pk_attr_historic_ids = (
78-
self.order_by(self._pk_attr, "-history_date", "-pk")
79-
.distinct(self._pk_attr)
80-
.values_list("pk", flat=True)
81-
)
82-
latest_historics = self.filter(history_id__in=latest_pk_attr_historic_ids)
83-
else:
84-
latest_pk_attr_historic_ids = (
85-
self.filter(**{self._pk_attr: OuterRef(self._pk_attr)})
86-
.order_by("-history_date", "-pk")
87-
.values("pk")[:1]
88-
)
89-
latest_historics = self.filter(
90-
history_id__in=Subquery(latest_pk_attr_historic_ids)
91-
)
92-
return latest_historics
61+
# Subquery for finding the records that belong to the same history-tracked
62+
# object as the record from the outer query (identified by `_pk_attr`),
63+
# and that have a later `history_date` than the outer record.
64+
# The very latest record of a history-tracked object should be excluded from
65+
# this query - which will make it included in the `~Exists` query below.
66+
later_records = self.filter(
67+
Q(**{self._pk_attr: OuterRef(self._pk_attr)}),
68+
Q(history_date__gt=OuterRef("history_date")),
69+
)
70+
71+
# Filter the records to only include those for which the `later_records`
72+
# subquery does not return any results.
73+
return self.filter(~Exists(later_records))
9374

94-
def _select_related_history_tracked_objs(self):
75+
def _select_related_history_tracked_objs(self) -> "HistoricalQuerySet":
9576
"""
9677
A convenience method that calls ``select_related()`` with all the names of
9778
the model's history-tracked ``ForeignKey`` fields.
@@ -103,18 +84,18 @@ def _select_related_history_tracked_objs(self):
10384
]
10485
return self.select_related(*field_names)
10586

106-
def _clone(self):
87+
def _clone(self) -> "HistoricalQuerySet":
10788
c = super()._clone()
10889
c._as_instances = self._as_instances
10990
c._as_of = self._as_of
11091
c._pk_attr = self._pk_attr
11192
return c
11293

113-
def _fetch_all(self):
94+
def _fetch_all(self) -> None:
11495
super()._fetch_all()
11596
self._instanceize()
11697

117-
def _instanceize(self):
98+
def _instanceize(self) -> None:
11899
"""
119100
Convert the result cache to instances if possible and it has not already been
120101
done. If a query extracts `.values(...)` then the result cache will not contain

simple_history/tests/tests/test_manager.py

Lines changed: 96 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,66 @@
88
from simple_history.manager import SIMPLE_HISTORY_REVERSE_ATTR_NAME
99

1010
from ..models import Choice, Document, Poll, RankedDocument
11+
from .utils import HistoricalTestCase
1112

1213
User = get_user_model()
1314

1415

15-
class AsOfTest(TestCase):
16+
class LatestOfEachTestCase(HistoricalTestCase):
17+
def test_filtered_instances_are_as_expected(self):
18+
document1 = RankedDocument.objects.create(rank=10)
19+
document2 = RankedDocument.objects.create(rank=20)
20+
document2.rank = 21
21+
document2.save()
22+
document3 = RankedDocument.objects.create(rank=30)
23+
document3.rank = 31
24+
document3.save()
25+
document3.delete()
26+
document4 = RankedDocument.objects.create(rank=40)
27+
document4_pk = document4.pk
28+
document4.delete()
29+
reincarnated_document4 = RankedDocument.objects.create(pk=document4_pk, rank=42)
30+
31+
record4, record3, record2, record1 = RankedDocument.history.latest_of_each()
32+
self.assertRecordValues(
33+
record1,
34+
RankedDocument,
35+
{
36+
"rank": 10,
37+
"id": document1.id,
38+
"history_type": "+",
39+
},
40+
)
41+
self.assertRecordValues(
42+
record2,
43+
RankedDocument,
44+
{
45+
"rank": 21,
46+
"id": document2.id,
47+
"history_type": "~",
48+
},
49+
)
50+
self.assertRecordValues(
51+
record3,
52+
RankedDocument,
53+
{
54+
"rank": 31,
55+
"id": document3.id,
56+
"history_type": "-",
57+
},
58+
)
59+
self.assertRecordValues(
60+
record4,
61+
RankedDocument,
62+
{
63+
"rank": 42,
64+
"id": reincarnated_document4.id,
65+
"history_type": "+",
66+
},
67+
)
68+
69+
70+
class AsOfTestCase(TestCase):
1671
model = Document
1772

1873
def setUp(self):
@@ -69,7 +124,7 @@ def test_modified(self):
69124
self.assertEqual(as_of_list[0].changed_by, self.obj.changed_by)
70125

71126

72-
class AsOfAdditionalTestCase(TestCase):
127+
class AsOfTestCaseWithoutSetUp(TestCase):
73128
def test_create_and_delete(self):
74129
document = Document.objects.create()
75130
now = datetime.now()
@@ -150,42 +205,57 @@ def test_historical_query_set(self):
150205
"""
151206
Demonstrates how the HistoricalQuerySet works to provide as_of functionality.
152207
"""
153-
document1 = RankedDocument.objects.create(rank=42)
154-
document2 = RankedDocument.objects.create(rank=84)
155-
document2.rank = 51
208+
document1 = RankedDocument.objects.create(rank=10)
209+
document2 = RankedDocument.objects.create(rank=20)
210+
document2.rank = 21
156211
document2.save()
157212
document1.delete()
213+
t1 = datetime.now()
214+
document3 = RankedDocument.objects.create(rank=30) # noqa: F841
215+
document2.rank = 22
216+
document2.save()
158217
t2 = datetime.now()
159218

160-
# look for historical records, get back a queryset
161-
with self.assertNumQueries(1):
162-
queryset = RankedDocument.history.filter(history_date__lte=t2)
163-
self.assertEqual(queryset.count(), 4)
219+
# 4 records before `t1` (for document 1 and 2), 2 after (for document 2 and 3)
220+
queryset = RankedDocument.history.filter(history_date__lte=t1)
221+
self.assertEqual(queryset.count(), 4)
222+
self.assertEqual(RankedDocument.history.filter(history_date__gt=t1).count(), 2)
164223

165-
# only want the most recend records (provided by HistoricalQuerySet)
166-
self.assertEqual(queryset.latest_of_each().count(), 2)
224+
# `latest_of_each()` returns the most recent record of each document
225+
with self.assertNumQueries(1):
226+
self.assertEqual(queryset.latest_of_each().count(), 2)
167227

168-
# want to see the instances as of that time?
169-
self.assertEqual(queryset.latest_of_each().as_instances().count(), 1)
228+
# `as_instances()` returns the historical instances as of each record's time,
229+
# but excludes deletion records (i.e. document 1's most recent record)
230+
with self.assertNumQueries(1):
231+
self.assertEqual(queryset.latest_of_each().as_instances().count(), 1)
170232

171-
# these new methods are idempotent
172-
self.assertEqual(
173-
queryset.latest_of_each()
174-
.latest_of_each()
175-
.as_instances()
176-
.as_instances()
177-
.count(),
178-
1,
179-
)
233+
# (Duplicate calls to these methods should not change the number of queries,
234+
# since they're idempotent)
235+
with self.assertNumQueries(1):
236+
self.assertEqual(
237+
queryset.latest_of_each()
238+
.latest_of_each()
239+
.as_instances()
240+
.as_instances()
241+
.count(),
242+
1,
243+
)
180244

181-
# that was all the same as calling as_of!
182-
self.assertEqual(
245+
self.assertSetEqual(
246+
# In conclusion, all of these methods combined...
183247
set(
184-
RankedDocument.history.filter(history_date__lte=t2)
248+
RankedDocument.history.filter(history_date__lte=t1)
185249
.latest_of_each()
186250
.as_instances()
187251
),
188-
set(RankedDocument.history.as_of(t2)),
252+
# ...are equivalent to calling `as_of()`!
253+
set(RankedDocument.history.as_of(t1)),
254+
)
255+
256+
self.assertEqual(RankedDocument.history.as_of(t1).get().rank, 21)
257+
self.assertListEqual(
258+
[d.rank for d in RankedDocument.history.as_of(t2)], [22, 30]
189259
)
190260

191261

simple_history/tests/tests/test_models.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,6 @@
3030
pre_create_historical_m2m_records,
3131
pre_create_historical_record,
3232
)
33-
from simple_history.tests.tests.utils import (
34-
database_router_override_settings,
35-
database_router_override_settings_history_in_diff_db,
36-
middleware_override_settings,
37-
)
3833
from simple_history.utils import get_history_model_for_model, update_change_reason
3934

4035
from ..external.models import (
@@ -126,6 +121,12 @@
126121
UUIDModel,
127122
WaterLevel,
128123
)
124+
from .utils import (
125+
HistoricalTestCase,
126+
database_router_override_settings,
127+
database_router_override_settings_history_in_diff_db,
128+
middleware_override_settings,
129+
)
129130

130131
get_model = apps.get_model
131132
User = get_user_model()
@@ -140,18 +141,10 @@ def get_fake_file(filename):
140141
return fake_file
141142

142143

143-
class HistoricalRecordsTest(TestCase):
144+
class HistoricalRecordsTest(HistoricalTestCase):
144145
def assertDatetimesEqual(self, time1, time2):
145146
self.assertAlmostEqual(time1, time2, delta=timedelta(seconds=2))
146147

147-
def assertRecordValues(self, record, klass, values_dict):
148-
for key, value in values_dict.items():
149-
self.assertEqual(getattr(record, key), value)
150-
self.assertEqual(record.history_object.__class__, klass)
151-
for key, value in values_dict.items():
152-
if key not in ["history_type", "history_change_reason"]:
153-
self.assertEqual(getattr(record.history_object, key), value)
154-
155148
def test_create(self):
156149
p = Poll(question="what's up?", pub_date=today)
157150
p.save()

0 commit comments

Comments
 (0)