Skip to content

Commit e61d7a6

Browse files
authored
Search: optimize queryset when searching for subprojects (#12588)
With 3 subprojects: from 20 queries (with 4 extra queries per additional subproject) to 11 (with 1 query per extra subproject). In .com we have a customer with 80 subprojects, that's generating +320 queries per search :_ Main optimizations: - get_canonical_custom_domain was changed to be a cached property, so it can be re-used for each superproject (shared instance) on each subproject. - Instead of using `Project.objects.filter(superprojects__parent...)`, we just use `project.subprojects`, so it's easier to cache the relationship into `_superprojects`, which is then used by Project.parent_relationship.
1 parent fe2f472 commit e61d7a6

File tree

9 files changed

+186
-16
lines changed

9 files changed

+186
-16
lines changed

readthedocs/core/resolver.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def _get_project_domain(self, project, external_version_slug=None, use_canonical
199199
if external_version_slug:
200200
domain = self._get_external_subdomain(canonical_project, external_version_slug)
201201
elif use_canonical_domain and self._use_cname(canonical_project):
202-
domain_object = canonical_project.get_canonical_custom_domain()
202+
domain_object = canonical_project.canonical_custom_domain
203203
if domain_object:
204204
use_https = domain_object.https
205205
domain = domain_object.domain

readthedocs/projects/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1367,7 +1367,8 @@ def parent_relationship(self):
13671367

13681368
return self.superprojects.select_related("parent").first()
13691369

1370-
def get_canonical_custom_domain(self):
1370+
@cached_property
1371+
def canonical_custom_domain(self):
13711372
"""Get the canonical custom domain or None."""
13721373
if hasattr(self, "_canonical_domains"):
13731374
# Cached custom domains

readthedocs/proxito/redirects.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def canonical_redirect(request, project, redirect_type, external_version_slug=No
5555
to = parsed_from._replace(scheme="https").geturl()
5656
elif redirect_type == RedirectType.to_canonical_domain:
5757
# We need to change the domain and protocol.
58-
canonical_domain = project.get_canonical_custom_domain()
58+
canonical_domain = project.canonical_custom_domain
5959
protocol = "https" if canonical_domain.https else "http"
6060
to = parsed_from._replace(scheme=protocol, netloc=canonical_domain.domain).geturl()
6161
elif redirect_type == RedirectType.subproject_to_main_domain:

readthedocs/proxito/tests/test_hosting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ def test_number_of_queries_versions_with_downloads(self):
408408
active=True,
409409
)
410410

411-
with self.assertNumQueries(13):
411+
with self.assertNumQueries(12):
412412
r = self.client.get(
413413
reverse("proxito_readthedocs_docs_addons"),
414414
{

readthedocs/rtd_tests/tests/test_resolver.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,8 @@ def test_resolver_public_domain_overrides(self):
539539
canonical=True,
540540
https=False,
541541
)
542+
# Purge the cached domain.
543+
del self.pip.canonical_custom_domain
542544
url = Resolver().resolve(project=self.pip)
543545
self.assertEqual(url, "http://docs.foobar.com/en/latest/")
544546
url = Resolver().resolve(project=self.pip)

readthedocs/search/api/v2/serializers.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212

1313
from rest_framework import serializers
1414

15+
from readthedocs.builds.models import Version
16+
from readthedocs.core.resolver import Resolver
1517
from readthedocs.projects.constants import GENERIC
1618
from readthedocs.projects.constants import MKDOCS
1719
from readthedocs.projects.constants import SPHINX_HTMLDIR
18-
from readthedocs.projects.models import Project
1920

2021

2122
# Structures used for storing cached data of a version mostly.
@@ -98,17 +99,22 @@ def __init__(self, *args, projects=None, **kwargs):
9899
if projects:
99100
context = kwargs.setdefault("context", {})
100101
context["projects_data"] = {
101-
project.slug: self._build_project_data(project, version.slug)
102+
project.slug: self._build_project_data(project, version=version)
102103
for project, version in projects
103104
}
104105
super().__init__(*args, **kwargs)
105106

106-
def _build_project_data(self, project, version_slug):
107+
def _build_project_data(self, project, version):
107108
"""Build a `ProjectData` object given a project and its version."""
108-
url = project.get_docs_url(version_slug=version_slug)
109-
project_alias = project.superprojects.values_list("alias", flat=True).first()
109+
# NOTE: re-using the resolver doesn't help here,
110+
# as this method is called just once per project,
111+
# re-using the resolver is useful when resolving the same project multiple times.
112+
url = Resolver().resolve_version(project, version)
113+
project_alias = None
114+
if project.parent_relationship:
115+
project_alias = project.parent_relationship.alias
110116
version_data = VersionData(
111-
slug=version_slug,
117+
slug=version.slug,
112118
docs_url=url,
113119
)
114120
return ProjectData(
@@ -129,10 +135,15 @@ def _get_project_data(self, obj):
129135
if project_data:
130136
return project_data
131137

132-
project = Project.objects.filter(slug=obj.project).first()
133-
if project:
138+
version = (
139+
Version.objects.filter(project__slug=obj.project, slug=obj.version)
140+
.select_related("project")
141+
.first()
142+
)
143+
if version:
144+
project = version.project
134145
projects_data = self.context.setdefault("projects_data", {})
135-
projects_data[obj.project] = self._build_project_data(project, obj.version)
146+
projects_data[obj.project] = self._build_project_data(project, version=version)
136147
return projects_data[obj.project]
137148
return None
138149

readthedocs/search/api/v3/executor.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,14 @@ def _get_subprojects(self, project, version_slug=None):
128128
the default version will be used.
129129
If `version_slug` is None, we will always use the default version.
130130
"""
131-
subprojects = Project.objects.filter(superprojects__parent=project)
132-
for subproject in subprojects:
131+
relationships = project.subprojects.select_related("child")
132+
for relationship in relationships:
133+
subproject = relationship.child
134+
# NOTE: Since we already have the superproject relationship,
135+
# we can set it here to avoid an extra query later
136+
# when using Project.parent_relationship property.
137+
# The superproject instannce is also shared among all subprojects.
138+
subproject._superprojects = [relationship]
133139
version = None
134140
if version_slug:
135141
version = self._get_project_version(

readthedocs/search/api/v3/tests/test_api.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,156 @@ class ProxiedSearchAPITest(SearchAPITest):
362362
def get(self, *args, **kwargs):
363363
return self.client.get(*args, HTTP_HOST=self.host, **kwargs)
364364

365+
def test_search_project_number_of_queries(self):
366+
# Default version
367+
with self.assertNumQueries(11):
368+
resp = self.get(self.url, data={"q": "project:project test"})
369+
assert resp.status_code == 200
370+
assert resp.data["results"]
371+
372+
with self.assertNumQueries(17):
373+
resp = self.get(
374+
self.url, data={"q": "project:project project:another-project test"}
375+
)
376+
assert resp.status_code == 200
377+
assert resp.data["results"]
378+
379+
# With explicit version
380+
with self.assertNumQueries(10):
381+
resp = self.get(self.url, data={"q": "project:project/latest test"})
382+
assert resp.status_code == 200
383+
assert resp.data["results"]
384+
385+
with self.assertNumQueries(16):
386+
resp = self.get(
387+
self.url, data={"q": "project:project/latest project:another-project/latest test"}
388+
)
389+
assert resp.status_code == 200
390+
assert resp.data["results"]
391+
392+
@mock.patch("readthedocs.search.api.v3.views.tasks.record_search_query.delay", new=mock.MagicMock())
393+
def test_search_project_number_of_queries_without_search_recording(self):
394+
# Default version
395+
with self.assertNumQueries(8):
396+
resp = self.get(self.url, data={"q": "project:project test"})
397+
assert resp.status_code == 200
398+
assert resp.data["results"]
399+
400+
with self.assertNumQueries(12):
401+
resp = self.get(
402+
self.url, data={"q": "project:project project:another-project test"}
403+
)
404+
assert resp.status_code == 200
405+
assert resp.data["results"]
406+
407+
# With explicit version
408+
with self.assertNumQueries(8):
409+
resp = self.get(self.url, data={"q": "project:project/latest test"})
410+
assert resp.status_code == 200
411+
assert resp.data["results"]
412+
413+
with self.assertNumQueries(12):
414+
resp = self.get(
415+
self.url, data={"q": "project:project/latest project:another-project/latest test"}
416+
)
417+
assert resp.status_code == 200
418+
assert resp.data["results"]
419+
420+
def test_search_subprojects_number_of_queries(self):
421+
subproject = get(
422+
Project,
423+
slug="subproject",
424+
users=[self.user],
425+
privacy_level=PUBLIC,
426+
)
427+
subproject.versions.update(built=True, active=True, privacy_level=PUBLIC)
428+
self.create_index(subproject.versions.first())
429+
self.project.add_subproject(subproject)
430+
431+
# Search on default version.
432+
with self.assertNumQueries(16):
433+
resp = self.get(self.url, data={"q": "subprojects:project test"})
434+
assert resp.status_code == 200
435+
assert resp.data["results"]
436+
437+
# Search on explicit version.
438+
with self.assertNumQueries(14):
439+
resp = self.get(self.url, data={"q": "subprojects:project/latest test"})
440+
assert resp.status_code == 200
441+
assert resp.data["results"]
442+
443+
# Add subprojects.
444+
for i in range(3):
445+
subproject = get(
446+
Project,
447+
slug=f"subproject-{i}",
448+
users=[self.user],
449+
privacy_level=PUBLIC,
450+
)
451+
subproject.versions.update(built=True, active=True, privacy_level=PUBLIC)
452+
self.create_index(subproject.versions.first())
453+
self.project.add_subproject(subproject)
454+
455+
# Search on default version.
456+
with self.assertNumQueries(26):
457+
resp = self.get(self.url, data={"q": "subprojects:project test"})
458+
assert resp.status_code == 200
459+
assert resp.data["results"]
460+
461+
# Search on explicit version.
462+
with self.assertNumQueries(23):
463+
resp = self.get(self.url, data={"q": "subprojects:project/latest test"})
464+
assert resp.status_code == 200
465+
assert resp.data["results"]
466+
467+
@mock.patch("readthedocs.search.api.v3.views.tasks.record_search_query.delay", new=mock.MagicMock())
468+
def test_search_subprojects_number_of_queries_without_search_recording(self):
469+
subproject = get(
470+
Project,
471+
slug="subproject",
472+
users=[self.user],
473+
privacy_level=PUBLIC,
474+
)
475+
subproject.versions.update(built=True, active=True, privacy_level=PUBLIC)
476+
self.create_index(subproject.versions.first())
477+
self.project.add_subproject(subproject)
478+
479+
# Search on default version.
480+
with self.assertNumQueries(10):
481+
resp = self.get(self.url, data={"q": "subprojects:project test"})
482+
assert resp.status_code == 200
483+
assert resp.data["results"]
484+
485+
# Search on explicit version.
486+
with self.assertNumQueries(10):
487+
resp = self.get(self.url, data={"q": "subprojects:project/latest test"})
488+
assert resp.status_code == 200
489+
assert resp.data["results"]
490+
491+
# Add subprojects.
492+
for i in range(3):
493+
subproject = get(
494+
Project,
495+
slug=f"subproject-{i}",
496+
users=[self.user],
497+
privacy_level=PUBLIC,
498+
)
499+
subproject.versions.update(built=True, active=True, privacy_level=PUBLIC)
500+
self.create_index(subproject.versions.first())
501+
self.project.add_subproject(subproject)
502+
503+
# Search on default version.
504+
with self.assertNumQueries(13):
505+
resp = self.get(self.url, data={"q": "subprojects:project test"})
506+
assert resp.status_code == 200
507+
assert resp.data["results"]
508+
509+
# Search on explicit version.
510+
with self.assertNumQueries(13):
511+
resp = self.get(self.url, data={"q": "subprojects:project/latest test"})
512+
assert resp.status_code == 200
513+
assert resp.data["results"]
514+
365515

366516
@override_settings(ALLOW_PRIVATE_REPOS=True)
367517
@override_settings(RTD_ALLOW_ORGANIZATIONS=True)

readthedocs/search/tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def record_search_query(project_slug, version_slug, query, total_results, time_s
200200

201201
version = (
202202
Version.objects.filter(slug=version_slug, project__slug=project_slug)
203-
.prefetch_related("project")
203+
.select_related("project")
204204
.first()
205205
)
206206
if not version:

0 commit comments

Comments
 (0)