From 84451d009da4373e9cc37197e6d94d6015ca8363 Mon Sep 17 00:00:00 2001 From: Tim Schilling Date: Mon, 1 Sep 2025 14:10:16 -0500 Subject: [PATCH 1/3] Added support for searching ecosystem and blog entries. The blog results should have a property of whether it is included in the search results. We should also limit the blogs that are searchable for a version of Django based on the support end. This will allow us to limit the inclusion of blog posts in the search based on the time the entry was created, keeping the search results relevant to that version of Django. * Added is_searchable and made published accept datetime cut-off parameter. * Used Entry.get_absolute_url to encapsulate www host. * Extracted get_search_config helper function. Co-authored-by: Baptiste Mispelon --- blog/admin.py | 11 +- blog/migrations/0006_entry_is_searchable.py | 21 ++++ blog/models.py | 17 ++- blog/tests.py | 25 +++++ docs/models.py | 88 ++++++++++++++- docs/search.py | 29 +++++ docs/templates/docs/search_results.html | 6 +- docs/tests/test_models.py | 116 ++++++++++++++++++-- docs/tests/test_views.py | 4 +- 9 files changed, 296 insertions(+), 21 deletions(-) create mode 100644 blog/migrations/0006_entry_is_searchable.py diff --git a/blog/admin.py b/blog/admin.py index 22e1093d16..4af8550427 100644 --- a/blog/admin.py +++ b/blog/admin.py @@ -11,8 +11,15 @@ @admin.register(Entry) class EntryAdmin(admin.ModelAdmin): - list_display = ("headline", "pub_date", "is_active", "is_published", "author") - list_filter = ("is_active",) + list_display = ( + "headline", + "pub_date", + "is_active", + "is_published", + "is_searchable", + "author", + ) + list_filter = ("is_active", "is_searchable") exclude = ("summary_html", "body_html") prepopulated_fields = {"slug": ("headline",)} raw_id_fields = ["social_media_card"] diff --git a/blog/migrations/0006_entry_is_searchable.py b/blog/migrations/0006_entry_is_searchable.py new file mode 100644 index 0000000000..72f9bab84d --- /dev/null +++ 
b/blog/migrations/0006_entry_is_searchable.py @@ -0,0 +1,21 @@ +# Generated by Django 5.2 on 2025-09-03 20:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("blog", "0005_entry_social_media_card"), + ] + + operations = [ + migrations.AddField( + model_name="entry", + name="is_searchable", + field=models.BooleanField( + default=False, + help_text="Tick to make this entry appear in the Django documentation search.", + ), + ), + ] diff --git a/blog/models.py b/blog/models.py index 988c607eb4..1077aeb55e 100644 --- a/blog/models.py +++ b/blog/models.py @@ -31,12 +31,17 @@ def _md_slugify(value, separator): class EntryQuerySet(models.QuerySet): - def published(self): - return self.active().filter(pub_date__lte=timezone.now()) + def published(self, as_of=None): + if as_of is None: + as_of = timezone.now() + return self.active().filter(pub_date__lte=as_of) def active(self): return self.filter(is_active=True) + def searchable(self): + return self.filter(is_searchable=True) + class ContentFormat(models.TextChoices): REST = "reST", "reStructuredText" @@ -126,6 +131,12 @@ class Entry(models.Model): ), default=False, ) + is_searchable = models.BooleanField( + default=False, + help_text=_( + "Tick to make this entry appear in the Django documentation search." 
+ ), + ) pub_date = models.DateTimeField( verbose_name=_("Publication date"), help_text=_( @@ -168,7 +179,7 @@ def get_absolute_url(self): "day": self.pub_date.strftime("%d").lower(), "slug": self.slug, } - return reverse("weblog:entry", kwargs=kwargs) + return reverse("weblog:entry", kwargs=kwargs, host="www") def is_published(self): """ diff --git a/blog/tests.py b/blog/tests.py index b5c1b1802b..6a4dea58a5 100644 --- a/blog/tests.py +++ b/blog/tests.py @@ -66,6 +66,31 @@ def test_manager_published(self): ["past active"], transform=lambda entry: entry.headline, ) + self.assertQuerySetEqual( + Entry.objects.published(self.tomorrow), + ["future active", "past active"], + transform=lambda entry: entry.headline, + ) + + def test_manager_searchable(self): + """ + Make sure that the Entry manager's `searchable` method works + """ + Entry.objects.create( + pub_date=self.yesterday, + is_searchable=False, + headline="not searchable", + slug="a", + ) + Entry.objects.create( + pub_date=self.yesterday, is_searchable=True, headline="searchable", slug="b" + ) + + self.assertQuerySetEqual( + Entry.objects.searchable(), + ["searchable"], + transform=lambda entry: entry.headline, + ) def test_docutils_safe(self): """ diff --git a/docs/models.py b/docs/models.py index 17c2cca01a..ea1874b067 100644 --- a/docs/models.py +++ b/docs/models.py @@ -26,18 +26,26 @@ from django.utils.html import strip_tags from django_hosts.resolvers import reverse +from blog.models import Entry from releases.models import Release from . 
import utils from .search import ( DEFAULT_TEXT_SEARCH_CONFIG, + SEARCHABLE_VIEWS, START_SEL, STOP_SEL, TSEARCH_CONFIG_LANGUAGES, get_document_search_vector, + DocumentationCategory, ) +def get_search_config(lang): + """Determine the PostgreSQL search language""" + return TSEARCH_CONFIG_LANGUAGES.get(lang[:2], DEFAULT_TEXT_SEARCH_CONFIG) + + class DocumentReleaseQuerySet(models.QuerySet): def current(self, lang="en"): current = self.get(is_default=True) @@ -206,9 +214,7 @@ def sync_to_db(self, decoded_documents): path=document_path, title=html.unescape(strip_tags(document["title"])), metadata=document, - config=TSEARCH_CONFIG_LANGUAGES.get( - self.lang[:2], DEFAULT_TEXT_SEARCH_CONFIG - ), + config=get_search_config(self.lang), ) for document in self.documents.all(): document.metadata["breadcrumbs"] = list( @@ -216,6 +222,78 @@ def sync_to_db(self, decoded_documents): ) document.save(update_fields=("metadata",)) + self._sync_blog_to_db() + self._sync_views_to_db() + + def _sync_blog_to_db(self): + """ + Sync the blog entries into search based on the release documents + support end date. + """ + if self.lang != "en" or not self.release.eol_date: + # The blog is only written in English, and we need to know + # the release's support end to know when to stop considering + # blog posts relevant. + return + entries = Entry.objects.published(self.release.eol_date).searchable() + Document.objects.bulk_create( + [ + Document( + release=self, + path=entry.get_absolute_url(), + title=entry.headline, + metadata={ + "body": entry.body_html, + "breadcrumbs": [ + { + "path": DocumentationCategory.WEBSITE, + "title": "News", + }, + ], + "parents": DocumentationCategory.WEBSITE, + "slug": entry.slug, + "title": entry.headline, + "toc": "", + }, + config=get_search_config(self.lang), + ) + for entry in entries + ] + ) + + def _sync_views_to_db(self): + """ + Sync the views listed in SEARCHABLE_VIEWS into search; unlike blog + entries, they are not limited by the release's support end date. 
+ """ + if self.lang != "en": + return # The searchable views are only written in English currently + + Document.objects.bulk_create( + [ + Document( + release=self, + path=searchable_view.www_absolute_url, + title=searchable_view.page_title, + metadata={ + "body": searchable_view.html, + "breadcrumbs": [ + { + "path": DocumentationCategory.WEBSITE, + "title": "Website", + }, + ], + "parents": DocumentationCategory.WEBSITE, + "slug": searchable_view.url_name, + "title": searchable_view.page_title, + "toc": "", + }, + config=get_search_config(self.lang), + ) + for searchable_view in SEARCHABLE_VIEWS + ] + ) + def _clean_document_path(path): # We have to be a bit careful to reverse-engineer the correct @@ -228,7 +306,9 @@ def _clean_document_path(path): def document_url(doc): - if doc.path: + if doc.metadata.get("parents") == DocumentationCategory.WEBSITE: + return doc.path + elif doc.path: kwargs = { "lang": doc.release.lang, "version": doc.release.version, diff --git a/docs/search.py b/docs/search.py index 3a99978d58..0b7eaef1d3 100644 --- a/docs/search.py +++ b/docs/search.py @@ -1,7 +1,11 @@ +from dataclasses import dataclass + from django.contrib.postgres.search import SearchVector from django.db.models import TextChoices from django.db.models.fields.json import KeyTextTransform +from django.template.loader import get_template from django.utils.translation import gettext_lazy as _ +from django_hosts import reverse # Imported from # https://github.com/postgres/postgres/blob/REL_14_STABLE/src/bin/initdb/initdb.c#L659 @@ -67,6 +71,7 @@ class DocumentationCategory(TextChoices): TOPICS = "topics", _("Using Django") HOWTO = "howto", _("How-to guides") RELEASE_NOTES = "releases", _("Release notes") + WEBSITE = "website", _("Django Website") @classmethod def parse(cls, value, default=None): @@ -74,3 +79,27 @@ def parse(cls, value, default=None): return cls(value) except ValueError: return None + + +@dataclass +class SearchableView: + page_title: str + url_name: str + 
template: str + + @property + def html(self): + return get_template(self.template).render() + + @property + def www_absolute_url(self): + return reverse(self.url_name, host="www") + + +SEARCHABLE_VIEWS = [ + SearchableView( + page_title="Django's Ecosystem", + url_name="community-ecosystem", + template="aggregator/ecosystem.html", + ), +] diff --git a/docs/templates/docs/search_results.html b/docs/templates/docs/search_results.html index 4f400a8384..e0c0383aa4 100644 --- a/docs/templates/docs/search_results.html +++ b/docs/templates/docs/search_results.html @@ -43,11 +43,11 @@

{% translate "No search query given" %}

{% for result in page.object_list %}

- {{ result.headline|safe }} + {{ result.headline|safe }}

{% for breadcrumb in result.breadcrumbs %} - {{ breadcrumb.title }}{% if not forloop.last %} »{% endif %} + {{ breadcrumb.title }}{% if not forloop.last %} »{% endif %} {% endfor %}
@@ -60,7 +60,7 @@