Skip to content

Commit 923bcb7

Browse files
authored
Ensure re-indexing and re-embedding operations include all non-course learning resource types (#2695)
* adding all non-course indexes to command
* fixing vector search indexes
* revised tests to ensure we catch all resource types
1 parent 8563fe5 commit 923bcb7

File tree

4 files changed

+14
-50
lines changed

4 files changed

+14
-50
lines changed

learning_resources_search/tasks.py

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,9 @@
3737
from learning_resources_search.constants import (
3838
CONTENT_FILE_TYPE,
3939
COURSE_TYPE,
40-
LEARNING_PATH_TYPE,
40+
LEARNING_RESOURCE_TYPES,
4141
PERCOLATE_INDEX_TYPE,
42-
PODCAST_EPISODE_TYPE,
43-
PODCAST_TYPE,
44-
PROGRAM_TYPE,
4542
SEARCH_CONN_EXCEPTIONS,
46-
VIDEO_PLAYLIST_TYPE,
47-
VIDEO_TYPE,
4843
IndexestoUpdate,
4944
)
5045
from learning_resources_search.exceptions import ReindexError, RetryError
@@ -624,14 +619,7 @@ def start_recreate_index(self, indexes, remove_existing_reindexing_tags):
624619
)
625620
]
626621

627-
for resource_type in [
628-
PROGRAM_TYPE,
629-
PODCAST_TYPE,
630-
PODCAST_EPISODE_TYPE,
631-
LEARNING_PATH_TYPE,
632-
VIDEO_TYPE,
633-
VIDEO_PLAYLIST_TYPE,
634-
]:
622+
for resource_type in set(LEARNING_RESOURCE_TYPES) - {COURSE_TYPE}:
635623
if resource_type in indexes:
636624
index_tasks = index_tasks + [
637625
index_learning_resources.si(
@@ -692,14 +680,7 @@ def start_update_index(self, indexes, etl_source):
692680
if PERCOLATE_INDEX_TYPE in indexes:
693681
index_tasks = index_tasks + get_update_percolator_tasks()
694682

695-
for resource_type in [
696-
PROGRAM_TYPE,
697-
PODCAST_TYPE,
698-
PODCAST_EPISODE_TYPE,
699-
LEARNING_PATH_TYPE,
700-
VIDEO_TYPE,
701-
VIDEO_PLAYLIST_TYPE,
702-
]:
683+
for resource_type in set(LEARNING_RESOURCE_TYPES) - {COURSE_TYPE}:
703684
if resource_type in indexes:
704685
index_tasks = index_tasks + get_update_learning_resource_tasks(
705686
resource_type

learning_resources_search/tasks_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def test_system_exit_retry(mocker):
134134

135135
@pytest.mark.parametrize(
136136
"indexes",
137-
[["course"], ["program"]],
137+
[["course"], ["program"], list(LEARNING_RESOURCE_TYPES)],
138138
)
139139
def test_start_recreate_index(mocker, mocked_celery, user, indexes):
140140
"""
@@ -252,8 +252,7 @@ def test_start_recreate_index(mocker, mocked_celery, user, indexes):
252252
course.learning_resource_id,
253253
index_types=IndexestoUpdate.reindexing_index.value,
254254
)
255-
256-
if PROGRAM_TYPE in indexes:
255+
if indexes == [PROGRAM_TYPE]:
257256
assert index_learning_resources_mock.si.call_count == 2
258257
index_learning_resources_mock.si.assert_any_call(
259258
[programs[0].learning_resource_id, programs[1].learning_resource_id],
@@ -462,6 +461,7 @@ def test_bulk_deindex_learning_resources(mocker, with_error):
462461
[
463462
(["program"], None),
464463
(["course, content_file"], None),
464+
(list(LEARNING_RESOURCE_TYPES), None),
465465
(["course"], ETLSource.xpro.value),
466466
(["content_file"], ETLSource.xpro.value),
467467
(["content_file"], ETLSource.oll.value),
@@ -588,7 +588,7 @@ def test_start_update_index(mocker, mocked_celery, indexes, etl_source, settings
588588
COURSE_TYPE,
589589
)
590590

591-
if PROGRAM_TYPE in indexes:
591+
if indexes == [PROGRAM_TYPE]:
592592
assert index_learning_resources_mock.si.call_count == 2
593593
index_learning_resources_mock.si.assert_any_call(
594594
[programs[0].learning_resource_id, programs[1].learning_resource_id],

vector_search/tasks.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,10 @@
1515
)
1616
from learning_resources.utils import load_course_blocklist
1717
from learning_resources_search.constants import (
18-
ARTICLE_TYPE,
1918
CONTENT_FILE_TYPE,
2019
COURSE_TYPE,
21-
LEARNING_PATH_TYPE,
2220
LEARNING_RESOURCE_TYPES,
23-
PODCAST_EPISODE_TYPE,
24-
PODCAST_TYPE,
25-
PROGRAM_TYPE,
2621
SEARCH_CONN_EXCEPTIONS,
27-
VIDEO_PLAYLIST_TYPE,
28-
VIDEO_TYPE,
2922
)
3023
from learning_resources_search.exceptions import RetryError
3124
from learning_resources_search.tasks import wrap_retry_exception
@@ -173,15 +166,7 @@ def start_embed_resources(self, indexes, skip_content_files, overwrite):
173166
chunk_size=settings.QDRANT_CHUNK_SIZE,
174167
)
175168
]
176-
for resource_type in [
177-
PROGRAM_TYPE,
178-
PODCAST_TYPE,
179-
PODCAST_EPISODE_TYPE,
180-
LEARNING_PATH_TYPE,
181-
VIDEO_TYPE,
182-
VIDEO_PLAYLIST_TYPE,
183-
ARTICLE_TYPE,
184-
]:
169+
for resource_type in set(LEARNING_RESOURCE_TYPES) - {COURSE_TYPE}:
185170
if resource_type in indexes:
186171
for ids in chunks(
187172
LearningResource.objects.filter(

vector_search/tasks_test.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from learning_resources.models import ContentFile, LearningResource
2121
from learning_resources_search.constants import (
2222
COURSE_TYPE,
23+
LEARNING_RESOURCE_TYPES,
2324
)
2425
from main.utils import now_in_utc
2526
from vector_search.tasks import (
@@ -34,10 +35,7 @@
3435
pytestmark = pytest.mark.django_db
3536

3637

37-
@pytest.mark.parametrize(
38-
"index",
39-
["course", "program"],
40-
)
38+
@pytest.mark.parametrize("index", list(LEARNING_RESOURCE_TYPES))
4139
def test_start_embed_resources(mocker, mocked_celery, index):
4240
"""
4341
start_embed_resources should generate embeddings for each resource type
@@ -64,11 +62,11 @@ def test_start_embed_resources(mocker, mocked_celery, index):
6462
)
6563
resource_ids = [c.pk for c in courses]
6664
else:
67-
programs = sorted(
68-
ProgramFactory.create_batch(4),
69-
key=lambda program: program.learning_resource_id,
65+
resources = sorted(
66+
LearningResourceFactory.create_batch(4, resource_type=index),
67+
key=lambda resource: resource.id,
7068
)
71-
resource_ids = [p.pk for p in programs]
69+
resource_ids = [p.pk for p in resources]
7270

7371
generate_embeddings_mock = mocker.patch(
7472
"vector_search.tasks.generate_embeddings", autospec=True

0 commit comments

Comments
 (0)