Skip to content

Commit ca7a2ed

Browse files
committed
🧪 Add test files for knowledgebase
1 parent 172c4ef commit ca7a2ed

File tree

7 files changed

+306
-77
lines changed

7 files changed

+306
-77
lines changed
File renamed without changes.

‎test/backend/app/test_vectordatabase_app.py‎

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ async def test_create_new_index_success(vdb_core_mock, auth_data):
152152
# Setup mocks
153153
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
154154
patch("backend.apps.vectordatabase_app.get_current_user_id", return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
155-
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_index") as mock_create:
155+
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_knowledge_base") as mock_create:
156156

157157
expected_response = {"status": "success",
158158
"index_name": auth_data["index_name"]}
@@ -165,7 +165,13 @@ async def test_create_new_index_success(vdb_core_mock, auth_data):
165165
# Verify
166166
assert response.status_code == 200
167167
assert response.json() == expected_response
168+
# vdb_core is constructed inside router; accept ANY for instance
168169
mock_create.assert_called_once()
170+
called_args = mock_create.call_args[0]
171+
assert called_args[0] == auth_data["index_name"]
172+
assert called_args[1] == 768
173+
assert called_args[3] == auth_data["user_id"]
174+
assert called_args[4] == auth_data["tenant_id"]
169175

170176

171177
@pytest.mark.asyncio
@@ -177,7 +183,7 @@ async def test_create_new_index_error(vdb_core_mock, auth_data):
177183
# Setup mocks
178184
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
179185
patch("backend.apps.vectordatabase_app.get_current_user_id", return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
180-
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_index") as mock_create:
186+
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_knowledge_base") as mock_create:
181187

182188
mock_create.side_effect = Exception("Test error")
183189

@@ -702,10 +708,11 @@ async def test_get_index_chunks_success(vdb_core_mock):
702708
Test retrieving index chunks successfully.
703709
Verifies that the endpoint forwards query params and returns the service payload.
704710
"""
711+
index_name = "test_index"
705712
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
713+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value="resolved_index"), \
706714
patch("backend.apps.vectordatabase_app.ElasticSearchService.get_index_chunks") as mock_get_chunks:
707715

708-
index_name = "test_index"
709716
expected_response = {
710717
"status": "success",
711718
"message": "ok",
@@ -724,7 +731,7 @@ async def test_get_index_chunks_success(vdb_core_mock):
724731
assert response.status_code == 200
725732
assert response.json() == expected_response
726733
mock_get_chunks.assert_called_once_with(
727-
index_name=index_name,
734+
index_name="resolved_index",
728735
page=2,
729736
page_size=50,
730737
path_or_url="/foo",
@@ -738,18 +745,19 @@ async def test_get_index_chunks_error(vdb_core_mock):
738745
Test retrieving index chunks with service error.
739746
Ensures the endpoint maps the exception to HTTP 500.
740747
"""
748+
index_name = "test_index"
741749
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
750+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value="resolved_index"), \
742751
patch("backend.apps.vectordatabase_app.ElasticSearchService.get_index_chunks") as mock_get_chunks:
743752

744-
index_name = "test_index"
745753
mock_get_chunks.side_effect = Exception("Chunk failure")
746754

747755
response = client.post(f"/indices/{index_name}/chunks")
748756

749757
assert response.status_code == 500
750758
assert response.json() == {"detail": "Error getting chunks: Chunk failure"}
751759
mock_get_chunks.assert_called_once_with(
752-
index_name=index_name,
760+
index_name="resolved_index",
753761
page=None,
754762
page_size=None,
755763
path_or_url=None,
@@ -765,6 +773,7 @@ async def test_create_chunk_success(vdb_core_mock, auth_data):
765773
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
766774
patch("backend.apps.vectordatabase_app.get_current_user_id",
767775
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
776+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
768777
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_chunk") as mock_create:
769778

770779
expected_response = {"status": "success", "chunk_id": "chunk-1"}
@@ -794,6 +803,7 @@ async def test_create_chunk_error(vdb_core_mock, auth_data):
794803
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
795804
patch("backend.apps.vectordatabase_app.get_current_user_id",
796805
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
806+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
797807
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_chunk") as mock_create:
798808

799809
mock_create.side_effect = Exception("Create failed")
@@ -822,6 +832,7 @@ async def test_update_chunk_success(vdb_core_mock, auth_data):
822832
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
823833
patch("backend.apps.vectordatabase_app.get_current_user_id",
824834
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
835+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
825836
patch("backend.apps.vectordatabase_app.ElasticSearchService.update_chunk") as mock_update:
826837

827838
expected_response = {"status": "success", "chunk_id": "chunk-1"}
@@ -850,6 +861,7 @@ async def test_update_chunk_value_error(vdb_core_mock, auth_data):
850861
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
851862
patch("backend.apps.vectordatabase_app.get_current_user_id",
852863
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
864+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
853865
patch("backend.apps.vectordatabase_app.ElasticSearchService.update_chunk") as mock_update:
854866

855867
mock_update.side_effect = ValueError("Invalid update payload")
@@ -864,7 +876,8 @@ async def test_update_chunk_value_error(vdb_core_mock, auth_data):
864876
headers=auth_data["auth_header"],
865877
)
866878

867-
assert response.status_code == 400
879+
# ValueError is mapped to NOT_FOUND in app layer
880+
assert response.status_code == 404
868881
assert response.json() == {"detail": "Invalid update payload"}
869882
mock_update.assert_called_once()
870883

@@ -877,6 +890,7 @@ async def test_update_chunk_exception(vdb_core_mock, auth_data):
877890
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
878891
patch("backend.apps.vectordatabase_app.get_current_user_id",
879892
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
893+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
880894
patch("backend.apps.vectordatabase_app.ElasticSearchService.update_chunk") as mock_update:
881895

882896
mock_update.side_effect = Exception("Update failed")
@@ -904,6 +918,7 @@ async def test_delete_chunk_success(vdb_core_mock, auth_data):
904918
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
905919
patch("backend.apps.vectordatabase_app.get_current_user_id",
906920
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
921+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
907922
patch("backend.apps.vectordatabase_app.ElasticSearchService.delete_chunk") as mock_delete:
908923

909924
expected_response = {"status": "success", "chunk_id": "chunk-1"}
@@ -927,6 +942,7 @@ async def test_delete_chunk_not_found(vdb_core_mock, auth_data):
927942
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
928943
patch("backend.apps.vectordatabase_app.get_current_user_id",
929944
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
945+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
930946
patch("backend.apps.vectordatabase_app.ElasticSearchService.delete_chunk") as mock_delete:
931947

932948
mock_delete.side_effect = ValueError("Chunk not found")
@@ -949,6 +965,7 @@ async def test_delete_chunk_exception(vdb_core_mock, auth_data):
949965
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
950966
patch("backend.apps.vectordatabase_app.get_current_user_id",
951967
return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
968+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
952969
patch("backend.apps.vectordatabase_app.ElasticSearchService.delete_chunk") as mock_delete:
953970

954971
mock_delete.side_effect = Exception("Delete failed")

‎test/backend/data_process/test_tasks.py‎

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def decorator(func):
115115
# New defaults required by ray_actors import
116116
const_mod.DEFAULT_EXPECTED_CHUNK_SIZE = 1024
117117
const_mod.DEFAULT_MAXIMUM_CHUNK_SIZE = 1536
118+
const_mod.ROOT_DIR = "/mock/root"
118119
sys.modules["consts.const"] = const_mod
119120
# Minimal stub for consts.model used by utils.file_management_utils
120121
if "consts.model" not in sys.modules:
@@ -328,7 +329,7 @@ def failing_init(**kwargs):
328329
# Verify that the exception is re-raised
329330
with pytest.raises(RuntimeError) as exc_info:
330331
tasks.init_ray_in_worker()
331-
assert exc_info.value == init_exception
332+
assert "Failed to initialize Ray for Celery worker" in str(exc_info.value)
332333

333334

334335
def test_run_async_no_running_loop(monkeypatch):
@@ -554,6 +555,37 @@ def get(self, k):
554555
json.loads(str(ei.value))
555556

556557

558+
def test_forward_returns_when_task_cancelled(monkeypatch):
    """forward must return early when the task is flagged as cancelled."""
    tasks, _ = import_tasks_with_fake_ray(monkeypatch)
    monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")

    class CancelledRedisStub:
        """Redis-service stand-in: always reports cancellation, counts lookups."""

        def __init__(self):
            self.calls = 0

        def is_task_cancelled(self, task_id):
            self.calls += 1
            return True

    redis_stub = CancelledRedisStub()
    monkeypatch.setattr(tasks, "get_redis_service", lambda: redis_stub)

    task_self = FakeSelf("cancel-1")
    chunk_payload = {"chunks": [{"content": "keep", "metadata": {}}]}
    outcome = tasks.forward(
        task_self,
        processed_data=chunk_payload,
        index_name="idx",
        source="/a.txt",
    )

    # The cancellation flag is consulted exactly once and nothing is stored.
    assert outcome["chunks_stored"] == 0
    assert "cancelled" in outcome["es_result"]["message"].lower()
    assert redis_stub.calls == 1
    # An early return means no task-state updates were recorded.
    assert task_self.states == []
588+
557589
def test_forward_redis_client_from_url_failure(monkeypatch):
558590
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
559591
monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")
@@ -1082,6 +1114,48 @@ def __init__(self):
10821114
assert success_state.get("meta", {}).get("processing_speed_mb_s") == 0
10831115

10841116

1117+
def test_process_no_chunks_saves_error(monkeypatch, tmp_path):
    """process must persist error details when no chunks are produced."""
    tasks, fake_ray = import_tasks_with_fake_ray(monkeypatch, initialized=True)

    class FakeActor:
        """Ray actor stand-in whose remote calls return fixed placeholders."""

        def __init__(self):
            self.process_file = types.SimpleNamespace(
                remote=lambda *a, **k: "ref-empty",
            )
            self.store_chunks_in_redis = types.SimpleNamespace(
                remote=lambda *a, **k: None,
            )

    monkeypatch.setattr(tasks, "get_ray_actor", lambda: FakeActor())
    # ray.get hands back an empty chunk list.
    fake_ray.get_returns = []

    captured = {}

    def record_reason(task_id, reason, start_time):
        # Keep only the first reason reported, mirroring dict.setdefault.
        return captured.setdefault("reason", reason)

    monkeypatch.setattr(tasks, "save_error_to_redis", record_reason)

    source_file = tmp_path / "empty_file.txt"
    source_file.write_text("data")

    task_self = FakeSelf("no-chunks")
    with pytest.raises(Exception) as exc_info:
        tasks.process(
            task_self,
            source=str(source_file),
            source_type="local",
            chunking_strategy="basic",
            index_name="idx",
            original_filename="empty_file.txt",
        )

    assert '"error_code": "no_valid_chunks"' in captured.get("reason", "")
    assert any(
        entry.get("meta", {}).get("stage") == "text_extraction_failed"
        for entry in task_self.states
    )
    # The raised exception's message must itself be valid JSON.
    json.loads(str(exc_info.value))
1157+
1158+
10851159
def test_process_url_source_with_many_chunks(monkeypatch):
10861160
"""Test processing URL source that generates many chunks"""
10871161
tasks, fake_ray = import_tasks_with_fake_ray(monkeypatch, initialized=True)

‎test/backend/data_process/test_worker.py‎

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import types
33
import importlib
44
import pytest
5+
import os
56

67

78
class FakeRay:
@@ -44,6 +45,7 @@ def setup_mocks_for_worker(mocker, initialized=False):
4445
const_mod.FORWARD_REDIS_RETRY_MAX = 1
4546
const_mod.DISABLE_RAY_DASHBOARD = False
4647
const_mod.DATA_PROCESS_SERVICE = "http://data-process"
48+
const_mod.ROOT_DIR = "/mock/root"
4749
sys.modules["consts.const"] = const_mod
4850

4951
# Stub celery module and submodules (required by tasks.py imported via __init__.py)
@@ -483,6 +485,23 @@ def init_ray_for_worker(cls, address):
483485
assert worker_module.worker_state['initialized'] is True
484486

485487

488+
def test_setup_worker_environment_sets_ray_preallocate_env(mocker):
    """Ensure setup_worker_environment sets the RAY_preallocate_plasma env var.

    os.environ is process-wide state, so the test snapshots it first and lets
    the ``mocker`` fixture restore it at teardown. The variable is also cleared
    up front so a value left behind by an earlier test cannot satisfy the
    assertion on its own.
    """
    # Snapshot the environment; pytest-mock undoes the patch after the test.
    mocker.patch.dict(os.environ)
    os.environ.pop("RAY_preallocate_plasma", None)

    worker_module, _ = setup_mocks_for_worker(mocker, initialized=False)

    # Force init success to avoid fallback path exceptions
    class FakeRayConfig:
        @classmethod
        def init_ray_for_worker(cls, address):
            return True

    mocker.patch.object(worker_module, "RayConfig", FakeRayConfig)

    worker_module.setup_worker_environment()

    expected_value = str(worker_module.RAY_preallocate_plasma).lower()
    assert os.environ.get("RAY_preallocate_plasma") == expected_value
503+
504+
486505
def test_setup_worker_environment_ray_init_fallback(mocker):
487506
"""Test setup_worker_environment with Ray init fallback"""
488507
worker_module, fake_ray = setup_mocks_for_worker(mocker, initialized=False)

‎test/backend/database/test_knowledge_db.py‎

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class MockKnowledgeRecord:
7171
def __init__(self, **kwargs):
7272
self.knowledge_id = kwargs.get('knowledge_id', 1)
7373
self.index_name = kwargs.get('index_name', 'test_index')
74+
self.knowledge_name = kwargs.get('knowledge_name', 'test_index')
7475
self.knowledge_describe = kwargs.get('knowledge_describe', 'test description')
7576
self.created_by = kwargs.get('created_by', 'test_user')
7677
self.updated_by = kwargs.get('updated_by', 'test_user')
@@ -83,6 +84,7 @@ def __init__(self, **kwargs):
8384
# Mock SQLAlchemy column attributes
8485
knowledge_id = MagicMock(name="knowledge_id_column")
8586
index_name = MagicMock(name="index_name_column")
87+
knowledge_name = MagicMock(name="knowledge_name_column")
8688
knowledge_describe = MagicMock(name="knowledge_describe_column")
8789
created_by = MagicMock(name="created_by_column")
8890
updated_by = MagicMock(name="updated_by_column")
@@ -125,8 +127,9 @@ def test_create_knowledge_record_success(monkeypatch, mock_session):
125127
session, _ = mock_session
126128

127129
# Create mock knowledge record
128-
mock_record = MockKnowledgeRecord()
130+
mock_record = MockKnowledgeRecord(knowledge_name="test_knowledge")
129131
mock_record.knowledge_id = 123
132+
mock_record.index_name = "test_knowledge"
130133

131134
# Mock database session context
132135
mock_ctx = MagicMock()
@@ -140,16 +143,21 @@ def test_create_knowledge_record_success(monkeypatch, mock_session):
140143
"knowledge_describe": "Test knowledge description",
141144
"user_id": "test_user",
142145
"tenant_id": "test_tenant",
143-
"embedding_model_name": "test_model"
146+
"embedding_model_name": "test_model",
147+
"knowledge_name": "test_knowledge"
144148
}
145149

146150
# Mock KnowledgeRecord constructor
147151
with patch('backend.database.knowledge_db.KnowledgeRecord', return_value=mock_record):
148152
result = create_knowledge_record(test_query)
149153

150-
assert result == 123
154+
assert result == {
155+
"knowledge_id": 123,
156+
"index_name": "test_knowledge",
157+
"knowledge_name": "test_knowledge",
158+
}
151159
session.add.assert_called_once_with(mock_record)
152-
session.flush.assert_called_once()
160+
assert session.flush.call_count == 1
153161
session.commit.assert_called_once()
154162

155163

@@ -179,6 +187,42 @@ def test_create_knowledge_record_exception(monkeypatch, mock_session):
179187
session.rollback.assert_called_once()
180188

181189

190+
def test_create_knowledge_record_generates_index_name(monkeypatch, mock_session):
    """create_knowledge_record must fill in a generated index_name when the query has none."""
    session, _ = mock_session

    record = MockKnowledgeRecord(knowledge_name="kb1")
    record.knowledge_id = 7

    # Context manager handed out by get_db_session, yielding the mocked session.
    session_ctx = MagicMock()
    session_ctx.__enter__.return_value = session
    session_ctx.__exit__.return_value = None
    monkeypatch.setattr(
        "backend.database.knowledge_db.get_db_session",
        lambda: session_ctx,
    )

    # Pin the generator so the expected index name is deterministic.
    monkeypatch.setattr(
        "backend.database.knowledge_db._generate_index_name",
        lambda _: "7-generated",
    )

    payload = {
        "knowledge_describe": "desc",
        "user_id": "user-1",
        "tenant_id": "tenant-1",
        "embedding_model_name": "model-x",
        "knowledge_name": "kb1",
    }

    with patch('backend.database.knowledge_db.KnowledgeRecord', return_value=record):
        outcome = create_knowledge_record(payload)

    expected = {
        "knowledge_id": 7,
        "index_name": "7-generated",
        "knowledge_name": "kb1",
    }
    assert outcome == expected
    assert record.index_name == "7-generated"
    # Two flushes: the initial insert, then the index_name update.
    assert session.flush.call_count == 2
    session.commit.assert_called_once()
224+
225+
182226
def test_update_knowledge_record_success(monkeypatch, mock_session):
183227
"""Test successful update of knowledge record"""
184228
session, query = mock_session

0 commit comments

Comments
 (0)