Skip to content

Commit 172c4ef

Browse files
committed
✨ Document processing now has a progress bar
✨ Document processing now shows a clear description of its failure reason, along with suggestions ✨ Users can now name a knowledge base whatever they want, without having to follow Elasticsearch's index naming rules ✨ Users can now adjust the chunk size when requesting the embedding service 🐛 Bugfix: knowledge base order was not stable 🐛 Bugfix: switching the embedding model caused a silent failure when creating a knowledge base 🐛 Bugfix: users could not adjust the configurations of batch-imported embedding models
1 parent 9b9766d commit 172c4ef

File tree

8 files changed

+300
-222
lines changed

8 files changed

+300
-222
lines changed

backend/apps/vectordatabase_app.py

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import json
23
from http import HTTPStatus
34
from typing import Any, Dict, List, Optional
45

@@ -16,6 +17,7 @@
1617
from services.redis_service import get_redis_service
1718
from utils.auth_utils import get_current_user_id
1819
from utils.file_management_utils import get_all_files_status
20+
from database.knowledge_db import get_index_name_by_knowledge_name
1921

2022
router = APIRouter(prefix="/indices")
2123
service = ElasticSearchService()
@@ -206,9 +208,6 @@ async def get_document_error_info(
206208
):
207209
"""Get error information for a document"""
208210
try:
209-
user_id, tenant_id = get_current_user_id(authorization)
210-
211-
# Get task_id from file status
212211
celery_task_files = await get_all_files_status(index_name)
213212
file_status = celery_task_files.get(path_or_url)
214213

@@ -222,50 +221,29 @@ async def get_document_error_info(
222221
if not task_id:
223222
return {
224223
"status": "success",
225-
"error_reason": None,
226-
"suggestion": None,
224+
"error_code": None,
227225
}
228226

229-
# Get raw error info from Redis
230227
redis_service = get_redis_service()
231228
raw_error = redis_service.get_error_info(task_id)
232-
233-
# Parse into reason + suggestion
234-
error_reason = None
235-
suggestion = None
229+
error_code = None
236230

237231
if raw_error:
238232
text = raw_error
239233

240-
# Try to parse JSON (legacy format where full JSON string was stored)
234+
# Try to parse JSON (new format with error_code only)
241235
if isinstance(text, str) and text.strip().startswith("{"):
242236
try:
243237
parsed = json.loads(text)
244238
if isinstance(parsed, dict):
245-
friendly = parsed.get("friendly_reason")
246-
message = parsed.get("message")
247-
if isinstance(friendly, str):
248-
text = friendly
249-
elif isinstance(message, str):
250-
text = message
239+
if "error_code" in parsed:
240+
error_code = parsed.get("error_code")
251241
except Exception:
252-
# If JSON parsing fails, fall back to raw text
253242
pass
254243

255-
if isinstance(text, str):
256-
# Split our friendly_reason format: "<reason>。建议:<suggestion>"
257-
marker = "。建议:"
258-
if marker in text:
259-
reason_part, suggestion_part = text.split(marker, 1)
260-
error_reason = reason_part or None
261-
suggestion = suggestion_part or None
262-
else:
263-
error_reason = text
264-
265244
return {
266245
"status": "success",
267-
"error_reason": error_reason,
268-
"suggestion": suggestion,
246+
"error_code": error_code,
269247
}
270248
except HTTPException:
271249
raise
@@ -292,25 +270,35 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)):
292270
@router.post("/{index_name}/chunks")
293271
def get_index_chunks(
294272
index_name: str = Path(...,
295-
description="Name of the index to get chunks from"),
273+
description="Name of the index (or knowledge_name) to get chunks from"),
296274
page: int = Query(
297275
None, description="Page number (1-based) for pagination"),
298276
page_size: int = Query(
299277
None, description="Number of records per page for pagination"),
300278
path_or_url: Optional[str] = Query(
301279
None, description="Filter chunks by document path_or_url"),
302-
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
280+
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
281+
authorization: Optional[str] = Header(None)
303282
):
304283
"""Get chunks from the specified index, with optional pagination support"""
305284
try:
285+
_, tenant_id = get_current_user_id(authorization)
286+
actual_index_name = get_index_name_by_knowledge_name(
287+
index_name, tenant_id)
288+
306289
result = ElasticSearchService.get_index_chunks(
307-
index_name=index_name,
290+
index_name=actual_index_name,
308291
page=page,
309292
page_size=page_size,
310293
path_or_url=path_or_url,
311294
vdb_core=vdb_core,
312295
)
313296
return JSONResponse(status_code=HTTPStatus.OK, content=result)
297+
except ValueError as e:
298+
raise HTTPException(
299+
status_code=HTTPStatus.NOT_FOUND,
300+
detail=str(e)
301+
)
314302
except Exception as e:
315303
error_msg = str(e)
316304
logger.error(
@@ -321,21 +309,29 @@ def get_index_chunks(
321309

322310
@router.post("/{index_name}/chunk")
323311
def create_chunk(
324-
index_name: str = Path(..., description="Name of the index"),
312+
index_name: str = Path(...,
313+
description="Name of the index (or knowledge_name)"),
325314
payload: ChunkCreateRequest = Body(..., description="Chunk data"),
326315
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
327316
authorization: Optional[str] = Header(None),
328317
):
329318
"""Create a manual chunk."""
330319
try:
331-
user_id, _ = get_current_user_id(authorization)
320+
user_id, tenant_id = get_current_user_id(authorization)
321+
actual_index_name = get_index_name_by_knowledge_name(
322+
index_name, tenant_id)
332323
result = ElasticSearchService.create_chunk(
333-
index_name=index_name,
324+
index_name=actual_index_name,
334325
chunk_request=payload,
335326
vdb_core=vdb_core,
336327
user_id=user_id,
337328
)
338329
return JSONResponse(status_code=HTTPStatus.OK, content=result)
330+
except ValueError as e:
331+
raise HTTPException(
332+
status_code=HTTPStatus.NOT_FOUND,
333+
detail=str(e)
334+
)
339335
except Exception as exc:
340336
logger.error(
341337
"Error creating chunk for index %s: %s", index_name, exc, exc_info=True
@@ -347,7 +343,8 @@ def create_chunk(
347343

348344
@router.put("/{index_name}/chunk/{chunk_id}")
349345
def update_chunk(
350-
index_name: str = Path(..., description="Name of the index"),
346+
index_name: str = Path(...,
347+
description="Name of the index (or knowledge_name)"),
351348
chunk_id: str = Path(..., description="Chunk identifier"),
352349
payload: ChunkUpdateRequest = Body(...,
353350
description="Chunk update payload"),
@@ -356,18 +353,22 @@ def update_chunk(
356353
):
357354
"""Update an existing chunk."""
358355
try:
359-
user_id, _ = get_current_user_id(authorization)
356+
user_id, tenant_id = get_current_user_id(authorization)
357+
actual_index_name = get_index_name_by_knowledge_name(
358+
index_name, tenant_id)
360359
result = ElasticSearchService.update_chunk(
361-
index_name=index_name,
360+
index_name=actual_index_name,
362361
chunk_id=chunk_id,
363362
chunk_request=payload,
364363
vdb_core=vdb_core,
365364
user_id=user_id,
366365
)
367366
return JSONResponse(status_code=HTTPStatus.OK, content=result)
368-
except ValueError as exc:
367+
except ValueError as e:
369368
raise HTTPException(
370-
status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
369+
status_code=HTTPStatus.NOT_FOUND,
370+
detail=str(e)
371+
)
371372
except Exception as exc:
372373
logger.error(
373374
"Error updating chunk %s for index %s: %s",
@@ -383,22 +384,28 @@ def update_chunk(
383384

384385
@router.delete("/{index_name}/chunk/{chunk_id}")
385386
def delete_chunk(
386-
index_name: str = Path(..., description="Name of the index"),
387+
index_name: str = Path(...,
388+
description="Name of the index (or knowledge_name)"),
387389
chunk_id: str = Path(..., description="Chunk identifier"),
388390
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
389391
authorization: Optional[str] = Header(None),
390392
):
391393
"""Delete a chunk."""
392394
try:
393-
get_current_user_id(authorization)
395+
_, tenant_id = get_current_user_id(authorization)
396+
actual_index_name = get_index_name_by_knowledge_name(
397+
index_name, tenant_id)
394398
result = ElasticSearchService.delete_chunk(
395-
index_name=index_name,
399+
index_name=actual_index_name,
396400
chunk_id=chunk_id,
397401
vdb_core=vdb_core,
398402
)
399403
return JSONResponse(status_code=HTTPStatus.OK, content=result)
400-
except ValueError as exc:
401-
raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
404+
except ValueError as e:
405+
raise HTTPException(
406+
status_code=HTTPStatus.NOT_FOUND,
407+
detail=str(e)
408+
)
402409
except Exception as exc:
403410
logger.error(
404411
"Error deleting chunk %s for index %s: %s",

0 commit comments

Comments
 (0)