1515)
1616from services .redis_service import get_redis_service
1717from utils .auth_utils import get_current_user_id
18+ from utils .file_management_utils import get_all_files_status
1819
# Router for this module's endpoints; every route declared on it is served under the "/indices" URL prefix.
1920router = APIRouter (prefix = "/indices" )
# Module-level ElasticSearchService instance.
# NOTE(review): the handlers visible in this excerpt call ElasticSearchService methods on the class itself, not on this instance — confirm whether it is still needed.
2021service = ElasticSearchService ()
@@ -49,7 +50,8 @@ def create_new_index(
4950 """Create a new vector index and store it in the knowledge table"""
5051 try :
5152 user_id , tenant_id = get_current_user_id (authorization )
52- return ElasticSearchService .create_index (index_name , embedding_dim , vdb_core , user_id , tenant_id )
53+ # Treat path parameter as user-facing knowledge base name for new creations
54+ return ElasticSearchService .create_knowledge_base (index_name , embedding_dim , vdb_core , user_id , tenant_id )
5355 except Exception as e :
5456 raise HTTPException (
5557 status_code = HTTPStatus .INTERNAL_SERVER_ERROR , detail = f"Error creating index: { str (e )} " )
@@ -99,7 +101,9 @@ def create_index_documents(
99101 data : List [Dict [str , Any ]
100102 ] = Body (..., description = "Document List to process" ),
101103 vdb_core : VectorDatabaseCore = Depends (get_vector_db_core ),
102- authorization : Optional [str ] = Header (None )
104+ authorization : Optional [str ] = Header (None ),
105+ task_id : Optional [str ] = Header (
106+ None , alias = "X-Task-Id" , description = "Task ID for progress tracking" ),
103107):
104108 """
105109 Index documents with embeddings, creating the index if it doesn't exist.
@@ -108,7 +112,13 @@ def create_index_documents(
108112 try :
109113 user_id , tenant_id = get_current_user_id (authorization )
110114 embedding_model = get_embedding_model (tenant_id )
111- return ElasticSearchService .index_documents (embedding_model , index_name , data , vdb_core )
115+ return ElasticSearchService .index_documents (
116+ embedding_model = embedding_model ,
117+ index_name = index_name ,
118+ data = data ,
119+ vdb_core = vdb_core ,
120+ task_id = task_id ,
121+ )
112122 except Exception as e :
113123 error_msg = str (e )
114124 logger .error (f"Error indexing documents: { error_msg } " )
@@ -187,6 +197,87 @@ def delete_documents(
187197 status_code = HTTPStatus .INTERNAL_SERVER_ERROR , detail = f"Error delete indexing documents: { e } " )
188198
189199
def _split_error_text(raw_error: Any) -> tuple:
    """Split a stored error payload into ``(error_reason, suggestion)``.

    Accepted payload shapes:

    * plain text, optionally in the "<reason>。建议:<suggestion>" format;
    * a JSON object string (legacy format) whose ``friendly_reason`` or,
      failing that, ``message`` field carries the text;
    * the same content as bytes, decoded as UTF-8.

    Args:
        raw_error: Value read from the error store; may be ``None``/empty.

    Returns:
        A 2-tuple ``(error_reason, suggestion)``. Either element is ``None``
        when that part is missing or empty.
    """
    # Function-scope import: the helper does not depend on the module's
    # import block, and a missing import can no longer be masked.
    import json

    if not raw_error:
        return None, None

    text = raw_error
    # NOTE(review): a Redis client that is not configured to decode responses
    # returns bytes; decode instead of silently reporting "no error".
    if isinstance(text, (bytes, bytearray)):
        text = bytes(text).decode("utf-8", errors="replace")
    if not isinstance(text, str):
        return None, None

    # Legacy format: the whole JSON document was stored as the error string.
    if text.strip().startswith("{"):
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError):
            # Not valid JSON after all: fall back to the raw text.
            parsed = None
        if isinstance(parsed, dict):
            # Prefer the user-facing reason, then the raw message.
            for key in ("friendly_reason", "message"):
                candidate = parsed.get(key)
                if isinstance(candidate, str):
                    text = candidate
                    break

    # Friendly format: "<reason>。建议:<suggestion>" (split on first marker).
    reason, marker_found, suggestion = text.partition("。建议:")
    if not marker_found:
        return text or None, None
    return reason or None, suggestion or None


@router.get("/{index_name}/documents/{path_or_url:path}/error-info")
async def get_document_error_info(
        index_name: str = Path(..., description="Name of the index"),
        path_or_url: str = Path(...,
                                description="Path or URL of the document"),
        authorization: Optional[str] = Header(None)
):
    """Get error information for a document.

    Looks up the document's file status for the index, reads the error
    recorded for its latest task id from the Redis service, and splits it
    into a reason and a suggestion.

    Args:
        index_name: Name of the index the document belongs to.
        path_or_url: Path or URL identifying the document.
        authorization: Optional ``Authorization`` header value.

    Returns:
        A dict with keys ``status`` (always ``"success"``), ``error_reason``
        and ``suggestion``; the latter two are ``None`` when the document
        has no task id or no recorded error.

    Raises:
        HTTPException: 404 when the document has no file status in the
            index; 500 for any other failure.
    """
    try:
        # The returned ids are not needed here; the call is kept because it
        # may raise for a bad header.
        # NOTE(review): presumably validates the caller — confirm.
        get_current_user_id(authorization)

        # Get task_id from file status
        celery_task_files = await get_all_files_status(index_name)
        file_status = celery_task_files.get(path_or_url)

        if not file_status:
            raise HTTPException(
                status_code=HTTPStatus.NOT_FOUND,
                detail=f"Document {path_or_url} not found in index {index_name}"
            )

        error_reason = None
        suggestion = None

        task_id = file_status.get('latest_task_id', '')
        if task_id:
            # Get raw error info from Redis and parse into reason + suggestion
            raw_error = get_redis_service().get_error_info(task_id)
            error_reason, suggestion = _split_error_text(raw_error)

        return {
            "status": "success",
            "error_reason": error_reason,
            "suggestion": suggestion,
        }
    except HTTPException:
        # Propagate deliberate HTTP errors (e.g. the 404 above) unchanged.
        raise
    except Exception as e:
        logger.error(
            f"Error getting error info for document {path_or_url}: {str(e)}")
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=f"Error getting error info: {str(e)}"
        ) from e
279+
280+
190281# Health check
191282@router .get ("/health" )
192283def health_check (vdb_core : VectorDatabaseCore = Depends (get_vector_db_core )):
0 commit comments