1
1
import time
2
2
3
+ import pandas as pd
3
4
import streamlit as st
4
5
from dotenv import load_dotenv
5
6
import logging
@@ -16,6 +17,28 @@ def delete_entity_sample(profile_name, id):
16
17
st .success (f'Sample { id } deleted.' )
17
18
18
19
20
def read_file(uploaded_file):
    """
    Load an uploaded CSV or Excel file into a DataFrame of entity samples.

    The file type is taken from the case-insensitive extension of
    ``uploaded_file.name``. Every failure (unsupported extension, unreadable
    content, missing columns) is reported to the user through ``st.error``
    and signalled to the caller by returning ``None``.

    :param uploaded_file: file-like object exposing a ``name`` attribute,
        such as the items returned by ``st.file_uploader``
    :return: the parsed ``pandas.DataFrame`` when it contains both an
        ``entity`` and a ``comment`` column, otherwise ``None``
    """
    file_type = uploaded_file.name.split('.')[-1].lower()
    if file_type == 'csv':
        reader = pd.read_csv
    elif file_type in ('xls', 'xlsx'):
        reader = pd.read_excel
    else:
        st.error(f"Unsupported file type: {file_type}")
        return None

    try:
        uploaded_data = reader(uploaded_file)
    except Exception as err:
        # UI boundary: an empty, malformed or corrupt upload must not crash
        # the page. The parsers raise many unrelated exception types
        # (EmptyDataError, ParserError, BadZipFile, ImportError for a missing
        # Excel engine, ...), so report whatever went wrong and keep the
        # "None on failure" contract the caller checks for.
        st.error(f"Failed to read {uploaded_file.name}: {err}")
        return None

    # Both columns are mandatory; the caller indexes rows by these exact names.
    if {"entity", "comment"}.issubset(uploaded_data.columns):
        return uploaded_data
    st.error("The columns need contains entity and comment")
    return None
40
+
41
+
19
42
def main ():
20
43
load_dotenv ()
21
44
logger .info ('start entity management' )
@@ -36,10 +59,11 @@ def main():
36
59
current_profile = st .selectbox ("My Data Profiles" , all_profiles_list , index = profile_index )
37
60
else :
38
61
current_profile = st .selectbox ("My Data Profiles" , ProfileManagement .get_all_profiles (),
39
- index = None ,
40
- placeholder = "Please select data profile..." , key = 'current_profile_name' )
62
+ index = None ,
63
+ placeholder = "Please select data profile..." , key = 'current_profile_name' )
41
64
42
- tab_view , tab_add , tab_search = st .tabs (['View Samples' , 'Add New Sample' , 'Sample Search' ])
65
+ tab_view , tab_add , tab_search , batch_insert = st .tabs (
66
+ ['View Samples' , 'Add New Sample' , 'Sample Search' , 'Batch Insert Samples' ])
43
67
if current_profile is not None :
44
68
with tab_view :
45
69
if current_profile is not None :
@@ -72,7 +96,8 @@ def main():
72
96
retrieve_number = st .slider ("Entity Retrieve Number" , 0 , 100 , 10 )
73
97
if st .button ('Search' , type = 'primary' ):
74
98
if len (entity_search ) > 0 :
75
- search_sample_result = VectorStore .search_sample (current_profile , retrieve_number , opensearch_info ['ner_index' ],
99
+ search_sample_result = VectorStore .search_sample (current_profile , retrieve_number ,
100
+ opensearch_info ['ner_index' ],
76
101
entity_search )
77
102
for sample in search_sample_result :
78
103
sample_res = {'Score' : sample ['_score' ],
@@ -82,8 +107,27 @@ def main():
82
107
st .button ('Delete ' + sample ['_id' ], key = sample ['_id' ], on_click = delete_entity_sample ,
83
108
args = [current_profile , sample ['_id' ]])
84
109
85
-
86
-
110
+ with batch_insert :
111
+ if current_profile is not None :
112
+ st .write ("This page support CSV or Excel files batch insert entity samples." )
113
+ st .write ("**The Column Name need contain 'entity' and 'comment'**" )
114
+ uploaded_files = st .file_uploader ("Choose CSV or Excel files" , accept_multiple_files = True ,
115
+ type = ['csv' , 'xls' , 'xlsx' ])
116
+ if uploaded_files :
117
+ progress_bar = st .progress (0 )
118
+ status_text = st .empty ()
119
+ for i , uploaded_file in enumerate (uploaded_files ):
120
+ status_text .text (f"Processing file { i + 1 } of { len (uploaded_files )} : { uploaded_file .name } " )
121
+ each_upload_data = read_file (uploaded_file )
122
+ if each_upload_data is not None :
123
+ for index , item in each_upload_data .iterrows ():
124
+ entity = str (item ["entity" ])
125
+ comment = str (item ["comment" ])
126
+ VectorStore .add_entity_sample (current_profile , entity , comment )
127
+ progress_bar .progress ((i + 1 ) / len (uploaded_files ))
128
+
129
+ st .success ("{uploaded_file} uploaded successfully!" .format (uploaded_file = uploaded_file .name ))
130
+ progress_bar .empty ()
87
131
else :
88
132
st .info ('Please select data profile in the left sidebar.' )
89
133
0 commit comments