Skip to content

Commit 1eaf648

Browse files
authored
Merge pull request #141 from aws-samples/spy_dev
add batch entity upload
2 parents 260cfbb + b813c7e commit 1eaf648

File tree

2 files changed

+93
-7
lines changed

2 files changed

+93
-7
lines changed

application/pages/6_📚_Index_Management.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import time
22

3+
import pandas as pd
34
import streamlit as st
45
from dotenv import load_dotenv
56
import logging
@@ -14,6 +15,26 @@ def delete_sample(profile_name, id):
1415
VectorStore.delete_sample(profile_name, id)
1516
st.success(f'Sample {id} deleted.')
1617

18+
def read_file(uploaded_file):
    """
    Load an uploaded CSV or Excel file of SQL samples into a DataFrame.

    The file type is taken from the extension of ``uploaded_file.name``
    (case-insensitive). Leading/trailing whitespace around column headers is
    stripped before validation, so a header row such as ``question, sql`` is
    accepted. Every failure is reported through ``st.error``.

    :param uploaded_file: file-like object exposing a ``name`` attribute
        (callers pass the items returned by ``st.file_uploader``)
    :return: DataFrame containing at least the ``question`` and ``sql``
        columns, or None when the file type is unsupported, the file cannot
        be parsed, or a required column is missing
    """
    file_type = uploaded_file.name.split('.')[-1].lower()
    if file_type == 'csv':
        reader = pd.read_csv
    elif file_type in ('xls', 'xlsx'):
        reader = pd.read_excel
    else:
        st.error(f"Unsupported file type: {file_type}")
        return None

    try:
        uploaded_data = reader(uploaded_file)
    except (ValueError, OSError, ImportError) as err:
        # ValueError covers pandas' EmptyDataError / ParserError and
        # UnicodeDecodeError; ImportError covers a missing Excel engine.
        # Report the problem like the other failure paths instead of
        # letting one bad file abort the whole batch upload.
        st.error(f"Failed to read {uploaded_file.name}: {err}")
        return None

    # Tolerate padded headers ("question , sql"); non-string labels are kept.
    uploaded_data = uploaded_data.rename(
        columns=lambda col: col.strip() if isinstance(col, str) else col)

    columns = list(uploaded_data.columns)
    if "question" in columns and "sql" in columns:
        return uploaded_data
    st.error("The columns need contains question and sql")
    return None
1738

1839
def main():
1940
load_dotenv()
@@ -39,7 +60,7 @@ def main():
3960
index=None,
4061
placeholder="Please select data profile...", key='current_profile_name')
4162

42-
tab_view, tab_add, tab_search = st.tabs(['View Samples', 'Add New Sample', 'Sample Search'])
63+
tab_view, tab_add, tab_search, batch_insert = st.tabs(['View Samples', 'Add New Sample', 'Sample Search', 'Batch Insert Samples'])
4364

4465
if current_profile is not None:
4566
with tab_view:
@@ -78,6 +99,27 @@ def main():
7899
st.code(sample_res)
79100
st.button('Delete ' + sample['_id'], key=sample['_id'], on_click=delete_sample,
80101
args=[current_profile, sample['_id']])
102+
with batch_insert:
103+
if current_profile is not None:
104+
st.write("This page support CSV or Excel files batch insert sql samples.")
105+
st.write("**The Column Name need contain 'question' and 'sql'**")
106+
uploaded_files = st.file_uploader("Choose CSV or Excel files", accept_multiple_files=True,
107+
type=['csv', 'xls', 'xlsx'])
108+
if uploaded_files:
109+
progress_bar = st.progress(0)
110+
status_text = st.empty()
111+
for i, uploaded_file in enumerate(uploaded_files):
112+
status_text.text(f"Processing file {i + 1} of {len(uploaded_files)}: {uploaded_file.name}")
113+
each_upload_data = read_file(uploaded_file)
114+
if each_upload_data is not None:
115+
for index, item in each_upload_data.iterrows():
116+
question = str(item["question"])
117+
sql = str(item["sql"])
118+
VectorStore.add_sample(current_profile, question, sql)
119+
progress_bar.progress((i + 1) / len(uploaded_files))
120+
121+
st.success("{uploaded_file} uploaded successfully!".format(uploaded_file=uploaded_file.name))
122+
progress_bar.empty()
81123
else:
82124
st.info('Please select data profile in the left sidebar.')
83125

application/pages/7_📚_Entity_Management.py

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import time
22

3+
import pandas as pd
34
import streamlit as st
45
from dotenv import load_dotenv
56
import logging
@@ -16,6 +17,28 @@ def delete_entity_sample(profile_name, id):
1617
st.success(f'Sample {id} deleted.')
1718

1819

20+
def read_file(uploaded_file):
    """
    Load an uploaded CSV or Excel file of entity samples into a DataFrame.

    The file type is taken from the extension of ``uploaded_file.name``
    (case-insensitive). Leading/trailing whitespace around column headers is
    stripped before validation, so a header row such as ``entity, comment``
    is accepted. Every failure is reported through ``st.error``.

    :param uploaded_file: file-like object exposing a ``name`` attribute
        (callers pass the items returned by ``st.file_uploader``)
    :return: DataFrame containing at least the ``entity`` and ``comment``
        columns, or None when the file type is unsupported, the file cannot
        be parsed, or a required column is missing
    """
    file_type = uploaded_file.name.split('.')[-1].lower()
    if file_type == 'csv':
        reader = pd.read_csv
    elif file_type in ('xls', 'xlsx'):
        reader = pd.read_excel
    else:
        st.error(f"Unsupported file type: {file_type}")
        return None

    try:
        uploaded_data = reader(uploaded_file)
    except (ValueError, OSError, ImportError) as err:
        # ValueError covers pandas' EmptyDataError / ParserError and
        # UnicodeDecodeError; ImportError covers a missing Excel engine.
        # Report the problem like the other failure paths instead of
        # letting one bad file abort the whole batch upload.
        st.error(f"Failed to read {uploaded_file.name}: {err}")
        return None

    # Tolerate padded headers ("entity , comment"); non-string labels are kept.
    uploaded_data = uploaded_data.rename(
        columns=lambda col: col.strip() if isinstance(col, str) else col)

    columns = list(uploaded_data.columns)
    if "entity" in columns and "comment" in columns:
        return uploaded_data
    st.error("The columns need contains entity and comment")
    return None
40+
41+
1942
def main():
2043
load_dotenv()
2144
logger.info('start entity management')
@@ -36,10 +59,11 @@ def main():
3659
current_profile = st.selectbox("My Data Profiles", all_profiles_list, index=profile_index)
3760
else:
3861
current_profile = st.selectbox("My Data Profiles", ProfileManagement.get_all_profiles(),
39-
index=None,
40-
placeholder="Please select data profile...", key='current_profile_name')
62+
index=None,
63+
placeholder="Please select data profile...", key='current_profile_name')
4164

42-
tab_view, tab_add, tab_search = st.tabs(['View Samples', 'Add New Sample', 'Sample Search'])
65+
tab_view, tab_add, tab_search, batch_insert = st.tabs(
66+
['View Samples', 'Add New Sample', 'Sample Search', 'Batch Insert Samples'])
4367
if current_profile is not None:
4468
with tab_view:
4569
if current_profile is not None:
@@ -72,7 +96,8 @@ def main():
7296
retrieve_number = st.slider("Entity Retrieve Number", 0, 100, 10)
7397
if st.button('Search', type='primary'):
7498
if len(entity_search) > 0:
75-
search_sample_result = VectorStore.search_sample(current_profile, retrieve_number, opensearch_info['ner_index'],
99+
search_sample_result = VectorStore.search_sample(current_profile, retrieve_number,
100+
opensearch_info['ner_index'],
76101
entity_search)
77102
for sample in search_sample_result:
78103
sample_res = {'Score': sample['_score'],
@@ -82,8 +107,27 @@ def main():
82107
st.button('Delete ' + sample['_id'], key=sample['_id'], on_click=delete_entity_sample,
83108
args=[current_profile, sample['_id']])
84109

85-
86-
110+
with batch_insert:
111+
if current_profile is not None:
112+
st.write("This page support CSV or Excel files batch insert entity samples.")
113+
st.write("**The Column Name need contain 'entity' and 'comment'**")
114+
uploaded_files = st.file_uploader("Choose CSV or Excel files", accept_multiple_files=True,
115+
type=['csv', 'xls', 'xlsx'])
116+
if uploaded_files:
117+
progress_bar = st.progress(0)
118+
status_text = st.empty()
119+
for i, uploaded_file in enumerate(uploaded_files):
120+
status_text.text(f"Processing file {i + 1} of {len(uploaded_files)}: {uploaded_file.name}")
121+
each_upload_data = read_file(uploaded_file)
122+
if each_upload_data is not None:
123+
for index, item in each_upload_data.iterrows():
124+
entity = str(item["entity"])
125+
comment = str(item["comment"])
126+
VectorStore.add_entity_sample(current_profile, entity, comment)
127+
progress_bar.progress((i + 1) / len(uploaded_files))
128+
129+
st.success("{uploaded_file} uploaded successfully!".format(uploaded_file=uploaded_file.name))
130+
progress_bar.empty()
87131
else:
88132
st.info('Please select data profile in the left sidebar.')
89133

0 commit comments

Comments
 (0)