Skip to content

Commit 15b1127

Browse files
add demo for multilingual agent
1 parent ec50f90 commit 15b1127

File tree

6 files changed

+254
-0
lines changed

6 files changed

+254
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*.log
2+
3+
.idea/
4+
5+
.env

ai-services/multilingual-agent/app.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import os
import logging

from dotenv import load_dotenv
from openai import AzureOpenAI
from azure.cognitiveservices.speech import SpeechConfig, SpeechRecognizer, AutoDetectSourceLanguageConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.ai.translation.text import TextTranslationClient, TranslatorCredential

from cli import Cli
from assistant import create_assistant

load_dotenv()

logger = logging.getLogger(__name__)


def main() -> None:
    """Wire up the Azure Speech, Translator and Azure OpenAI clients and run the CLI loop."""
    logging.basicConfig(filename='app.log', level=logging.INFO)

    speech_key = os.getenv("SPEECH_API_KEY")
    speech_region = os.getenv("SPEECH_REGION")
    translation_key = os.getenv("TRANSLATION_KEY")
    translation_region = os.getenv("TRANSLATION_REGION")

    openai_client = AzureOpenAI(
        api_key=os.getenv("OPENAI_KEY"),
        api_version="2024-07-01-preview",
        azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
        # Header opts in to preview-only features (the assistant's browser tool).
        default_headers={"X-Ms-Enable-Preview": "true"}
    )

    assistant_id = os.getenv("ASSISTANT_ID")

    # Create a fresh assistant when none is configured, so first runs work out of the box.
    if not assistant_id:
        assistant_id = create_assistant(openai_client).id
        logger.debug("created new assistant with id {}".format(assistant_id))

    speech_config = SpeechConfig(subscription=speech_key, region=speech_region)

    # Auto-detect which of the supported spoken languages the user is using.
    auto_detect_config = AutoDetectSourceLanguageConfig(languages=["en-US", "fr-FR", "pt-BR"])
    speech_recognizer = SpeechRecognizer(speech_config=speech_config, auto_detect_source_language_config=auto_detect_config)

    audio_config = AudioOutputConfig(use_default_speaker=True)
    speech_synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    translator_credential = TranslatorCredential(key=translation_key, region=translation_region)
    text_translator = TextTranslationClient(credential=translator_credential)

    runner = Cli(
        openai_client=openai_client,
        assistant_id=assistant_id,
        speech_recognizer=speech_recognizer,
        speech_synthesizer=speech_synthesizer,
        text_translator=text_translator
    )

    runner.run()


if __name__ == "__main__":
    try:
        main()
    except Exception:
        # Log the full traceback (the original `raise error` re-raise added nothing),
        # then re-raise bare so the process still fails loudly with the same exception.
        logger.exception("fatal error")
        raise
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
3+
from openai import AzureOpenAI
4+
5+
6+
def create_assistant(client: AzureOpenAI, model: str = "gpt-4-1106-preview"):
    """Create the travel-planner assistant with the Bing browser tool attached.

    Args:
        client: Authenticated Azure OpenAI client used to create the assistant.
        model: Model/deployment backing the assistant. Defaults to the value
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The created assistant object; callers persist its ``id``.
    """
    return client.beta.assistants.create(
        name="Travel planner copilot",
        instructions='''
        You are travel planner that helps people plan trips across the world.
        The user might give you constraints like:
        - destination
        - weather preference
        - attractions preference
        - date preference
        When asked for up-to-date information, you should use the browser tool.
        You should try to give a plan in the following format:
        - city
        - start and end date
        - cost breakdown
        - weather forecast
        - attractions and any useful information about tickets.
        ''',
        tools=[{
            # "browser" is a preview tool type; the Bing resource grants web search.
            "type": "browser",
            "browser": {
                "bing_resource_id": os.getenv("BING_RESOURCE_ID")
            }
        }],
        model=model,
    )

ai-services/multilingual-agent/cli.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import logging
2+
3+
from openai import AzureOpenAI
4+
from azure.cognitiveservices.speech import SpeechRecognizer, SpeechSynthesizer, ResultReason, CancellationReason, PropertyId
5+
from azure.ai.translation.text import TextTranslationClient
6+
from azure.ai.translation.text.models import InputTextItem
7+
8+
from event_handler import EventHandler
9+
10+
11+
logger = logging.getLogger(__name__)
12+
13+
base_language = 'en'
14+
15+
16+
class Cli:
    """Interactive voice loop: listen, translate to English, query the assistant,
    translate the reply back, and speak it aloud."""

    def __init__(self,
                 openai_client: AzureOpenAI,
                 assistant_id: str,
                 speech_recognizer: SpeechRecognizer,
                 speech_synthesizer: SpeechSynthesizer,
                 text_translator: TextTranslationClient):
        self.openai_client = openai_client
        self.assistant_id = assistant_id
        self.speech_recognizer = speech_recognizer
        self.speech_synthesizer = speech_synthesizer
        self.text_translator = text_translator
        # Most recently detected spoken language (e.g. "fr-FR"); set by recognize().
        self.language = ''
        # Id of the OpenAI conversation thread; assigned once in run().
        self.thread_id = ''

    def run(self):
        """Create the conversation thread, then loop forever over voice turns.

        Any failure in a single turn is logged and the loop continues, so one
        bad recognition/translation never kills the session.
        """
        self.thread_id = self.openai_client.beta.threads.create().id

        print("Say something...")

        while True:
            try:
                spoken = self.recognize()

                # The assistant is prompted in the base language; translate the
                # user's words into it when they spoke something else.
                prompt = spoken
                if not self.language.startswith(base_language):
                    prompt = self.translate(text=spoken, language=base_language)

                reply = self.assistant(content=prompt)

                # Translate the assistant's answer back into the user's language.
                if not self.language.startswith(base_language):
                    reply = self.translate(text=reply, language=self.language)

                self.synthesize(reply)
            except Exception as e:
                logger.error("failure: {}".format(e))
                continue

    def recognize(self) -> str:
        """Capture one utterance, remember its detected language, return the text.

        Raises:
            Exception: when the recognizer returns anything but RecognizedSpeech.
        """
        result = self.speech_recognizer.recognize_once()

        if result.reason != ResultReason.RecognizedSpeech:
            if result.reason == ResultReason.NoMatch:
                error = "No speech could be recognized: {}".format(result.no_match_details)
            elif result.reason == ResultReason.Canceled:
                details = result.cancellation_details
                error = "Speech Recognition canceled: {}".format(details.reason)
                if details.reason == CancellationReason.Error:
                    error += "Error details: {}".format(details.error_details)
            else:
                error = 'Failed to recognize speech.'
            raise Exception("Speech recognition failed with error: {}".format(error))

        # The auto-detect result carries the language the SDK decided the user spoke.
        self.language = result.properties[PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult]
        logger.info("Recognized (language={}): {}".format(self.language, result.text))

        return result.text

    def synthesize(self, text: str) -> None:
        """Speak the given text on the default speaker.

        Raises:
            Exception: when synthesis does not complete successfully.
        """
        result = self.speech_synthesizer.speak_text(text)

        if result.reason != ResultReason.SynthesizingAudioCompleted:
            details = result.cancellation_details
            error = "Speech synthesis canceled: {}".format(details.reason)
            if details.reason == CancellationReason.Error and details.error_details:
                error += "Error details: {}".format(details.error_details)
            raise Exception("Speech synthesis failed with error: {}".format(error))

        logger.info("Speech synthesized for text [{}]".format(text))

    def translate(self, text: str, language: str) -> str:
        """Translate ``text`` into ``language`` via the Azure Translator client.

        Raises:
            Exception: when the service returns no translation.
        """
        response = self.text_translator.translate(
            content=[InputTextItem(text=text)], to=[language])
        if not response or not response[0].translations:
            raise Exception("Failed to translate to {} text: {}".format(language, text))

        translated = response[0].translations[0].text
        logger.info("Translated [{}] to [{}]".format(text, translated))
        return translated

    def assistant(self, content: str) -> str:
        """Post one user message to the thread and stream back the assistant's reply."""
        self.openai_client.beta.threads.messages.create(
            thread_id=self.thread_id,
            role="user",
            content=content
        )

        handler = EventHandler()
        with self.openai_client.beta.threads.runs.stream(
                assistant_id=self.assistant_id,
                thread_id=self.thread_id,
                event_handler=handler) as stream:
            stream.until_done()

        return handler.get_result()
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import logging
2+
3+
from openai import AssistantEventHandler
4+
from openai.types.beta.threads.runs import ToolCall
5+
from openai.types.beta.threads import Text
6+
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
class EventHandler(AssistantEventHandler):
    """Accumulates the assistant's streamed reply (plus URL citations) into a string."""

    def __init__(self):
        super().__init__()
        # Final assistant reply; filled in by on_text_done().
        self.result = ''

    def on_exception(self, exception: Exception) -> None:
        logger.error("please try again. an exception occurred: {}".format(exception))

    def on_tool_call_created(self, tool_call: ToolCall):
        logger.info("started calling tool {}".format(tool_call['type']))

    def on_tool_call_done(self, tool_call: ToolCall) -> None:
        logger.info("completed calling tool {}".format(tool_call['type']))

    def on_text_done(self, text: Text) -> None:
        """Store the completed text and append a markdown list of its URL citations."""
        self.result = text.value

        is_first_url_citation = True
        for annotation in text.annotations:
            if annotation.type == "url_citation":
                if is_first_url_citation:
                    self.result += "\nUrl citations: \n"
                    # Bug fix: the flag was never cleared, so the header was
                    # re-emitted before every citation instead of only the first.
                    is_first_url_citation = False
                title = annotation.model_extra['url_citation']['title']
                url = annotation.model_extra['url_citation']['url']
                self.result += "* {} - [{}]({})\n".format(annotation.text, title, url)

    def on_timeout(self) -> None:
        logger.warning("timeout occurred. please try again")

    def on_end(self) -> None:
        logger.info("completed conversation with assistant")

    def get_result(self) -> str:
        """Return the reply accumulated during the streamed run."""
        return self.result
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
azure-cognitiveservices-speech==1.38.0
2+
azure-ai-translation-text==1.0.0b1
3+
openai==1.30.1
4+
python-dotenv

0 commit comments

Comments
 (0)