Skip to content

Commit 15b1127

Browse files
add demo for multilingual agent
1 parent ec50f90 commit 15b1127

File tree

6 files changed

+254
-0
lines changed

6 files changed

+254
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*.log
2+
3+
.idea/
4+
5+
.env

ai-services/multilingual-agent/app.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import os
import logging

from dotenv import load_dotenv
from openai import AzureOpenAI
from azure.cognitiveservices.speech import SpeechConfig, SpeechRecognizer, AutoDetectSourceLanguageConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.ai.translation.text import TextTranslationClient, TranslatorCredential

from cli import Cli
from assistant import create_assistant

load_dotenv()

logger = logging.getLogger(__name__)


def main() -> None:
    """Wire up the Azure Speech, Translator and Azure OpenAI clients and run the CLI loop."""
    logging.basicConfig(filename='app.log', level=logging.INFO)

    speech_key = os.getenv("SPEECH_API_KEY")
    speech_region = os.getenv("SPEECH_REGION")
    translation_key = os.getenv("TRANSLATION_KEY")
    translation_region = os.getenv("TRANSLATION_REGION")

    openai_client = AzureOpenAI(
        api_key=os.getenv("OPENAI_KEY"),
        api_version="2024-07-01-preview",
        azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
        # Header opts in to preview-only features (the assistant's browser tool).
        default_headers={"X-Ms-Enable-Preview": "true"}
    )

    assistant_id = os.getenv("ASSISTANT_ID")

    # Create a fresh assistant when none is configured, so first runs work out of the box.
    if not assistant_id:
        assistant_id = create_assistant(openai_client).id
        logger.debug("created new assistant with id {}".format(assistant_id))

    speech_config = SpeechConfig(subscription=speech_key, region=speech_region)

    # Auto-detect which of the supported spoken languages the user is using.
    auto_detect_config = AutoDetectSourceLanguageConfig(languages=["en-US", "fr-FR", "pt-BR"])
    speech_recognizer = SpeechRecognizer(speech_config=speech_config, auto_detect_source_language_config=auto_detect_config)

    audio_config = AudioOutputConfig(use_default_speaker=True)
    speech_synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    translator_credential = TranslatorCredential(key=translation_key, region=translation_region)
    text_translator = TextTranslationClient(credential=translator_credential)

    runner = Cli(
        openai_client=openai_client,
        assistant_id=assistant_id,
        speech_recognizer=speech_recognizer,
        speech_synthesizer=speech_synthesizer,
        text_translator=text_translator
    )

    runner.run()


if __name__ == "__main__":
    try:
        main()
    except Exception:
        # Log the full traceback (the original `raise error` re-raise added nothing),
        # then re-raise bare so the process still fails loudly with the same exception.
        logger.exception("fatal error")
        raise
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
3+
from openai import AzureOpenAI
4+
5+
6+
def create_assistant(client: AzureOpenAI, model: str = "gpt-4-1106-preview"):
    """Create the travel-planner assistant with the Bing browser tool attached.

    Args:
        client: Authenticated Azure OpenAI client used to create the assistant.
        model: Model/deployment backing the assistant. Defaults to the value
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The created assistant object; callers persist its ``id``.
    """
    return client.beta.assistants.create(
        name="Travel planner copilot",
        instructions='''
        You are travel planner that helps people plan trips across the world.
        The user might give you constraints like:
        - destination
        - weather preference
        - attractions preference
        - date preference
        When asked for up-to-date information, you should use the browser tool.
        You should try to give a plan in the following format:
        - city
        - start and end date
        - cost breakdown
        - weather forecast
        - attractions and any useful information about tickets.
        ''',
        tools=[{
            # "browser" is a preview tool type; the Bing resource grants web search.
            "type": "browser",
            "browser": {
                "bing_resource_id": os.getenv("BING_RESOURCE_ID")
            }
        }],
        model=model,
    )

ai-services/multilingual-agent/cli.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import logging
2+
3+
from openai import AzureOpenAI
4+
from azure.cognitiveservices.speech import SpeechRecognizer, SpeechSynthesizer, ResultReason, CancellationReason, PropertyId
5+
from azure.ai.translation.text import TextTranslationClient
6+
from azure.ai.translation.text.models import InputTextItem
7+
8+
from event_handler import EventHandler
9+
10+
11+
logger = logging.getLogger(__name__)
12+
13+
base_language = 'en'
14+
15+
16+
class Cli:
    """Interactive voice loop: listen, translate to English, query the assistant,
    translate the reply back, and speak it aloud."""

    def __init__(self,
                 openai_client: AzureOpenAI,
                 assistant_id: str,
                 speech_recognizer: SpeechRecognizer,
                 speech_synthesizer: SpeechSynthesizer,
                 text_translator: TextTranslationClient):
        self.openai_client = openai_client
        self.assistant_id = assistant_id
        self.speech_recognizer = speech_recognizer
        self.speech_synthesizer = speech_synthesizer
        self.text_translator = text_translator
        # Most recently detected spoken language (e.g. "fr-FR"); set by recognize().
        self.language = ''
        # Id of the OpenAI conversation thread; assigned once in run().
        self.thread_id = ''

    def run(self):
        """Create the conversation thread, then loop forever over voice turns.

        Any failure in a single turn is logged and the loop continues, so one
        bad recognition/translation never kills the session.
        """
        self.thread_id = self.openai_client.beta.threads.create().id

        print("Say something...")

        while True:
            try:
                spoken = self.recognize()

                # The assistant is prompted in the base language; translate the
                # user's words into it when they spoke something else.
                prompt = spoken
                if not self.language.startswith(base_language):
                    prompt = self.translate(text=spoken, language=base_language)

                reply = self.assistant(content=prompt)

                # Translate the assistant's answer back into the user's language.
                if not self.language.startswith(base_language):
                    reply = self.translate(text=reply, language=self.language)

                self.synthesize(reply)
            except Exception as e:
                logger.error("failure: {}".format(e))
                continue

    def recognize(self) -> str:
        """Capture one utterance, remember its detected language, return the text.

        Raises:
            Exception: when the recognizer returns anything but RecognizedSpeech.
        """
        result = self.speech_recognizer.recognize_once()

        if result.reason != ResultReason.RecognizedSpeech:
            if result.reason == ResultReason.NoMatch:
                error = "No speech could be recognized: {}".format(result.no_match_details)
            elif result.reason == ResultReason.Canceled:
                details = result.cancellation_details
                error = "Speech Recognition canceled: {}".format(details.reason)
                if details.reason == CancellationReason.Error:
                    error += "Error details: {}".format(details.error_details)
            else:
                error = 'Failed to recognize speech.'
            raise Exception("Speech recognition failed with error: {}".format(error))

        # The auto-detect result carries the language the SDK decided the user spoke.
        self.language = result.properties[PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult]
        logger.info("Recognized (language={}): {}".format(self.language, result.text))

        return result.text

    def synthesize(self, text: str) -> None:
        """Speak the given text on the default speaker.

        Raises:
            Exception: when synthesis does not complete successfully.
        """
        result = self.speech_synthesizer.speak_text(text)

        if result.reason != ResultReason.SynthesizingAudioCompleted:
            details = result.cancellation_details
            error = "Speech synthesis canceled: {}".format(details.reason)
            if details.reason == CancellationReason.Error and details.error_details:
                error += "Error details: {}".format(details.error_details)
            raise Exception("Speech synthesis failed with error: {}".format(error))

        logger.info("Speech synthesized for text [{}]".format(text))

    def translate(self, text: str, language: str) -> str:
        """Translate ``text`` into ``language`` via the Azure Translator client.

        Raises:
            Exception: when the service returns no translation.
        """
        response = self.text_translator.translate(
            content=[InputTextItem(text=text)], to=[language])
        if not response or not response[0].translations:
            raise Exception("Failed to translate to {} text: {}".format(language, text))

        translated = response[0].translations[0].text
        logger.info("Translated [{}] to [{}]".format(text, translated))
        return translated

    def assistant(self, content: str) -> str:
        """Post one user message to the thread and stream back the assistant's reply."""
        self.openai_client.beta.threads.messages.create(
            thread_id=self.thread_id,
            role="user",
            content=content
        )

        handler = EventHandler()
        with self.openai_client.beta.threads.runs.stream(
                assistant_id=self.assistant_id,
                thread_id=self.thread_id,
                event_handler=handler) as stream:
            stream.until_done()

        return handler.get_result()
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import logging
2+
3+
from openai import AssistantEventHandler
4+
from openai.types.beta.threads.runs import ToolCall
5+
from openai.types.beta.threads import Text
6+
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
class EventHandler(AssistantEventHandler):
    """Accumulates the assistant's streamed reply (plus URL citations) into a string."""

    def __init__(self):
        super().__init__()
        # Final assistant reply; filled in by on_text_done().
        self.result = ''

    def on_exception(self, exception: Exception) -> None:
        logger.error("please try again. an exception occurred: {}".format(exception))

    def on_tool_call_created(self, tool_call: ToolCall):
        logger.info("started calling tool {}".format(tool_call['type']))

    def on_tool_call_done(self, tool_call: ToolCall) -> None:
        logger.info("completed calling tool {}".format(tool_call['type']))

    def on_text_done(self, text: Text) -> None:
        """Store the completed text and append a markdown list of its URL citations."""
        self.result = text.value

        is_first_url_citation = True
        for annotation in text.annotations:
            if annotation.type == "url_citation":
                if is_first_url_citation:
                    self.result += "\nUrl citations: \n"
                    # Bug fix: the flag was never cleared, so the header was
                    # re-emitted before every citation instead of only the first.
                    is_first_url_citation = False
                title = annotation.model_extra['url_citation']['title']
                url = annotation.model_extra['url_citation']['url']
                self.result += "* {} - [{}]({})\n".format(annotation.text, title, url)

    def on_timeout(self) -> None:
        logger.warning("timeout occurred. please try again")

    def on_end(self) -> None:
        logger.info("completed conversation with assistant")

    def get_result(self) -> str:
        """Return the reply accumulated during the streamed run."""
        return self.result
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
azure-cognitiveservices-speech==1.38.0
2+
azure-ai-translation-text==1.0.0b1
3+
openai==1.30.1
4+
python-dotenv

0 commit comments

Comments
 (0)