diff --git a/.gradio/flagged/dataset1.csv b/.gradio/flagged/dataset1.csv new file mode 100644 index 0000000..627347a --- /dev/null +++ b/.gradio/flagged/dataset1.csv @@ -0,0 +1,2 @@ +keyword,output,timestamp +Apple SVP Software Engineering,"[{""source"": {""title"": ""Craig Federighi"", ""section"": ""short description""}, ""highlight"": {""text"": [""Craig Federighi (born May 27, 1969) is an American engineer and business executive who is the senior vice president (SVP) of software engineering at Apple Inc. He oversees the development of Apple's operating systems. His teams are responsible for delivering the software of Apple's products, including the user interface, applications, and frameworks.""]}}, {""source"": {""title"": ""Sabih Khan"", ""section"": ""Infobox person\n""}, ""highlight"": {""text"": [""Sabih Khan (born 1966) is an Indian-American business executive, who is the senior vice president (SVP) of operations at Apple Inc. He oversees Apple's global supply chain, and is responsible for Apple's supplier responsibility programs.""]}}, {""source"": {""title"": ""University of Naples Federico II"", ""section"": ""Apple Developer Academy""}, ""highlight"": {""text"": ["" Apple Developer Academy \nThe Apple Developer Academy is a university academy established on October 6, 2016, in collaboration with the American company Apple Inc.. It is situated in the San Giovanni Complex, located in the San Giovanni a Teduccio district.\n\nThe training primarily focuses on software development and app design tailored for the Apple ecosystem. The training areas are categorized into:\n\n Programming (Swift, server-side scripting, SQL, NoSQL)\n Graphical interface design (HCI)\n Business\n\nThe lessons are centered around Challenge-based learning (CBL), a multidisciplinary approach that motivates students to leverage everyday technologies to solve real-world problems. 
As of December 2023, the Academy has welcomed over 1700 students, resulting in the creation and deployment of more than 800 applications.""]}}, {""source"": {""title"": ""RSVP-TE"", ""section"": ""short description""}, ""highlight"": {""text"": [""Resource Reservation Protocol - Traffic Engineering (RSVP-TE) is an extension of the Resource Reservation Protocol (RSVP) for traffic engineering. It supports the reservation of resources across an IP network. Applications running on IP end systems can use RSVP to indicate to other nodes the nature (bandwidth, jitter, maximum burst, and so forth) of the packet streams they want to receive. RSVP runs on both IPv4 and IPv6.\n\nRSVP-TE generally allows the establishment of Multiprotocol Label Switching (MPLS) label-switched paths (LSPs), taking into consideration network constraint parameters such as available bandwidth and explicit hops. Updated by , , , , , , , , and .""]}}, {""source"": {""title"": ""VSIP"", ""section"": ""notability""}, ""highlight"": {""text"": [""The Visual Studio Industry Partner (VSIP) Program (formerly Visual Studio Integration Program) allows third-party developers and software vendors to develop tools, components and languages for use in the Microsoft Visual Studio .NET IDE. The program offers partnership benefits including co-marketing opportunities, and Visual Studio licensing options as well as extended access to Microsoft technical and premier support.\n\nThe VSIP SDK (software development kit) facilitates development of integrated tools and includes development software and documentation that can be used within the Visual Studio .NET IDE directly. 
Extensions to the IDE, also known as \""Add-ins\"", can be as simple as adding a custom server control to the toolbox, or as complex as adding support for a new CLR-compliant language.\n\nVisual Studio Express is limited and does not support third-party extensions.""]}}]",2025-07-15 15:27:15.631174 diff --git a/README.md b/README.md index 6056a42..393ef88 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Agent Bootcamp - +test commit ---------------------------------------------------------------------------------------- This is a collection of reference implementations for Vector Institute's **Agent Bootcamp**, taking place between June and September 2025. The repository demonstrates modern agentic workflows for retrieval-augmented generation (RAG), evaluation, and orchestration using the latest Python tools and frameworks. diff --git a/src/2_frameworks/2_multi_agent/planner_worker_gradio_ioana.py b/src/2_frameworks/2_multi_agent/planner_worker_gradio_ioana.py new file mode 100644 index 0000000..f2551bb --- /dev/null +++ b/src/2_frameworks/2_multi_agent/planner_worker_gradio_ioana.py @@ -0,0 +1,234 @@ +"""Example code for planner-worker agent collaboration. 
+ +With reference to: + +github.com/ComplexData-MILA/misinfo-datasets +/blob/3304e6e/misinfo_data_eval/tasks/web_search.py +""" + +import asyncio +import contextlib +import logging +import signal +import sys +import os +import json +import openai +import numpy as np + +import agents +from agents import function_tool +import gradio as gr +from dotenv import load_dotenv +from gradio.components.chatbot import ChatMessage +from openai import AsyncOpenAI +from sklearn.metrics.pairwise import cosine_similarity +from pydantic import BaseModel + +from src.prompts import REACT_INSTRUCTIONS, KB_SEARCH_INSTRUCTIONS, QA_SEARCH_INSTRUCTIONS, EVALUATOR_INSTRUCTIONS +from src.utils import ( + AsyncWeaviateKnowledgeBase, + Configs, + get_weaviate_async_client, + oai_agent_stream_to_gradio_messages, + setup_langfuse_tracer, +) +from src.utils.langfuse.shared_client import langfuse_client + + +load_dotenv(verbose=True) + + +logging.basicConfig(level=logging.INFO) + + +configs = Configs.from_env_var() +async_weaviate_client = get_weaviate_async_client( + http_host=configs.weaviate_http_host, + http_port=configs.weaviate_http_port, + http_secure=configs.weaviate_http_secure, + grpc_host=configs.weaviate_grpc_host, + grpc_port=configs.weaviate_grpc_port, + grpc_secure=configs.weaviate_grpc_secure, + api_key=configs.weaviate_api_key, +) +async_openai_client = AsyncOpenAI() +async_knowledgebase = AsyncWeaviateKnowledgeBase( + async_weaviate_client, + collection_name="enwiki_20250520", +) + + +async def _cleanup_clients() -> None: + """Close async clients.""" + await async_weaviate_client.close() + await async_openai_client.close() + + +def _handle_sigint(signum: int, frame: object) -> None: + """Handle SIGINT signal to gracefully shutdown.""" + with contextlib.suppress(Exception): + asyncio.get_event_loop().run_until_complete(_cleanup_clients()) + sys.exit(0) + + +@function_tool +async def qa_search_tool(user_query:str) -> list: + """Search the QA dataset for a question that is 
semantically similar to the user query.""" + qa_dataset = { + 1 : { + "question": "What is the capital of France?", + "answer": "The capital of France is Paris.", + "context": "Paris is the capital city of France, known for its art, fashion, and culture." + }, + 2: { + "question": "What software using the Blinnk layout engine is Fluff Busting Purity available for?", + "answer": "Opera", + "context": """{"title": ["Vivaldi (web browser)","Internet Explorer shell","GtkHTML","Firefox","Web browser engine","Opera (web browser)","Presto (layout engine)","Fluff Busting Purity","Tasman (layout engine)","KaXUL"], "sentences": [["Vivaldi is a freeware, cross-platform web browser developed by Vivaldi Technologies, a company founded by Opera Software co-founder and former CEO Jon Stephenson von Tetzchner and Tatsuki Tomita."," The browser was officially launched on April 12, 2016."," The browser is aimed at staunch technologists, heavy Internet users, and previous Opera web browser users disgruntled by Opera's transition from the Presto layout engine to the Blink layout engine, which removed many popular features."," Vivaldi aims to revive the old, popular features of Opera 12."," The browser has gained popularity since the launch of its first technical preview."," The browser has 1 million users as of January 2017."],["An Internet Explorer shell is any computer software that uses the Trident rendering engine of the Internet Explorer web browser."," Although the term \"Trident shell\" is probably more accurate for describing these applications (including Internet Explorer itself), the term \"Internet Explorer shell\", or \"IE shell\", is in common parlance."," This means that these software products are not actually full-fledged web browsers in their own right but are simply an alternate interface for Internet Explorer; they share the same limitations of the Trident engine, typically contain the same bugs as IE browsers based on the same version of Trident, and any security 
vulnerabilities found in IE will generally apply to these browsers as well."," Strictly speaking, programs that use Tasman (layout engine), used in Internet Explorer 5 for Apple Mac, are also IE shells, but, because Internet Explorer for Mac was discontinued in 2003, and Tasman was further developed independent of IE, it tends to be thought of as a separate layout engine."],["GtkHTML is a layout engine written in C using the GTK+ widget toolkit."," It is primarily used by Novell Evolution and other GTK+ applications."," The Balsa email client used GtkHTML as its layout engine for displaying emails until recently."," In the long run, GtkHTML is planned to be phased out in favor of WebKit in GNOME."],["Mozilla Firefox (or simply Firefox) is a free and open-source web browser developed by the Mozilla Foundation and its subsidiary the Mozilla Corporation."," Firefox is available for Windows, macOS and Linux operating systems, with its Firefox for Android available for Android (formerly Firefox for mobile, it also ran on the discontinued Firefox OS), and uses the Gecko layout engine to render web pages, which implements current and anticipated web standards."," An additional version, Firefox for iOS, was released in late 2015, but this version does not use Gecko due to Apple's restrictions limiting third-party web browsers to the WebKit-based layout engine built into iOS."],["A web browser engine (sometimes called web layout engine or web rendering engine) is a computer program that renders marked up content (such as HTML, XML, image files, etc.) 
and formatting information (such as CSS, XSL, etc.)."," A layout engine is a typical component of web browsers, email clients, e-book readers, on-line help systems, or other applications that require the displaying (and editing) of web pages."],["Opera is a web browser for Windows, macOS, and Linux operating systems developed by Opera Software."," It uses the Blink layout engine."," An earlier version using the Presto layout engine is still available, and runs on FreeBSD systems."," According to Opera, the browser had more than 350 million users worldwide in the 4th quarter of 2014."," Total Opera mobile users reached 291 million in June 2015."," According to SlashGeek, Opera has originated features later adopted by other web browsers, including Speed Dial, pop-up blocking, browser sessions, private browsing, and tabbed browsing."],["Presto was the layout engine of the Opera web browser for a decade."," It was released on 28 January 2003 in Opera 7, and later used to power the Opera Mini and Opera Mobile browsers."," As of Opera 15, the desktop browser uses a Chromium backend, replacing Presto with the Blink layout engine."],["Fluff Busting Purity, or FB Purity for short (previously known as Facebook Purity) is a web browser extension designed to customise the Facebook website's user interface and add extra functionality."," Developed by Steve Fernandez, a UK-based programmer, it was first released in 2009 as a Greasemonkey script, as donationware."," It is available for Firefox , Google Chrome , Microsoft Edge , Safari, Opera and the Maxthon Cloud Browser ."],["Tasman is a discontinued layout engine developed by Microsoft for inclusion in the Macintosh version of Internet Explorer 5."," Tasman was an attempt to improve support for web standards, as defined by the World Wide Web Consortium."," At the time of its release, Tasman was seen as the layout engine with the best support for web standards such as HTML and CSS."," Internet Explorer for Mac is no longer 
supported, but newer versions of Tasman are incorporated in some other Microsoft products."],["KaXUL (\"KDE Advanced XUL\") is a reimplemetation of Mozilla's own XUL framework for KDE."," Written by George Staikos, it allows for XUL applications - both client- and server-side - to be read by native Qt widgets."," uXUL (\"UI XUL\"), also made by Staikos, takes a XUL application, uses KaXUL to convert it, and then run it as a native KDE plugin."," Used together, one can access XUL applications using Konqueror or any other Web browser using the KHTML layout engine."," Previously, XUL applications were only used by browsers using the Gecko layout engine, which is used, most famously, by Mozilla Firefox for the generation of its extensions."]]}""" + }, + } + + # _embed_client = openai.OpenAI( + # api_key=os.getenv("EMBEDDING_API_KEY"), + # base_url=os.getenv("EMBEDDING_BASE_URL"), + # max_retries=5) + + # #embed user query + # user_query_embedding = _embed_client.embeddings.create(input=user_query, model=os.getenv('EMBEDDING_MODEL_NAME')) + # user_query_embedding = np.array(user_query_embedding.data[0].embedding) + # user_query_embedding = user_query_embedding.reshape(1, -1) + + # cosi_list = [] + # qa_embedding_list = _embed_client.embeddings.create(input=qa_dataset, model=os.getenv('EMBEDDING_MODEL_NAME')) + # for i, qa_embedding in enumerate(qa_embedding_list.data): + # qa_embedding = np.array(qa_embedding.embedding) + # qa_embedding = qa_embedding.reshape(1,-1) + # similarity_score = cosine_similarity(user_query_embedding, qa_embedding)[0][0] + # cosi_list.append({"faq":faq_list[i], "sim":similarity_score}) + + # sorted_qa = sorted(cosi_list, key=lambda d: d["sim"], reverse=True) + # sorted_faqs_list = [i["faq"] for i in sorted_qa] + # + # return "\n".join(f" {i}\n"for i in sorted_faqs_list) + + return json.dumps(qa_dataset) + +qa_search_agent = agents.Agent( + name="QASearchAgent", + instructions=QA_SEARCH_INSTRUCTIONS, + tools=[qa_search_tool], + # a faster, 
smaller model for quick searches + model=agents.OpenAIChatCompletionsModel( + model="gemini-2.5-flash", openai_client=async_openai_client + ) +) + +kb_search_agent = agents.Agent( + name="KBSearchAgent", + instructions=KB_SEARCH_INSTRUCTIONS, + tools=[ + agents.function_tool(async_knowledgebase.search_knowledgebase), + ], + + model=agents.OpenAIChatCompletionsModel( + model="gemini-2.5-flash", openai_client=async_openai_client + ) +) + +evaluator_agent = agents.Agent( + name="EvaluatorAgent", + instructions=EVALUATOR_INSTRUCTIONS, + model=agents.OpenAIChatCompletionsModel( + model="gemini-2.5-flash", openai_client=async_openai_client + ) +) + +main_agent = agents.Agent( + name="MainAgent", + instructions=REACT_INSTRUCTIONS, + + tools=[ + qa_search_agent.as_tool( + tool_name="qa_search_Agent", + tool_description = "Perform a search of the QA database and retrieve question/answer/context tuples related to input query." + ), + kb_search_agent.as_tool( + tool_name="kb_search_agent", + tool_description="Perform a search of a knowledge base and synthesize the search results to answer input question.", + ), + evaluator_agent.as_tool( + tool_name="evaluator_agent", + tool_description="Evaluate the output of the knowledge base search agent.", + ) + ], + + model=agents.OpenAIChatCompletionsModel( + model="gemini-2.5-pro", openai_client=async_openai_client + ), +) + + +async def _main(question: str, gr_messages: list[ChatMessage]): + setup_langfuse_tracer() + + # Use the main agent as the entry point- not the worker agent. 
+ with langfuse_client.start_as_current_span(name="Calen-Multi-Agent-V1.0") as span: + score_is_answer_correct = [] + score_explanation = [] + + span.update(input=question) + result_stream = agents.Runner.run_streamed(main_agent, input=question) + + async for _item in result_stream.stream_events(): + gr_messages += oai_agent_stream_to_gradio_messages(_item) + + if len(gr_messages) > 0: + yield gr_messages + + try: + # Assume `event` is your RunItemStreamEvent + if _item.name == "tool_output" and _item.item.type == "tool_call_output_item": + tool_output = json.loads(_item.item.output) + + explanation = tool_output.get("explanation") + is_correct = tool_output.get("is_answer_correct") + + score_is_answer_correct.append(is_correct) + score_explanation.append(explanation) + + print("✅ is_answer_correct:", is_correct) + print("🧠 explanation:", explanation) + + except: + continue + + span.update(output=result_stream.final_output) + + if len(score_is_answer_correct) > 0: + langfuse_client.create_score( + name="is_answer_correct", + value=score_is_answer_correct[0], + comment=score_explanation[0], + trace_id=langfuse_client.get_current_trace_id() + ) + +demo = gr.ChatInterface( + _main, + title="Hitachi Multi-Agent Knowledge Retrieval System", + type="messages", + examples=[ + "Where should I go in France? ", + "Where is the government of France located? ", + "Check expected answers for 'web browser' topic?" 
+ ], +) + +if __name__ == "__main__": + async_openai_client = AsyncOpenAI() + + signal.signal(signal.SIGINT, _handle_sigint) + + try: + demo.launch(server_name="0.0.0.0") + finally: + asyncio.run(_cleanup_clients()) diff --git a/src/4_hitachi/1_multi_agent/README.md b/src/4_hitachi/1_multi_agent/README.md new file mode 100644 index 0000000..d3ff2de --- /dev/null +++ b/src/4_hitachi/1_multi_agent/README.md @@ -0,0 +1,12 @@ +# 4.1 Multi-agent Orchestrator-QA Search-Knowledge Base Search via OpenAI Agents SDK + +This folder introduces a multi-agent architecture, featuring an orchestrator agent and two search agents, one with access to the QA dataset and the other with access to the Knowledge Base dataset. + +The orchestrator agent takes a user query and breaks it down into search queries for the QA dataset. It then takes the returned QA pairs and breaks down searches for the Knowledge Base. The Knowledge Base search agent calls the search tool and synthesizes the results into an answer for each question. The orchestrator agent then receives the resulting answers and evaluates them based on the ground truth answers retrieved from the QA search. + +## Run + +```bash +uv run --env-file .env \ +-m src.4_hitachi.1_multi_agent.orchestrator_gradio +``` diff --git a/src/4_hitachi/1_multi_agent/orchestrator_gradio.py b/src/4_hitachi/1_multi_agent/orchestrator_gradio.py new file mode 100644 index 0000000..7213588 --- /dev/null +++ b/src/4_hitachi/1_multi_agent/orchestrator_gradio.py @@ -0,0 +1,141 @@ +"""Example code for orchestrator-worker agent collaboration.
+ +With reference to: + +github.com/ComplexData-MILA/misinfo-datasets +/blob/3304e6e/misinfo_data_eval/tasks/web_search.py +""" + +import asyncio +import contextlib +import logging +import signal +import sys + +import agents +import gradio as gr +from dotenv import load_dotenv +from gradio.components.chatbot import ChatMessage +from openai import AsyncOpenAI + +from src.prompts import REACT_INSTRUCTIONS, KB_SEARCH_INSTRUCTIONS +from src.utils import ( + AsyncWeaviateKnowledgeBase, + Configs, + get_weaviate_async_client, + oai_agent_stream_to_gradio_messages, + setup_langfuse_tracer, +) +from src.utils.langfuse.shared_client import langfuse_client + + +load_dotenv(verbose=True) + + +logging.basicConfig(level=logging.INFO) + +DATASET_NAME = "hitachi-multi-agent-orchestrator" +AGENT_LLM_NAMES = { + "worker": "gemini-2.5-flash", # less expensive, + "planner": "gemini-2.5-pro", # more expensive, better at reasoning and planning +} + +configs = Configs.from_env_var() +async_weaviate_client = get_weaviate_async_client( + http_host=configs.weaviate_http_host, + http_port=configs.weaviate_http_port, + http_secure=configs.weaviate_http_secure, + grpc_host=configs.weaviate_grpc_host, + grpc_port=configs.weaviate_grpc_port, + grpc_secure=configs.weaviate_grpc_secure, + api_key=configs.weaviate_api_key, +) +async_openai_client = AsyncOpenAI() +async_knowledgebase = AsyncWeaviateKnowledgeBase( + async_weaviate_client, + collection_name="enwiki_20250520", +) + + +async def _cleanup_clients() -> None: + """Close async clients.""" + await async_weaviate_client.close() + await async_openai_client.close() + + +def _handle_sigint(signum: int, frame: object) -> None: + """Handle SIGINT signal to gracefully shutdown.""" + with contextlib.suppress(Exception): + asyncio.get_event_loop().run_until_complete(_cleanup_clients()) + sys.exit(0) + + +# Knowledgebase Search Agent: a simple agent that searches the knowledge base +knowledgebase_agent = agents.Agent( + 
name="KnowledgeBaseSearchAgent", + instructions=KB_SEARCH_INSTRUCTIONS, + tools=[ + agents.function_tool(async_knowledgebase.search_knowledgebase), + ], + # a faster, smaller model for quick searches + model=agents.OpenAIChatCompletionsModel( + model="gemini-2.5-flash", openai_client=async_openai_client + ), +) + +# Main Agent: more expensive and slower, but better at complex planning +orchestrator_agent = agents.Agent( + name="OrchestratorAgent", + instructions=REACT_INSTRUCTIONS, + + # Allow the planner agent to invoke the worker agent. + # The long context provided to the worker agent is hidden from the main agent. + tools=[ + knowledgebase_agent.as_tool( + tool_name="KnowledgeBaseSearchAgent", + tool_description="Perform a search in the knowledge base and return a concise answer.", + ) + ], + # a larger, more capable model for planning and reasoning over summaries + model=agents.OpenAIChatCompletionsModel( + model="gemini-2.5-pro", openai_client=async_openai_client + ), +) + + +async def _main(question: str, gr_messages: list[ChatMessage]): + setup_langfuse_tracer() + + # Use the main agent as the entry point- not the worker agent. + with langfuse_client.start_as_current_span(name="Agents-SDK-Trace") as span: + span.update(input=question) + + result_stream = agents.Runner.run_streamed(orchestrator_agent, input=question) + async for _item in result_stream.stream_events(): + gr_messages += oai_agent_stream_to_gradio_messages(_item) + if len(gr_messages) > 0: + yield gr_messages + + span.update(output=result_stream.final_output) + + +demo = gr.ChatInterface( + _main, + title="Hitachi Multi-Agent Knowledge Retrieval System", + type="messages", + examples=[ + "What city are George Washington University Hospital" + " and MedStar Washington Hospital Center located in?" 
+ ], +) + + +if __name__ == "__main__": + async_openai_client = AsyncOpenAI() + + signal.signal(signal.SIGINT, _handle_sigint) + + try: + demo.launch(server_name="0.0.0.0") + finally: + asyncio.run(_cleanup_clients()) diff --git a/src/4_hitachi/__init__.py b/src/4_hitachi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/prompts.py b/src/prompts.py index 7dc95d4..419c486 100644 --- a/src/prompts.py +++ b/src/prompts.py @@ -1,12 +1,141 @@ """Centralized location for all system prompts.""" -REACT_INSTRUCTIONS = """\ -Answer the question using the search tool. \ -EACH TIME before invoking the function, you must explain your reasons for doing so. \ -Be sure to mention the sources in your response. \ -If the search tool did not return intended results, try again. \ -For best performance, divide complex queries into simpler sub-queries. \ -Do not make up information. \ -For facts that might change over time, you must use the search tool to retrieve the \ -most up-to-date information. +REACT_INSTRUCTIONS = """ +You are a high-level orchestration agent that executes a multi-step process to answer and evaluate a user's query. You must reason about each step, use the available tools in a specific sequence, and use the output of one step as the input for the next. +**## Your Tools** +You have access to the following agents as tools. Retry the knowledge_base_search_agent tool call up to 3 times if it cannot find the answer. +1. `qa_search_agent(user_query: str, topic: str)` + * **Description:** Searches a QA dataset for a question that is semantically similar to the `user_query`. + * **Returns:** A JSON string with `matched_question`, `ground_truth_answer`, and `ground_truth_context`. +2. `knowledge_base_search_agent(question: str, topic: str)` + * **Description:** Takes a question and searches topics that match the conditions specified in the question, to create a proposed answer.
+ * **Returns:** A JSON string with the `proposed_answer` and the `supporting_facts` used to create it. +3. `evaluator_agent(question: str, ground_truth_answer: str, proposed_answer: str, supporting_facts: str)` + * **Description:** Compares `proposed_answer` to the `ground_truth` answer to determine if it is correct. + * **Returns:** A JSON string with `proposed_answer`, a boolean `is_answer_correct`, and 'supporting_facts' +**## Execution Plan** +You must follow this exact five-step sequence: +1. **Step 1: Parse user's query.** Analyze the user's query. Find the main topic of the query. This main topic is referred to as [Topic]. +2. **Step 2: Retrieve the ground truth.** Use the `qa_search_agent` with the initial user query to find the [Question] and [Expected Answer] pairs, for which the [Topic] matches with the [Context] defined for the [Question]. Returned [Expected Answer] is the 'ground truth'. +3. **Step 3: Generate a New Answer.** Use the `kb_search_agent`. The prompt for this tool will be "Find the [Topic]s that match the conditions specified in the [Question]". The response of the 'kb_search_agent' includes the 'proposed answer' and 'supporting fact'. +4. **Step 4: Check against ground truth.** Check if the 'proposed answer' matches with the 'ground truth'. +5. **Step 5: Report the result.** If the output of 'Step 4' is positive, provide the answer including [Topic], 'proposed answer' and 'supporting fact'. + + +**## Response Format** +For each step, you must first use the `Thought:` prefix to explain your reasoning and which tool you are about to call. Then, use the `Action:` prefix to specify the tool call in a single JSON string. +**Example of a single step:** +**Thought:** I need to start the process by finding a related question in the QA dataset. I will use the `qa_search_agent` with the user's original query. +**Action:** +```json +{ + "tool_name": "qa_search_agent", + "parameters": { + "user_query": "How do I reset my password if I lost my email?"
+ } +} +""" + +QA_SEARCH_INSTRUCTIONS = """ +You are a QA Dataset Retrieval Specialist. Your task is to take a user's query and a list of search results from a QA database, identify the single best matching question-answer pair, and format the output as a clean JSON string. + +**Your Instructions:** + +1. **Analyze Inputs:** You will be provided with the original `[User Query]` and the `[Retrieved QA Data]`, which is a list of potential matches from the database. +2. **Identify Best Match:** From the `[Retrieved QA Data]`, identify the **single question** that is most semantically related to the `[User Query]`. If no question is found, follow instruction 4 below. +3. **Extract Information:** From that single best match, extract its corresponding `question`, `answer` (the ground truth), and `context`. +4. **Handle No Match:** If none of the retrieved questions are a good semantic match for the `[User Query]`, you must return `null` for the `matched_question`, `ground_truth_answer`, and `supporting_context` fields. +5. **Strict JSON Output:** You MUST format your entire response as a single JSON string with the specified keys. Do not add any text or explanations outside of the JSON string. + +**Example:** + +*Input provided to you:* + +`[User Query]` +"How do I reset my password if I forgot my email?" + +`[Retrieved QA Data]` +```json +[ + { + "question": "What is the process for a standard password reset?", + "answer": "Go to the login page and click 'Forgot Password'.", + "context": "Users can reset their password by clicking the 'Forgot Password' link on the main login screen and following the email instructions." + }, + { + "question": "What should a user do if they have forgotten their login email address and cannot receive reset links?", + "answer": "The user must contact customer support directly to verify their identity through our security protocol.", + "context": "For security reasons, if a user loses access to their registered email, self-service reset is not possible.
They must call customer support at 1-800-555-1234 to begin the identity verification process." + } +] +""" + +KB_SEARCH_INSTRUCTIONS = """ +You are an expert Question-Answering agent with access to a knowledge base search tool. Your sole purpose is to analyze a user's `[Question]` and search the knowledge base for required `[Context]`, and then generate a final answer. +**Your Instructions:** +1. **Analyze Inputs:** You will be provided with a '[Topic]' and a `[Question]`. +2. **Search the Knowledge Base:** Find the [Topic]s in the knowledge base, which match the conditions specified in the `[Question]`. All of the [Topic]s found are defined as the `[Context]`. +3. **Find the supporting fact from the Knowledge Base:** Capture the [Supporting Fact] that shows that the [Topic] meets the conditions in the `[Question]`. +4. **Derive the Answer:** Your answer must be derived **only** from the provided `[Context]`. Do not use any prior knowledge. +5. **Be Extremely Concise:** The answer must be the most concise and direct response possible. Do not add any extra words or explanations. +6. **Provide all potential answers:** The answer must include all [Topic]s in the [Context] and the corresponding [Supporting Fact]. +7. **Handle Missing Information:** If the answer cannot be found within the `[Context]`, the value for the `proposed_answer` key must be: "The answer could not be found in the provided context." +8. **Strict JSON string Output:** You MUST format your entire response as a single, valid JSON string. Do not add any text or explanations outside of the JSON string. +**## Example** +**Inputs provided to you:** +`[Topic]` +"Capitals of Countries" + +`[Question]` +"What are the major cities in France?" +All [Topic]s found in list format, provided as `[Context]` +" +1. Paris +Paris is the nation's capital, celebrated globally for its art, fashion, gastronomy, and iconic landmarks like the Eiffel Tower and the Louvre.
🇫🇷 It's a major city because it serves as the political, economic, financial, and cultural heart of France, functioning as one of the world's most influential global hubs. + +2. Marseille +Marseille, France's oldest city, is a bustling port on the Mediterranean coast with a rich, multicultural heritage. This city is major due to its status as France's largest commercial port, making it a crucial center for trade and industry connecting Europe with North Africa and the Middle East. + +3. Lyon +Lyon is renowned as the culinary capital of France and is a historic city situated at the confluence of the Rhône and Saône rivers. It stands as a major city because it is a powerful economic hub for banking, chemical, pharmaceutical, and biotech industries. +" +`[Answer]` +"The capital of France is Paris." +**Your required JSON output:** +```json +{ + "question": "What are the major cities in France?", + "topic": "Capitals of Countries", + "supporting fact": "Paris is the nation's capital, celebrated globally for its art, fashion, gastronomy, and iconic landmarks like the Eiffel Tower and the Louvre. 🇫🇷 It's a major city because it serves as the political, economic, financial, and cultural heart of France, functioning as one of the world's most influential global hubs.", + "proposed_answer": "Paris" +} +""" + +EVALUATOR_INSTRUCTIONS = """ +You are a meticulous evaluation agent. Your purpose is to determine if a "Proposed Answer" is correct by comparing it against a "Ground Truth" answer for a given "Question". + +**Your Instructions:** + +1. **Analyze Inputs:** You will be provided with the `[Question]`, the correct `[Ground Truth]` answer, and the `[Proposed Answer]` that needs evaluation. +2. **Strict Comparison:** Base your evaluation **only** on a comparison between the `[Proposed Answer]` and the `[Ground Truth]`. The `[Question]` provides context for what was being asked. +3.
**Determine Correctness:** + * **Correct (True):** The `[Proposed Answer]` must fully and accurately match the information in the `[Ground Truth]`. Minor differences in phrasing are acceptable if the meaning is identical. + * **Incorrect (False):** The `[Proposed Answer]` contains factual errors, is incomplete, or contradicts the `[Ground Truth]`. +4. **Provide a Clear Explanation:** Your explanation should be a brief, one or two-sentence summary of your reasoning. + * If correct, state why (e.g., "The proposed answer correctly identifies Paris as the capital."). + * If incorrect, state why (e.g., "The proposed answer incorrectly states Lyon is the capital, while the ground truth is Paris."). +5. **Strict Output Format:** You MUST format your entire response as a JSON string with the specified keys. Do not add any text outside of the JSON string. + +**Example 1: Correct Answer** +*Input:* +`"question": "What is the capital of France?"` +`"ground_truth": "Paris"` +`"proposed_answer": "The capital of France is Paris."` + +*Output:* +```json +{ + "explanation": "The proposed answer correctly identifies Paris as the capital, matching the ground truth.", + "is_answer_correct": true +} """