From 3600c2d5ca66426de2fabc1f1c90c7a0db2df1a3 Mon Sep 17 00:00:00 2001 From: Andrei Rusu Date: Mon, 7 Apr 2025 10:58:10 +0300 Subject: [PATCH] feat: add context grounding vectorstore update comment revert version increment --- samples/uipath_retrieval_chain/.env.example | 9 + samples/uipath_retrieval_chain/README.md | 43 +++ samples/uipath_retrieval_chain/main.py | 135 +++++++++ samples/uipath_retrieval_chain/pyproject.toml | 9 + .../retrievers/context_grounding_retriever.py | 28 +- .../context_grounding_vectorstore.py | 265 ++++++++++++++++++ 6 files changed, 488 insertions(+), 1 deletion(-) create mode 100644 samples/uipath_retrieval_chain/.env.example create mode 100644 samples/uipath_retrieval_chain/README.md create mode 100644 samples/uipath_retrieval_chain/main.py create mode 100644 samples/uipath_retrieval_chain/pyproject.toml create mode 100644 src/uipath_langchain/vectorstores/context_grounding_vectorstore.py diff --git a/samples/uipath_retrieval_chain/.env.example b/samples/uipath_retrieval_chain/.env.example new file mode 100644 index 00000000..d56c2686 --- /dev/null +++ b/samples/uipath_retrieval_chain/.env.example @@ -0,0 +1,9 @@ +UIPATH_BASE_URL="https://alpha.uipath.com" +UIPATH_URL="https://alpha.uipath.com/goldenagents/DefaultTenant" +UIPATH_ORGANIZATION_ID="xxx" +UIPATH_TENANT_ID="xxx" +UIPATH_REQUESTING_PRODUCT="xxx" +UIPATH_REQUESTING_FEATURE="xxx" +UIPATH_ACCESS_TOKEN="xxx" +UIPATH_FOLDER_PATH="" +UIPATH_FOLDER_KEY="" diff --git a/samples/uipath_retrieval_chain/README.md b/samples/uipath_retrieval_chain/README.md new file mode 100644 index 00000000..b3e83373 --- /dev/null +++ b/samples/uipath_retrieval_chain/README.md @@ -0,0 +1,43 @@ +# Retrieval chain and Context Grounding vectorstore example + +Use the UiPath Context Grounding vectorstore to retrieve relevant documents for a query, and integrate this into a Langchain retrieval chain to answer that query. + +## Debug + +1. 
Clone the repository: +```bash +git clone +cd samples\uipath_retrieval_chain +``` + +2. Install dependencies: +```bash +pip install uv +uv venv -p 3.11 .venv +.venv\Scripts\activate +uv sync +``` + +3. Create a `.env` file in the project root using the template `.env.example`. + +### Run + +To check the vectorstore and retrieval chain outputs, you should run: + +```bash +python main.py --index_name $INDEX_NAME --query $QUERY --k $NUM_RESULTS +``` + +### Input Format + +The CLI parameters for the sample script are as follows: +$INDEX_NAME -> The name of the index to use (string) +$QUERY -> The query for which documents will be retrieved (string) +$NUM_RESULTS -> The number of documents to retrieve (integer) + + +### Output Format + +The script first outputs the result of retrieving the most relevant K documents, first with the distance score, then with the relevance score. +Finally, it outputs the result of running the retrieval chain on the query, mentioning the sources alongside the answer. +``` diff --git a/samples/uipath_retrieval_chain/main.py b/samples/uipath_retrieval_chain/main.py new file mode 100644 index 00000000..b727c1d6 --- /dev/null +++ b/samples/uipath_retrieval_chain/main.py @@ -0,0 +1,135 @@ +"""Example demonstrating how to use the ContextGroundingVectorStore class with LangChain.""" + +import argparse +import asyncio +from pprint import pprint +from typing import Any + +from dotenv import find_dotenv, load_dotenv +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import RunnablePassthrough +from langchain_core.vectorstores import VectorStore +from uipath_langchain.chat.models import UiPathAzureChatOpenAI +from uipath_langchain.vectorstores.context_grounding_vectorstore import ( + ContextGroundingVectorStore, +) + + +def create_retrieval_chain(vectorstore: VectorStore, model: BaseChatModel, k: int = 
3): + """Create a retrieval chain using a vector store. + + Args: + vectorstore: Vector store to use for the chain + model: LangChain language model to use for the chain + k: Number of documents the retriever fetches for each query + + Returns: + A retrieval chain ready to answer questions + """ + # Create a retriever from the vector store + retriever = vectorstore.as_retriever( + search_kwargs={"k": k}, + ) + + # Create a prompt template + template = """Answer the question based on the following context: + {context} + Question: {question} + """ + prompt = ChatPromptTemplate.from_template(template) + + # Create the retrieval chain + chain = ( + {"context": retriever, "question": RunnablePassthrough()} + | prompt + | model + | StrOutputParser() + ) + + # Return a function that will run the chain and include source documents + def retrieval_chain(query: str) -> dict[str, Any]: + # Get documents separately to include them in the result + docs = retriever.invoke(query) + # Run the chain + answer = chain.invoke(query) + # Return combined result + return {"result": answer, "source_documents": docs} + + return retrieval_chain + + +async def main(index_name: str, query: str, k: int = 3): + """Run a simple example of ContextGroundingVectorStore for the given index, query and k.""" + load_dotenv(find_dotenv()) + + vectorstore = ContextGroundingVectorStore( + index_name=index_name, + ) 
+ + # Perform semantic searches with distance scores, using the caller's + # query and k rather than hard-coded values + docs_with_scores = await vectorstore.asimilarity_search_with_score(query=query, k=k) + print("==== Docs with distance scores ====") + pprint( + [ + {"page_content": doc.page_content, "distance_score": distance_score} + for doc, distance_score in docs_with_scores + ] + ) + + # Perform a similarity search with relevance scores + docs_with_relevance_scores = ( + await vectorstore.asimilarity_search_with_relevance_scores(query=query, k=k) + ) + print("==== Docs with relevance scores ====") + pprint( + [ + {"page_content": doc.page_content, "relevance_score": relevance_score} + for doc, relevance_score in docs_with_relevance_scores + ] + ) + + # Create the chat model used by the retrieval chain + model = UiPathAzureChatOpenAI( + model="gpt-4o-2024-08-06", + max_retries=3, + ) + + retrieval_chain = create_retrieval_chain( + vectorstore=vectorstore, + model=model, + k=k, + ) + + # Run a retrieval chain + result = retrieval_chain(query) + print("==== Retrieval chain result ====") + print(f"Query: {query}") + print(f"Answer: {result['result']}") + print("\nSource Documents:") + for i, doc in enumerate(result["source_documents"]): + print(f"\nDocument {i + 1}:") + print(f"Content: {doc.page_content[:100]}...") + print( + f"Source: {doc.metadata.get('source', 'N/A')}, Page Number: {doc.metadata.get('page_number', '0')}" + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--index_name", type=str, default="ECCN", help="The name of the index to use" + ) + parser.add_argument( + "--query", + type=str, + default="What is the ECCN for a laptop?", + help="The query for which documents will be retrieved", + ) + parser.add_argument( + "--k", type=int, default=3, help="The number of documents to retrieve" + ) + args = parser.parse_args() + asyncio.run(main(args.index_name, args.query, args.k)) diff --git 
a/samples/uipath_retrieval_chain/pyproject.toml b/samples/uipath_retrieval_chain/pyproject.toml new file mode 100644 
index 00000000..c1131335 --- /dev/null +++ b/samples/uipath_retrieval_chain/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "uipath_retrieval_chain" +version = "0.0.1" +description = "Sample retrieval chain using UiPath Context Grounding API" +authors = [{ name = "Andrei Rusu", email = "andrei.rusu@uipath.com" }] +dependencies = [ + "uipath-langchain>=0.0.85", +] +requires-python = ">=3.10" diff --git a/src/uipath_langchain/retrievers/context_grounding_retriever.py b/src/uipath_langchain/retrievers/context_grounding_retriever.py index 4ecc2757..bed56786 100644 --- a/src/uipath_langchain/retrievers/context_grounding_retriever.py +++ b/src/uipath_langchain/retrievers/context_grounding_retriever.py @@ -1,6 +1,9 @@ from typing import List, Optional -from langchain_core.callbacks import CallbackManagerForRetrieverRun +from langchain_core.callbacks import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) from langchain_core.documents import Document from langchain_core.retrievers import BaseRetriever from uipath_sdk import UiPathSDK @@ -33,3 +36,26 @@ def _get_relevant_documents( ) for x in results ] + + async def _aget_relevant_documents( + self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun + ) -> List[Document]: + """Async retriever implementation: call the context_grounding API to search the requested index (10 results when number_of_results is None).""" + + sdk = self.uipath_sdk if self.uipath_sdk is not None else UiPathSDK() + results = await sdk.context_grounding.search_async( + self.index_name, + query, + self.number_of_results if self.number_of_results is not None else 10, + ) + + return [ + Document( + page_content=x.content, + metadata={ + "source": x.source, + "page_number": x.page_number, + }, + ) + for x in results + ] diff --git a/src/uipath_langchain/vectorstores/context_grounding_vectorstore.py b/src/uipath_langchain/vectorstores/context_grounding_vectorstore.py new file mode 100644 index 00000000..3bdcf7cc --- 
/dev/null +++ 
b/src/uipath_langchain/vectorstores/context_grounding_vectorstore.py @@ -0,0 +1,265 @@ +""" +Vector store implementation that connects to UiPath Context Grounding as a backend. + +This is a read-only vector store that uses the UiPath Context Grounding API to retrieve documents. + +You need to set the following environment variables (also see .env.example): +### - UIPATH_URL="https://alpha.uipath.com/{ORG_ID}/{TENANT_ID}" +### - UIPATH_ACCESS_TOKEN={BEARER_TOKEN_WITH_CONTEXT_GROUNDING_PERMISSIONS} +### - UIPATH_FOLDER_PATH="" - this can be left empty +### - UIPATH_FOLDER_KEY="" - this can be left empty +""" + +from collections.abc import Iterable +from typing import Any, Optional, TypeVar + +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore +from uipath_sdk import UiPathSDK + +VST = TypeVar("VST", bound="ContextGroundingVectorStore") + + +class ContextGroundingVectorStore(VectorStore): + """Vector store that uses UiPath Context Grounding (ECS) as a backend. + + This class provides a straightforward implementation that connects to the + UiPath Context Grounding API for semantic searching. + + Example: + .. code-block:: python + + from uipath_langchain.vectorstores.context_grounding_vectorstore import ContextGroundingVectorStore + + # Initialize the vector store with an index name + vectorstore = ContextGroundingVectorStore(index_name="ECCN") + + # Perform similarity search + docs_with_scores = vectorstore.similarity_search_with_score( + "How do I process an invoice?", k=5 + ) + """ + + def __init__( + self, + index_name: str, + uipath_sdk: Optional[UiPathSDK] = None, + ): + """Initialize the ContextGroundingVectorStore. + + Args: + index_name: Name of the context grounding index to use + uipath_sdk: Optional SDK instance to use. If not provided, a new instance will be created. 
+ """ + self.index_name = index_name + self.sdk = uipath_sdk or UiPathSDK() + + def similarity_search_with_score( + self, query: str, k: int = 4, **kwargs: Any + ) -> list[tuple[Document, float]]: + """Return documents most similar to the query along with the distances. + The distance is 1 - score, where score is the relevance score returned by the Context Grounding API. + + Args: + query: The query string + k: Number of results to return (default=4) + + Returns: + list of tuples of (document, distance) + """ + # Call the UiPath SDK to perform the search + results = self.sdk.context_grounding.search( + name=self.index_name, + query=query, + number_of_results=k, + ) + + # Convert the results to Documents with scores + docs_with_scores = [] + for result in results: + # Create metadata from result fields + metadata = { + "source": result.source, + "id": result.id, + "reference": result.reference, + "page_number": result.page_number, + "source_document_id": result.source_document_id, + "caption": result.caption, + } + + # Add any operation metadata if available + if result.metadata: + metadata["operation_id"] = result.metadata.operation_id + metadata["strategy"] = result.metadata.strategy + + # Create a Document with the content and metadata + doc = Document( + page_content=result.content, + metadata=metadata, + ) + + score = 1.0 - float(result.score) + + docs_with_scores.append((doc, score)) + + return docs_with_scores + + def similarity_search_with_relevance_scores( + self, query: str, k: int = 4, **kwargs: Any + ) -> list[tuple[Document, float]]: + """Return documents along with their relevance scores on a scale from 0 to 1. + + This recovers the scores provided by the Context Grounding API (as 1 - distance), + which are already normalized between 0 and 1. 
+ + Args: + query: The query string + k: Number of documents to return (default=4) + + Returns: + list of tuples of (document, relevance_score) + """ + return [ + (doc, 1.0 - score) + for doc, score in self.similarity_search_with_score(query, k, **kwargs) + ] + + async def asimilarity_search_with_score( + self, query: str, k: int = 4, **kwargs: Any + ) -> list[tuple[Document, float]]: + """Asynchronously return documents most similar to the query along with the distances (1 - relevance score). + + Args: + query: The query string + k: Number of results to return (default=4) + + Returns: + list of tuples of (document, distance) + """ + # Call the UiPath SDK to perform the search asynchronously + results = await self.sdk.context_grounding.search_async( + name=self.index_name, + query=query, + number_of_results=k, + ) + + # Convert the results to Documents with scores + docs_with_scores = [] + for result in results: + # Create metadata from result fields + metadata = { + "source": result.source, + "id": result.id, + "reference": result.reference, + "page_number": result.page_number, + "source_document_id": result.source_document_id, + "caption": result.caption, + } + + # Add any operation metadata if available + if result.metadata: + metadata["operation_id"] = result.metadata.operation_id + metadata["strategy"] = result.metadata.strategy + + # Create a Document with the content and metadata + doc = Document( + page_content=result.content, + metadata=metadata, + ) + + # Get the distance score as 1 - ecs_score + score = 1.0 - float(result.score) + + docs_with_scores.append((doc, score)) + + return docs_with_scores + + async def asimilarity_search_with_relevance_scores( + self, query: str, k: int = 4, **kwargs: Any + ) -> list[tuple[Document, float]]: + """Asynchronously return documents along with their relevance scores on a scale from 0 to 1. 
+ + This recovers the scores provided by the Context Grounding API (as 1 - distance), + which are already normalized between 0 and 1. 
+ + Args: + query: The query string + k: Number of documents to return (default=4) + + Returns: + list of tuples of (document, relevance_score) + """ + return [ + (doc, 1.0 - score) + for doc, score in await self.asimilarity_search_with_score( + query, k, **kwargs + ) + ] + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> list[Document]: + """Return documents most similar to the query. + + Args: + query: The query string + k: Number of results to return (default=4) + + Returns: + list of documents most similar to the query + """ + docs_and_scores = self.similarity_search_with_score(query, k, **kwargs) + return [doc for doc, _ in docs_and_scores] + + async def asimilarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> list[Document]: + """Asynchronously return documents most similar to the query. + + Args: + query: The query string + k: Number of results to return (default=4) + + Returns: + list of documents most similar to the query + """ + docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs) + return [doc for doc, _ in docs_and_scores] + + @classmethod + def from_texts( + cls: type[VST], + texts: list[str], + embedding: Embeddings, + metadatas: Optional[list[dict[str, Any]]] = None, + **kwargs: Any, + ) -> VST: + """This method is required by the VectorStore abstract class, but is not supported + by ContextGroundingVectorStore which is read-only. + + Raises: + NotImplementedError: This method is not supported by ContextGroundingVectorStore + """ + raise NotImplementedError( + "ContextGroundingVectorStore is a read-only wrapper for UiPath Context Grounding. " + "Creating a vector store from texts is not supported." 
+ ) + + # Other required methods with minimal implementation to satisfy the interface + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[list[dict[str, Any]]] = None, + **kwargs: Any, + ) -> list[str]: + """Not implemented for ContextGroundingVectorStore as this is a read-only wrapper.""" + raise NotImplementedError( + "ContextGroundingVectorStore is a read-only wrapper for UiPath Context Grounding." + ) + + def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> Optional[bool]: + """Not implemented for ContextGroundingVectorStore as this is a read-only wrapper.""" + raise NotImplementedError( + "ContextGroundingVectorStore is a read-only wrapper for UiPath Context Grounding." + )