|
| 1 | +"""Example demonstrating how to use the ContextGroundingVectorStore class with LangChain.""" |
| 2 | + |
| 3 | +import argparse |
| 4 | +import asyncio |
| 5 | +from pprint import pprint |
| 6 | +from typing import Any |
| 7 | + |
| 8 | +from dotenv import find_dotenv, load_dotenv |
| 9 | +from langchain_core.language_models.chat_models import BaseChatModel |
| 10 | +from langchain_core.output_parsers import StrOutputParser |
| 11 | +from langchain_core.prompts import ChatPromptTemplate |
| 12 | +from langchain_core.runnables import RunnablePassthrough |
| 13 | +from langchain_core.vectorstores import VectorStore |
| 14 | +from uipath_langchain.chat.models import UiPathAzureChatOpenAI |
| 15 | +from uipath_langchain.vectorstores.context_grounding_vectorstore import ( |
| 16 | + ContextGroundingVectorStore, |
| 17 | +) |
| 18 | + |
| 19 | + |
def create_retrieval_chain(vectorstore: VectorStore, model: BaseChatModel, k: int = 3):
    """Create a retrieval chain using a vector store.

    Args:
        vectorstore: Vector store to use for the chain
        model: LangChain language model to use for the chain
        k: Number of documents to retrieve for each query

    Returns:
        A callable that takes a query string and returns a dict with the
        generated answer under ``"result"`` and the retrieved documents
        under ``"source_documents"``.
    """
    # Create a retriever from the vector store
    retriever = vectorstore.as_retriever(
        search_kwargs={"k": k},
    )

    # Create a prompt template
    template = """Answer the question based on the following context:
    {context}
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    # Answer-generation chain. The retrieved documents are injected explicitly
    # (rather than wiring the retriever into the chain) so that retrieval runs
    # exactly once per query — the original invoked the retriever twice, which
    # doubled the retrieval cost and could ground the answer on different
    # documents than the ones returned as sources.
    answer_chain = prompt | model | StrOutputParser()

    def retrieval_chain(query: str) -> dict[str, Any]:
        # Retrieve once; reuse the same docs for the prompt context and the result.
        docs = retriever.invoke(query)
        answer = answer_chain.invoke({"context": docs, "question": query})
        return {"result": answer, "source_documents": docs}

    return retrieval_chain
| 60 | + |
| 61 | + |
async def main(index_name: str, query: str, k: int = 3):
    """Run a simple example of ContextGroundingVectorStore.

    Args:
        index_name: Name of the context-grounding index to search.
        query: Question to retrieve documents for and answer.
        k: Number of documents to retrieve per search.
    """
    load_dotenv(find_dotenv())

    vectorstore = ContextGroundingVectorStore(
        index_name=index_name,
    )

    # NOTE: the original overwrote the `query` parameter with a hard-coded
    # string here, silently ignoring the --query CLI argument; it also
    # hard-coded k=5 instead of honoring the `k` parameter. Both are fixed.

    # Perform semantic searches with distance scores
    docs_with_scores = await vectorstore.asimilarity_search_with_score(query=query, k=k)
    print("==== Docs with distance scores ====")
    pprint(
        [
            {"page_content": doc.page_content, "distance_score": distance_score}
            for doc, distance_score in docs_with_scores
        ]
    )

    # Perform a similarity search with relevance scores
    docs_with_relevance_scores = (
        await vectorstore.asimilarity_search_with_relevance_scores(query=query, k=k)
    )
    print("==== Docs with relevance scores ====")
    pprint(
        [
            {"page_content": doc.page_content, "relevance_score": relevance_score}
            for doc, relevance_score in docs_with_relevance_scores
        ]
    )

    # Build the model used by the retrieval chain
    model = UiPathAzureChatOpenAI(
        model="gpt-4o-2024-08-06",
        max_retries=3,
    )

    retrieval_chain = create_retrieval_chain(
        vectorstore=vectorstore,
        model=model,
        k=k,
    )

    # Run the retrieval chain and show the answer with its source documents
    result = retrieval_chain(query)
    print("==== Retrieval chain result ====")
    print(f"Query: {query}")
    print(f"Answer: {result['result']}")
    print("\nSource Documents:")
    for i, doc in enumerate(result["source_documents"]):
        print(f"\nDocument {i + 1}:")
        print(f"Content: {doc.page_content[:100]}...")
        print(
            f"Source: {doc.metadata.get('source', 'N/A')}, Page Number: {doc.metadata.get('page_number', '0')}"
        )
| 119 | + |
if __name__ == "__main__":
    # CLI entry point: collect the index name, query, and retrieval count,
    # then run the async example.
    parser = argparse.ArgumentParser()
    cli_options = (
        ("--index_name", str, "ECCN", "The name of the index to use"),
        (
            "--query",
            str,
            "What is the ECCN for a laptop?",
            "The query for which documents will be retrieved",
        ),
        ("--k", int, 3, "The number of documents to retrieve"),
    )
    for flag, value_type, fallback, description in cli_options:
        parser.add_argument(flag, type=value_type, default=fallback, help=description)
    opts = parser.parse_args()
    asyncio.run(main(opts.index_name, opts.query, opts.k))