diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 989f73871..dc4db97c5 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -18,5 +18,11 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r ClientAdvisor/App/requirements.txt
- - name: Run flake8
- run: flake8 --config=ClientAdvisor/App/.flake8 ClientAdvisor/App
+ pip install -r ResearchAssistant/App/requirements.txt
+ pip install pylint flake8 azure-identity pymssql
+ - name: Run flake8 and pylint
+ run: |
+ flake8 --config=ClientAdvisor/App/.flake8 ClientAdvisor/App
+ pylint --rcfile=ClientAdvisor/App/.pylintrc ClientAdvisor/App
+ flake8 --config=ResearchAssistant/App/.flake8 ResearchAssistant/App
+ pylint --rcfile=ResearchAssistant/App/.pylintrc ResearchAssistant/App
\ No newline at end of file
diff --git a/ClientAdvisor/App/.flake8 b/ClientAdvisor/App/.flake8
index 74ee71d52..bc2f0943d 100644
--- a/ClientAdvisor/App/.flake8
+++ b/ClientAdvisor/App/.flake8
@@ -1,4 +1,4 @@
[flake8]
-max-line-length = 88
-extend-ignore = E501, E203
-exclude = .venv, frontend,
\ No newline at end of file
+max-line-length = 160
+extend-ignore = E203, W503, E501
+exclude = .venv, frontend
\ No newline at end of file
diff --git a/ClientAdvisor/App/.pylintrc b/ClientAdvisor/App/.pylintrc
new file mode 100644
index 000000000..dc035c349
--- /dev/null
+++ b/ClientAdvisor/App/.pylintrc
@@ -0,0 +1,41 @@
+[MASTER]
+ignore=tests ; Ignore the tests folder globally.
+
+[MESSAGES CONTROL]
+disable=
+ invalid-name, # C0103: Ignore naming style errors
+ line-too-long, # C0301: Ignore long lines
+ missing-function-docstring, # C0116: Ignore missing function docstrings
+ missing-class-docstring, # C0115: Ignore missing class docstrings
+ missing-module-docstring, # C0114: Ignore missing module docstrings
+ redefined-outer-name, # W0621: Ignore redefined variables warnings
+ broad-exception-raised, # W0719: Ignore broad exception raised warnings
+ broad-exception-caught, # W0718: Ignore broad exception caught warnings
+ too-many-arguments, # R0913: Ignore too many arguments
+ too-many-locals, # R0914: Ignore too many local variables
+ too-many-return-statements, # R0911: Ignore too many return statements
+ too-many-branches, # R0912: Ignore too many branches
+ unused-argument, # W0613: Ignore unused arguments
+ unspecified-encoding, # W1514: Ignore unspecified encoding in open()
+    logging-fstring-interpolation, # W1203: Ignore f-string interpolation in logging calls
+ missing-timeout, # W3101: Ignore missing timeout in requests.get
+ no-else-return, # R1705: Ignore unnecessary 'else' after return
+ redefined-builtin, # W0622: Ignore redefining built-ins
+ global-statement, # W0603: Ignore global statement usage
+ no-name-in-module, # E0611: Ignore unresolved module names
+ no-member, # E1101: Ignore module has no 'member'
+ pointless-string-statement, # W0105: Ignore pointless string statements
+ unnecessary-comprehension, # R1721: Ignore unnecessary comprehensions
+ fixme, # W0511: Ignore TODO comments
+ too-many-instance-attributes, # R0902: Ignore too many attributes in class
+ too-many-positional-arguments, # R0917: Ignore too many positional arguments
+ raise-missing-from, # W0707: Ignore re-raising without 'raise from'
+ import-outside-toplevel, # C0415: Ignore imports outside top-level
+ no-value-for-parameter # E1120: Ignore missing arguments in function
+
+[TYPECHECK]
+generated-members=get_bearer_token_provider
+
+[FORMAT]
+max-module-lines=1700 # Allow large modules up to 1700 lines
+max-line-length=160 # Allow lines up to 160 characters
\ No newline at end of file
diff --git a/ClientAdvisor/App/app.py b/ClientAdvisor/App/app.py
index e82212438..2a25c2d14 100644
--- a/ClientAdvisor/App/app.py
+++ b/ClientAdvisor/App/app.py
@@ -8,22 +8,31 @@
import httpx
import requests
-from azure.identity.aio import (DefaultAzureCredential,
- get_bearer_token_provider)
+from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
+from backend.auth.auth_utils import get_authenticated_user_details, get_tenantid
+from backend.history.cosmosdbservice import CosmosConversationClient
+from backend.utils import (
+ convert_to_pf_format,
+ format_as_ndjson,
+ format_pf_non_streaming_response,
+ format_stream_response,
+ generateFilterString,
+ parse_multi_columns,
+)
+from db import get_connection
from dotenv import load_dotenv
+
# from quart.sessions import SecureCookieSessionInterface
from openai import AsyncAzureOpenAI
-from quart import (Blueprint, Quart, jsonify, make_response, render_template,
- request, send_from_directory)
-
-from backend.auth.auth_utils import (get_authenticated_user_details,
- get_tenantid)
-from backend.history.cosmosdbservice import CosmosConversationClient
-from backend.utils import (convert_to_pf_format, format_as_ndjson,
- format_pf_non_streaming_response,
- format_stream_response, generateFilterString,
- parse_multi_columns)
-from db import get_connection
+from quart import (
+ Blueprint,
+ Quart,
+ jsonify,
+ make_response,
+ render_template,
+ request,
+ send_from_directory,
+)
bp = Blueprint("routes", __name__, static_folder="static", template_folder="static")
@@ -172,7 +181,7 @@ async def assets(path):
"AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS"
)
-SHOULD_STREAM = True if AZURE_OPENAI_STREAM.lower() == "true" else False
+SHOULD_STREAM = AZURE_OPENAI_STREAM.lower() == "true"
# Chat History CosmosDB Integration Settings
AZURE_COSMOSDB_DATABASE = os.environ.get("AZURE_COSMOSDB_DATABASE")
@@ -373,7 +382,7 @@ def init_openai_client(use_data=SHOULD_USE_DATA):
return azure_openai_client
except Exception as e:
- logging.exception("Exception in Azure OpenAI initialization", e)
+ logging.exception("Exception in Azure OpenAI initialization", exc_info=True)
azure_openai_client = None
raise e
@@ -399,7 +408,7 @@ def init_cosmosdb_client():
enable_message_feedback=AZURE_COSMOSDB_ENABLE_FEEDBACK,
)
except Exception as e:
- logging.exception("Exception in CosmosDB initialization", e)
+ logging.exception("Exception in CosmosDB initialization", exc_info=True)
cosmos_conversation_client = None
raise e
else:
@@ -472,9 +481,7 @@ def get_configured_data_source():
else []
),
},
- "in_scope": (
- True if AZURE_SEARCH_ENABLE_IN_DOMAIN.lower() == "true" else False
- ),
+ "in_scope": (AZURE_SEARCH_ENABLE_IN_DOMAIN.lower() == "true"),
"top_n_documents": (
int(AZURE_SEARCH_TOP_K) if AZURE_SEARCH_TOP_K else int(SEARCH_TOP_K)
),
@@ -534,9 +541,7 @@ def get_configured_data_source():
),
},
"in_scope": (
- True
- if AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN.lower() == "true"
- else False
+ AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN.lower() == "true"
),
"top_n_documents": (
int(AZURE_COSMOSDB_MONGO_VCORE_TOP_K)
@@ -590,9 +595,7 @@ def get_configured_data_source():
else []
),
},
- "in_scope": (
- True if ELASTICSEARCH_ENABLE_IN_DOMAIN.lower() == "true" else False
- ),
+ "in_scope": (ELASTICSEARCH_ENABLE_IN_DOMAIN.lower() == "true"),
"top_n_documents": (
int(ELASTICSEARCH_TOP_K)
if ELASTICSEARCH_TOP_K
@@ -642,9 +645,7 @@ def get_configured_data_source():
else []
),
},
- "in_scope": (
- True if AZURE_MLINDEX_ENABLE_IN_DOMAIN.lower() == "true" else False
- ),
+ "in_scope": (AZURE_MLINDEX_ENABLE_IN_DOMAIN.lower() == "true"),
"top_n_documents": (
int(AZURE_MLINDEX_TOP_K)
if AZURE_MLINDEX_TOP_K
@@ -687,9 +688,7 @@ def get_configured_data_source():
else []
),
},
- "in_scope": (
- True if PINECONE_ENABLE_IN_DOMAIN.lower() == "true" else False
- ),
+ "in_scope": (PINECONE_ENABLE_IN_DOMAIN.lower() == "true"),
"top_n_documents": (
int(PINECONE_TOP_K) if PINECONE_TOP_K else int(SEARCH_TOP_K)
),
diff --git a/ClientAdvisor/App/backend/history/cosmosdbservice.py b/ClientAdvisor/App/backend/history/cosmosdbservice.py
index 70c2df5b1..85dc1695e 100644
--- a/ClientAdvisor/App/backend/history/cosmosdbservice.py
+++ b/ClientAdvisor/App/backend/history/cosmosdbservice.py
@@ -27,8 +27,7 @@ def __init__(
except exceptions.CosmosHttpResponseError as e:
if e.status_code == 401:
raise ValueError("Invalid credentials") from e
- else:
- raise ValueError("Invalid CosmosDB endpoint") from e
+ raise ValueError("Invalid CosmosDB endpoint") from e
try:
self.database_client = self.cosmosdb_client.get_database_client(
diff --git a/ClientAdvisor/App/tests/backend/auth/test_auth.py b/ClientAdvisor/App/tests/backend/auth/test_auth.py
index 1adf323d5..7854d9b07 100644
--- a/ClientAdvisor/App/tests/backend/auth/test_auth.py
+++ b/ClientAdvisor/App/tests/backend/auth/test_auth.py
@@ -2,8 +2,7 @@
import json
from unittest.mock import patch
-from backend.auth.auth_utils import (get_authenticated_user_details,
- get_tenantid)
+from backend.auth.auth_utils import get_authenticated_user_details, get_tenantid
def test_get_authenticated_user_details_no_principal_id():
diff --git a/ClientAdvisor/App/tests/backend/history/test_cosmosdb_service.py b/ClientAdvisor/App/tests/backend/history/test_cosmosdb_service.py
index ff0a51e5b..b28096d9e 100644
--- a/ClientAdvisor/App/tests/backend/history/test_cosmosdb_service.py
+++ b/ClientAdvisor/App/tests/backend/history/test_cosmosdb_service.py
@@ -2,7 +2,6 @@
import pytest
from azure.cosmos import exceptions
-
from backend.history.cosmosdbservice import CosmosConversationClient
diff --git a/ClientAdvisor/App/tests/backend/test_utils.py b/ClientAdvisor/App/tests/backend/test_utils.py
index 1585cd7fb..4880c98c9 100644
--- a/ClientAdvisor/App/tests/backend/test_utils.py
+++ b/ClientAdvisor/App/tests/backend/test_utils.py
@@ -3,12 +3,17 @@
from unittest.mock import MagicMock, patch
import pytest
-
-from backend.utils import (JSONEncoder, convert_to_pf_format, fetchUserGroups,
- format_as_ndjson, format_non_streaming_response,
- format_pf_non_streaming_response,
- format_stream_response, generateFilterString,
- parse_multi_columns)
+from backend.utils import (
+ JSONEncoder,
+ convert_to_pf_format,
+ fetchUserGroups,
+ format_as_ndjson,
+ format_non_streaming_response,
+ format_pf_non_streaming_response,
+ format_stream_response,
+ generateFilterString,
+ parse_multi_columns,
+)
@dataclasses.dataclass
diff --git a/ClientAdvisor/App/tests/test_app.py b/ClientAdvisor/App/tests/test_app.py
index d456ac702..3cfd1269f 100644
--- a/ClientAdvisor/App/tests/test_app.py
+++ b/ClientAdvisor/App/tests/test_app.py
@@ -2,9 +2,14 @@
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
-
-from app import (create_app, delete_all_conversations, generate_title,
- init_cosmosdb_client, init_openai_client, stream_chat_request)
+from app import (
+ create_app,
+ delete_all_conversations,
+ generate_title,
+ init_cosmosdb_client,
+ init_openai_client,
+ stream_chat_request,
+)
# Constants for testing
INVALID_API_VERSION = "2022-01-01"
diff --git a/ClientAdvisor/App/tools/data_collection.py b/ClientAdvisor/App/tools/data_collection.py
index c0bb184bc..60644c092 100644
--- a/ClientAdvisor/App/tools/data_collection.py
+++ b/ClientAdvisor/App/tools/data_collection.py
@@ -3,9 +3,8 @@
import os
import sys
-from dotenv import load_dotenv
-
import app
+from dotenv import load_dotenv
# import the app.py module to gain access to the methods to construct payloads and
# call the API through the sdk
diff --git a/ClientAdvisor/AzureFunction/function_app.py b/ClientAdvisor/AzureFunction/function_app.py
index f9bfd8dc8..fea37ad50 100644
--- a/ClientAdvisor/AzureFunction/function_app.py
+++ b/ClientAdvisor/AzureFunction/function_app.py
@@ -1,13 +1,16 @@
-import azure.functions as func
-import openai
-from azurefunctions.extensions.http.fastapi import Request, StreamingResponse
import asyncio
import os
-
from typing import Annotated
+import azure.functions as func
+import openai
+import pymssql
+from azurefunctions.extensions.http.fastapi import Request, StreamingResponse
from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion, OpenAIChatCompletion
+from semantic_kernel.connectors.ai.open_ai import (
+ AzureChatCompletion,
+ OpenAIChatCompletion,
+)
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
OpenAIChatPromptExecutionSettings,
)
@@ -17,7 +20,6 @@
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.functions.kernel_function_decorator import kernel_function
from semantic_kernel.kernel import Kernel
-import pymssql
# Azure Function App
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)
@@ -28,43 +30,53 @@
deployment = os.environ.get("AZURE_OPEN_AI_DEPLOYMENT_MODEL")
temperature = 0
-search_endpoint = os.environ.get("AZURE_AI_SEARCH_ENDPOINT")
+search_endpoint = os.environ.get("AZURE_AI_SEARCH_ENDPOINT")
search_key = os.environ.get("AZURE_AI_SEARCH_API_KEY")
+
class ChatWithDataPlugin:
- @kernel_function(name="Greeting", description="Respond to any greeting or general questions")
- def greeting(self, input: Annotated[str, "the question"]) -> Annotated[str, "The output is a string"]:
- query = input.split(':::')[0]
+ @kernel_function(
+ name="Greeting", description="Respond to any greeting or general questions"
+ )
+ def greeting(
+ self, input: Annotated[str, "the question"]
+ ) -> Annotated[str, "The output is a string"]:
+ query = input.split(":::")[0]
endpoint = os.environ.get("AZURE_OPEN_AI_ENDPOINT")
api_key = os.environ.get("AZURE_OPEN_AI_API_KEY")
client = openai.AzureOpenAI(
- azure_endpoint=endpoint,
- api_key=api_key,
- api_version="2023-09-01-preview"
+ azure_endpoint=endpoint, api_key=api_key, api_version="2023-09-01-preview"
)
deployment = os.environ.get("AZURE_OPEN_AI_DEPLOYMENT_MODEL")
try:
completion = client.chat.completions.create(
model=deployment,
messages=[
- {"role": "system", "content": "You are a helpful assistant to repond to any greeting or general questions."},
+ {
+ "role": "system",
+                        "content": "You are a helpful assistant to respond to any greeting or general questions.",
+ },
{"role": "user", "content": query},
],
temperature=0,
)
answer = completion.choices[0].message.content
except Exception as e:
- answer = str(e) # 'Information from database could not be retrieved. Please try again later.'
+ answer = str(
+ e
+ ) # 'Information from database could not be retrieved. Please try again later.'
return answer
-
- @kernel_function(name="ChatWithSQLDatabase", description="Given a query about client assets, investements and meeting dates or times, get details from the database")
+ @kernel_function(
+ name="ChatWithSQLDatabase",
+        description="Given a query about client assets, investments and meeting dates or times, get details from the database",
+ )
def get_SQL_Response(
self,
input: Annotated[str, "the question"],
- ClientId: Annotated[str, "the ClientId"]
- ):
-
+ ClientId: Annotated[str, "the ClientId"],
+ ):
+
# clientid = input.split(':::')[-1]
# query = input.split(':::')[0] + ' . ClientId = ' + input.split(':::')[-1]
clientid = ClientId
@@ -73,13 +85,11 @@ def get_SQL_Response(
api_key = os.environ.get("AZURE_OPEN_AI_API_KEY")
client = openai.AzureOpenAI(
- azure_endpoint=endpoint,
- api_key=api_key,
- api_version="2023-09-01-preview"
+ azure_endpoint=endpoint, api_key=api_key, api_version="2023-09-01-preview"
)
deployment = os.environ.get("AZURE_OPEN_AI_DEPLOYMENT_MODEL")
- sql_prompt = f'''A valid T-SQL query to find {query} for tables and columns provided below:
+ sql_prompt = f"""A valid T-SQL query to find {query} for tables and columns provided below:
1. Table: Clients
Columns: ClientId,Client,Email,Occupation,MaritalStatus,Dependents
2. Table: InvestmentGoals
@@ -100,7 +110,7 @@ def get_SQL_Response(
Do not include assets values unless asked for.
Always use ClientId = {clientid} in the query filter.
Always return client name in the query.
- Only return the generated sql query. do not return anything else'''
+ Only return the generated sql query. do not return anything else"""
try:
completion = client.chat.completions.create(
@@ -112,9 +122,9 @@ def get_SQL_Response(
temperature=0,
)
sql_query = completion.choices[0].message.content
- sql_query = sql_query.replace("```sql",'').replace("```",'')
- #print(sql_query)
-
+ sql_query = sql_query.replace("```sql", "").replace("```", "")
+ # print(sql_query)
+
connectionString = os.environ.get("SQLDB_CONNECTION_STRING")
server = os.environ.get("SQLDB_SERVER")
database = os.environ.get("SQLDB_DATABASE")
@@ -125,57 +135,55 @@ def get_SQL_Response(
# conn = pyodbc.connect(connectionString)
cursor = conn.cursor()
cursor.execute(sql_query)
- answer = ''
+ answer = ""
for row in cursor.fetchall():
answer += str(row)
except Exception as e:
- answer = str(e) # 'Information from database could not be retrieved. Please try again later.'
+ answer = str(
+ e
+ ) # 'Information from database could not be retrieved. Please try again later.'
return answer
-        #return sql_query
-
-    @kernel_function(name="ChatWithCallTranscripts", description="given a query about meetings summary or actions or notes, get answer from search index for a given ClientId")
+        # return sql_query
+ @kernel_function(
+ name="ChatWithCallTranscripts",
+        description="Given a query about meeting summaries, actions, or notes, get the answer from the search index for a given ClientId",
+ )
def get_answers_from_calltranscripts(
self,
question: Annotated[str, "the question"],
- ClientId: Annotated[str, "the ClientId"]
+ ClientId: Annotated[str, "the ClientId"],
):
- endpoint=os.environ.get("AZURE_OPEN_AI_ENDPOINT")
- deployment=os.environ.get("AZURE_OPEN_AI_DEPLOYMENT_MODEL")
- apikey=os.environ.get("AZURE_OPEN_AI_API_KEY")
+ endpoint = os.environ.get("AZURE_OPEN_AI_ENDPOINT")
+ deployment = os.environ.get("AZURE_OPEN_AI_DEPLOYMENT_MODEL")
+ apikey = os.environ.get("AZURE_OPEN_AI_API_KEY")
- search_endpoint = os.environ.get("AZURE_AI_SEARCH_ENDPOINT")
+ search_endpoint = os.environ.get("AZURE_AI_SEARCH_ENDPOINT")
search_key = os.environ.get("AZURE_AI_SEARCH_API_KEY")
index_name = os.environ.get("AZURE_SEARCH_INDEX")
client = openai.AzureOpenAI(
- azure_endpoint= endpoint, #f"{endpoint}/openai/deployments/{deployment}/extensions",
- api_key=apikey,
- api_version="2024-02-01"
+ azure_endpoint=endpoint, # f"{endpoint}/openai/deployments/{deployment}/extensions",
+ api_key=apikey,
+ api_version="2024-02-01",
)
query = question
- system_message = '''You are an assistant who provides wealth advisors with helpful information to prepare for client meetings.
+ system_message = """You are an assistant who provides wealth advisors with helpful information to prepare for client meetings.
You have access to the client’s meeting call transcripts.
- You can use this information to answer questions about the clients'''
+ You can use this information to answer questions about the clients"""
completion = client.chat.completions.create(
- model = deployment,
- messages = [
- {
- "role": "system",
- "content": system_message
- },
- {
- "role": "user",
- "content": query
- }
+ model=deployment,
+ messages=[
+ {"role": "system", "content": system_message},
+ {"role": "user", "content": query},
],
- seed = 42,
- temperature = 0,
- max_tokens = 800,
- extra_body = {
+ seed=42,
+ temperature=0,
+ max_tokens=800,
+ extra_body={
"data_sources": [
{
"type": "azure_search",
@@ -183,47 +191,45 @@ def get_answers_from_calltranscripts(
"endpoint": search_endpoint,
"index_name": index_name,
"semantic_configuration": "default",
- "query_type": "vector_simple_hybrid", #"vector_semantic_hybrid"
+ "query_type": "vector_simple_hybrid", # "vector_semantic_hybrid"
"fields_mapping": {
"content_fields_separator": "\n",
"content_fields": ["content"],
"filepath_field": "chunk_id",
- "title_field": "", #null,
+ "title_field": "", # null,
"url_field": "sourceurl",
- "vector_fields": ["contentVector"]
+ "vector_fields": ["contentVector"],
},
- "semantic_configuration": 'my-semantic-config',
+ "semantic_configuration": "my-semantic-config",
"in_scope": "true",
"role_information": system_message,
# "vector_filter_mode": "preFilter", #VectorFilterMode.PRE_FILTER,
- "filter": f"client_id eq '{ClientId}'", #"", #null,
+ "filter": f"client_id eq '{ClientId}'", # "", #null,
"strictness": 3,
"top_n_documents": 5,
- "authentication": {
- "type": "api_key",
- "key": search_key
- },
+ "authentication": {"type": "api_key", "key": search_key},
"embedding_dependency": {
"type": "deployment_name",
- "deployment_name": "text-embedding-ada-002"
+ "deployment_name": "text-embedding-ada-002",
},
-
- }
+ },
}
]
- }
+ },
)
answer = completion.choices[0].message.content
return answer
+
# Get data from Azure Open AI
async def stream_processor(response):
async for message in response:
- if str(message[0]): # Get remaining generated response if applicable
+ if str(message[0]): # Get remaining generated response if applicable
await asyncio.sleep(0.1)
yield str(message[0])
+
@app.route(route="stream_openai_text", methods=[func.HttpMethod.GET])
async def stream_openai_text(req: Request) -> StreamingResponse:
@@ -242,15 +248,15 @@ async def stream_openai_text(req: Request) -> StreamingResponse:
endpoint=endpoint,
api_key=api_key,
api_version=api_version,
- deployment_name=deployment
+ deployment_name=deployment,
)
kernel.add_service(ai_service)
kernel.add_plugin(ChatWithDataPlugin(), plugin_name="ChatWithData")
- settings: OpenAIChatPromptExecutionSettings = kernel.get_prompt_execution_settings_from_service_id(
- service_id=service_id
+ settings: OpenAIChatPromptExecutionSettings = (
+ kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
)
settings.function_call_behavior = FunctionCallBehavior.EnableFunctions(
auto_invoke=True, filters={"included_plugins": ["ChatWithData"]}
@@ -259,25 +265,28 @@ async def stream_openai_text(req: Request) -> StreamingResponse:
settings.max_tokens = 800
settings.temperature = 0
- system_message = '''you are a helpful assistant to a wealth advisor.
+ system_message = """you are a helpful assistant to a wealth advisor.
Do not answer any questions not related to wealth advisors queries.
If the client name and client id do not match, only return - Please only ask questions about the selected client or select another client to inquire about their details. do not return any other information.
Only use the client name returned from database in the response.
If you cannot answer the question, always return - I cannot answer this question from the data available. Please rephrase or add more details.
** Remove any client identifiers or ids or numbers or ClientId in the final response.
- '''
-
- user_query = query.replace('?',' ')
+ """
- user_query_prompt = f'''{user_query}. Always send clientId as {user_query.split(':::')[-1]} '''
- query_prompt = f'''{system_message}{user_query_prompt}'''
+ user_query = query.replace("?", " ")
+ user_query_prompt = (
+ f"""{user_query}. Always send clientId as {user_query.split(':::')[-1]} """
+ )
+ query_prompt = f"""{system_message}{user_query_prompt}"""
sk_response = kernel.invoke_prompt_stream(
function_name="prompt_test",
plugin_name="weather_test",
prompt=query_prompt,
- settings=settings
- )
+ settings=settings,
+ )
- return StreamingResponse(stream_processor(sk_response), media_type="text/event-stream")
\ No newline at end of file
+ return StreamingResponse(
+ stream_processor(sk_response), media_type="text/event-stream"
+ )
diff --git a/ClientAdvisor/Deployment/scripts/fabric_scripts/create_fabric_items.py b/ClientAdvisor/Deployment/scripts/fabric_scripts/create_fabric_items.py
index 9a718a425..e5e159d76 100644
--- a/ClientAdvisor/Deployment/scripts/fabric_scripts/create_fabric_items.py
+++ b/ClientAdvisor/Deployment/scripts/fabric_scripts/create_fabric_items.py
@@ -1,88 +1,85 @@
-from azure.identity import DefaultAzureCredential
import base64
import json
-import requests
-import pandas as pd
import os
-from glob import iglob
import time
+from glob import iglob
+import pandas as pd
+import requests
# credential = DefaultAzureCredential()
-from azure.identity import AzureCliCredential
+from azure.identity import AzureCliCredential, DefaultAzureCredential
+
credential = AzureCliCredential()
-cred = credential.get_token('https://api.fabric.microsoft.com/.default')
+cred = credential.get_token("https://api.fabric.microsoft.com/.default")
token = cred.token
fabric_headers = {"Authorization": "Bearer " + token.strip()}
-key_vault_name = 'kv_to-be-replaced'
+key_vault_name = "kv_to-be-replaced"
workspaceId = "workspaceId_to-be-replaced"
solutionname = "solutionName_to-be-replaced"
create_workspace = False
-pipeline_notebook_name = 'pipeline_notebook'
-pipeline_name = 'data_pipeline'
-lakehouse_name = 'lakehouse_' + solutionname
+pipeline_notebook_name = "pipeline_notebook"
+pipeline_name = "data_pipeline"
+lakehouse_name = "lakehouse_" + solutionname
-print("workspace id: " ,workspaceId)
+print("workspace id: ", workspaceId)
if create_workspace == True:
- workspace_name = 'workspace_' + solutionname
+ workspace_name = "workspace_" + solutionname
- # create workspace
- ws_url = 'https://api.fabric.microsoft.com/v1/workspaces'
+ # create workspace
+ ws_url = "https://api.fabric.microsoft.com/v1/workspaces"
- ws_data = {
- "displayName": workspace_name
- }
- ws_res = requests.post(ws_url, headers=fabric_headers, json=ws_data)
- ws_details = ws_res.json()
- # print(ws_details['id'])
- workspaceId = ws_details['id']
+ ws_data = {"displayName": workspace_name}
+ ws_res = requests.post(ws_url, headers=fabric_headers, json=ws_data)
+ ws_details = ws_res.json()
+ # print(ws_details['id'])
+ workspaceId = ws_details["id"]
fabric_base_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/"
-fabric_items_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/"
+fabric_items_url = (
+ f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/"
+)
fabric_create_workspace_url = f"https://api.fabric.microsoft.com/v1/workspaces"
-#get workspace name
+# get workspace name
ws_res = requests.get(fabric_base_url, headers=fabric_headers)
-workspace_name = ws_res.json()['displayName']
+workspace_name = ws_res.json()["displayName"]
-#create lakehouse
-lakehouse_data = {
- "displayName": lakehouse_name,
- "type": "Lakehouse"
-}
-lakehouse_res = requests.post(fabric_items_url, headers=fabric_headers, json=lakehouse_data)
+# create lakehouse
+lakehouse_data = {"displayName": lakehouse_name, "type": "Lakehouse"}
+lakehouse_res = requests.post(
+ fabric_items_url, headers=fabric_headers, json=lakehouse_data
+)
# print("lakehouse name: ", lakehouse_name)
# copy local files to lakehouse
-from azure.storage.filedatalake import (
- DataLakeServiceClient
-)
+from azure.storage.filedatalake import DataLakeServiceClient
-account_name = "onelake" #always onelake
+account_name = "onelake" # always onelake
data_path = f"{lakehouse_name}.Lakehouse/Files"
folder_path = "/"
account_url = f"https://{account_name}.dfs.fabric.microsoft.com"
service_client = DataLakeServiceClient(account_url, credential=credential)
-#Create a file system client for the workspace
+# Create a file system client for the workspace
file_system_client = service_client.get_file_system_client(workspace_name)
directory_client = file_system_client.get_directory_client(f"{data_path}/{folder_path}")
-local_path = 'data/**/*'
+local_path = "data/**/*"
file_names = [f for f in iglob(local_path, recursive=True) if os.path.isfile(f)]
for file_name in file_names:
- file_client = directory_client.get_file_client(file_name)
- with open(file=file_name, mode="rb") as data:
- file_client.upload_data(data, overwrite=True)
+ file_client = directory_client.get_file_client(file_name)
+ with open(file=file_name, mode="rb") as data:
+ file_client.upload_data(data, overwrite=True)
# #get environments
# try:
@@ -93,28 +90,34 @@
# except:
# env_res_id = ''
-#create notebook items
-notebook_names =['pipeline_notebook','01_process_data','02_create_calendar_data']
+# create notebook items
+notebook_names = ["pipeline_notebook", "01_process_data", "02_create_calendar_data"]
# notebook_names =['process_data_new']
# add sleep timer
time.sleep(120) # 1 minute
for notebook_name in notebook_names:
- with open('notebooks/'+ notebook_name +'.ipynb', 'r') as f:
+ with open("notebooks/" + notebook_name + ".ipynb", "r") as f:
notebook_json = json.load(f)
print("lakehouse_res")
print(lakehouse_res)
print(lakehouse_res.json())
-
+
try:
- notebook_json['metadata']['dependencies']['lakehouse']['default_lakehouse'] = lakehouse_res.json()['id']
- notebook_json['metadata']['dependencies']['lakehouse']['default_lakehouse_name'] = lakehouse_res.json()['displayName']
- notebook_json['metadata']['dependencies']['lakehouse']['default_lakehouse_workspace_id'] = lakehouse_res.json()['workspaceId']
+ notebook_json["metadata"]["dependencies"]["lakehouse"][
+ "default_lakehouse"
+ ] = lakehouse_res.json()["id"]
+ notebook_json["metadata"]["dependencies"]["lakehouse"][
+ "default_lakehouse_name"
+ ] = lakehouse_res.json()["displayName"]
+ notebook_json["metadata"]["dependencies"]["lakehouse"][
+ "default_lakehouse_workspace_id"
+ ] = lakehouse_res.json()["workspaceId"]
except:
pass
-
+
# if env_res_id != '':
# try:
# notebook_json['metadata']['dependencies']['environment']['environmentId'] = env_res_id
@@ -122,41 +125,44 @@
# except:
# pass
-    notebook_base64 = base64.b64encode(json.dumps(notebook_json).encode('utf-8'))
-
+    notebook_base64 = base64.b64encode(json.dumps(notebook_json).encode("utf-8"))
notebook_data = {
- "displayName":notebook_name,
- "type":"Notebook",
- "definition" : {
+ "displayName": notebook_name,
+ "type": "Notebook",
+ "definition": {
"format": "ipynb",
"parts": [
{
"path": "notebook-content.ipynb",
- "payload": notebook_base64.decode('utf-8'),
- "payloadType": "InlineBase64"
+ "payload": notebook_base64.decode("utf-8"),
+ "payloadType": "InlineBase64",
}
- ]
- }
+ ],
+ },
}
-
- fabric_response = requests.post(fabric_items_url, headers=fabric_headers, json=notebook_data)
- #print(fabric_response.json())
+
+ fabric_response = requests.post(
+ fabric_items_url, headers=fabric_headers, json=notebook_data
+ )
+ # print(fabric_response.json())
time.sleep(120)
# get wrapper notebook id
-fabric_notebooks_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/notebooks"
+fabric_notebooks_url = (
+ f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/notebooks"
+)
notebooks_res = requests.get(fabric_notebooks_url, headers=fabric_headers)
notebooks_res.json()
-pipeline_notebook_id = ''
+pipeline_notebook_id = ""
print("notebook_res.json.values: ", notebooks_res.json().values())
for n in notebooks_res.json().values():
for notebook in n:
- print("notebook displayname", notebook['displayName'])
- if notebook['displayName'] == pipeline_notebook_name:
- pipeline_notebook_id = notebook['id']
+ print("notebook displayname", notebook["displayName"])
+ if notebook["displayName"] == pipeline_notebook_name:
+ pipeline_notebook_id = notebook["id"]
break
print("pipeline_notebook_id: ", pipeline_notebook_id)
@@ -175,37 +181,42 @@
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": "false",
- "secureInput": "false"
+ "secureInput": "false",
},
"typeProperties": {
"notebookId": pipeline_notebook_id,
- "workspaceId": workspaceId
- }
+ "workspaceId": workspaceId,
+ },
}
]
- }
+ },
}
-pipeline_base64 = base64.b64encode(json.dumps(pipeline_json).encode('utf-8'))
+pipeline_base64 = base64.b64encode(json.dumps(pipeline_json).encode("utf-8"))
pipeline_data = {
- "displayName":pipeline_name,
- "type":"DataPipeline",
- "definition" : {
- # "format": "json",
- "parts": [
- {
- "path": "pipeline-content.json",
- "payload": pipeline_base64.decode('utf-8'),
- "payloadType": "InlineBase64"
- }
- ]
- }
- }
+ "displayName": pipeline_name,
+ "type": "DataPipeline",
+ "definition": {
+ # "format": "json",
+ "parts": [
+ {
+ "path": "pipeline-content.json",
+ "payload": pipeline_base64.decode("utf-8"),
+ "payloadType": "InlineBase64",
+ }
+ ]
+ },
+}
-pipeline_response = requests.post(fabric_items_url, headers=fabric_headers, json=pipeline_data)
+pipeline_response = requests.post(
+ fabric_items_url, headers=fabric_headers, json=pipeline_data
+)
pipeline_response.json()
# run the pipeline once
-job_url = fabric_base_url + f"items/{pipeline_response.json()['id']}/jobs/instances?jobType=Pipeline"
-job_response = requests.post(job_url, headers=fabric_headers)
\ No newline at end of file
+job_url = (
+ fabric_base_url
+ + f"items/{pipeline_response.json()['id']}/jobs/instances?jobType=Pipeline"
+)
+job_response = requests.post(job_url, headers=fabric_headers)
diff --git a/ClientAdvisor/Deployment/scripts/index_scripts/create_search_index.py b/ClientAdvisor/Deployment/scripts/index_scripts/create_search_index.py
index af89d88c6..3a0cd2fac 100644
--- a/ClientAdvisor/Deployment/scripts/index_scripts/create_search_index.py
+++ b/ClientAdvisor/Deployment/scripts/index_scripts/create_search_index.py
@@ -1,147 +1,172 @@
-#Get Azure Key Vault Client
-key_vault_name = 'kv_to-be-replaced' #'nc6262-kv-2fpeafsylfd2e'
+# Get Azure Key Vault Client
+key_vault_name = "kv_to-be-replaced" #'nc6262-kv-2fpeafsylfd2e'
index_name = "transcripts_index"
file_system_client_name = "data"
-directory = 'clienttranscripts/meeting_transcripts'
-csv_file_name = 'clienttranscripts/meeting_transcripts_metadata/transcripts_metadata.csv'
+directory = "clienttranscripts/meeting_transcripts"
+csv_file_name = (
+ "clienttranscripts/meeting_transcripts_metadata/transcripts_metadata.csv"
+)
+
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
-from azure.keyvault.secrets import SecretClient
-from azure.identity import DefaultAzureCredential
def get_secrets_from_kv(kv_name, secret_name):
-
- # Set the name of the Azure Key Vault
- key_vault_name = kv_name
- credential = DefaultAzureCredential()
- # Create a secret client object using the credential and Key Vault name
- secret_client = SecretClient(vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential)
-
- # Retrieve the secret value
- return(secret_client.get_secret(secret_name).value)
+ # Set the name of the Azure Key Vault
+ key_vault_name = kv_name
+ credential = DefaultAzureCredential()
+
+ # Create a secret client object using the credential and Key Vault name
+ secret_client = SecretClient(
+ vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential
+ )
-search_endpoint = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-ENDPOINT")
-search_key = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-KEY")
+ # Retrieve the secret value
+ return secret_client.get_secret(secret_name).value
+
+
+search_endpoint = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-ENDPOINT")
+search_key = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-KEY")
# openai_api_type = get_secrets_from_kv(key_vault_name,"OPENAI-API-TYPE")
-openai_api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai_api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai_api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+openai_api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai_api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai_api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
# Create the search index
-from azure.core.credentials import AzureKeyCredential
+from azure.core.credentials import AzureKeyCredential
+
search_credential = AzureKeyCredential(search_key)
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
- SimpleField,
- SearchFieldDataType,
+ HnswAlgorithmConfiguration,
SearchableField,
SearchField,
- VectorSearch,
- HnswAlgorithmConfiguration,
- VectorSearchProfile,
+ SearchFieldDataType,
+ SearchIndex,
SemanticConfiguration,
- SemanticPrioritizedFields,
SemanticField,
+ SemanticPrioritizedFields,
SemanticSearch,
- SearchIndex
+ SimpleField,
+ VectorSearch,
+ VectorSearchProfile,
)
# Create a search index
index_client = SearchIndexClient(endpoint=search_endpoint, credential=search_credential)
fields = [
- SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
+ SimpleField(
+ name="id",
+ type=SearchFieldDataType.String,
+ key=True,
+ sortable=True,
+ filterable=True,
+ facetable=True,
+ ),
SearchableField(name="chunk_id", type=SearchFieldDataType.String),
SearchableField(name="content", type=SearchFieldDataType.String),
SearchableField(name="sourceurl", type=SearchFieldDataType.String),
- SearchableField(name="client_id", type=SearchFieldDataType.String,filterable=True),
- SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
+ SearchableField(name="client_id", type=SearchFieldDataType.String, filterable=True),
+ SearchField(
+ name="contentVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
]
-# Configure the vector search configuration
+# Configure the vector search configuration
vector_search = VectorSearch(
- algorithms=[
- HnswAlgorithmConfiguration(
- name="myHnsw"
- )
- ],
+ algorithms=[HnswAlgorithmConfiguration(name="myHnsw")],
profiles=[
VectorSearchProfile(
name="myHnswProfile",
algorithm_configuration_name="myHnsw",
)
- ]
+ ],
)
semantic_config = SemanticConfiguration(
name="my-semantic-config",
prioritized_fields=SemanticPrioritizedFields(
keywords_fields=[SemanticField(field_name="client_id")],
- content_fields=[SemanticField(field_name="content")]
- )
+ content_fields=[SemanticField(field_name="content")],
+ ),
)
# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])
# Create the search index with the semantic settings
-index = SearchIndex(name=index_name, fields=fields,
- vector_search=vector_search, semantic_search=semantic_search)
+index = SearchIndex(
+ name=index_name,
+ fields=fields,
+ vector_search=vector_search,
+ semantic_search=semantic_search,
+)
result = index_client.create_or_update_index(index)
-print(f' {result.name} created')
+print(f" {result.name} created")
from openai import AzureOpenAI
+
# Function: Get Embeddings
-def get_embeddings(text: str,openai_api_base,openai_api_version,openai_api_key):
+def get_embeddings(text: str, openai_api_base, openai_api_version, openai_api_key):
model_id = "text-embedding-ada-002"
client = AzureOpenAI(
api_version=openai_api_version,
azure_endpoint=openai_api_base,
- api_key = openai_api_key
+ api_key=openai_api_key,
)
-
+
embedding = client.embeddings.create(input=text, model=model_id).data[0].embedding
return embedding
+
import re
+
def clean_spaces_with_regex(text):
# Use a regular expression to replace multiple spaces with a single space
- cleaned_text = re.sub(r'\s+', ' ', text)
+ cleaned_text = re.sub(r"\s+", " ", text)
# Use a regular expression to replace consecutive dots with a single dot
- cleaned_text = re.sub(r'\.{2,}', '.', cleaned_text)
+ cleaned_text = re.sub(r"\.{2,}", ".", cleaned_text)
return cleaned_text
+
def chunk_data(text):
- tokens_per_chunk = 1024 #500
+ tokens_per_chunk = 1024 # 500
text = clean_spaces_with_regex(text)
SENTENCE_ENDINGS = [".", "!", "?"]
- WORDS_BREAKS = ['\n', '\t', '}', '{', ']', '[', ')', '(', ' ', ':', ';', ',']
+ WORDS_BREAKS = ["\n", "\t", "}", "{", "]", "[", ")", "(", " ", ":", ";", ","]
- sentences = text.split('. ') # Split text into sentences
+ sentences = text.split(". ") # Split text into sentences
chunks = []
- current_chunk = ''
+ current_chunk = ""
current_chunk_token_count = 0
-
+
# Iterate through each sentence
for sentence in sentences:
# Split sentence into tokens
tokens = sentence.split()
-
+
# Check if adding the current sentence exceeds tokens_per_chunk
if current_chunk_token_count + len(tokens) <= tokens_per_chunk:
# Add the sentence to the current chunk
if current_chunk:
- current_chunk += '. ' + sentence
+ current_chunk += ". " + sentence
else:
current_chunk += sentence
current_chunk_token_count += len(tokens)
@@ -150,21 +175,28 @@ def chunk_data(text):
chunks.append(current_chunk)
current_chunk = sentence
current_chunk_token_count = len(tokens)
-
+
# Add the last chunk
if current_chunk:
chunks.append(current_chunk)
-
+
return chunks
-#add documents to the index
-import json
+# add documents to the index
+
import base64
+import json
+import os
import time
+
import pandas as pd
from azure.search.documents import SearchClient
-import os
+from azure.storage.filedatalake import (
+ DataLakeDirectoryClient,
+ DataLakeServiceClient,
+ FileSystemClient,
+)
# foldername = 'clienttranscripts'
# path_name = f'Data/{foldername}/meeting_transcripts'
@@ -172,20 +204,17 @@ def chunk_data(text):
# paths = os.listdir(path_name)
-from azure.storage.filedatalake import (
- DataLakeServiceClient,
- DataLakeDirectoryClient,
- FileSystemClient
-)
account_name = get_secrets_from_kv(key_vault_name, "ADLS-ACCOUNT-NAME")
credential = DefaultAzureCredential()
account_url = f"https://{account_name}.dfs.core.windows.net"
-service_client = DataLakeServiceClient(account_url, credential=credential,api_version='2023-01-03')
+service_client = DataLakeServiceClient(
+ account_url, credential=credential, api_version="2023-01-03"
+)
-file_system_client = service_client.get_file_system_client(file_system_client_name)
+file_system_client = service_client.get_file_system_client(file_system_client_name)
directory_name = directory
paths = file_system_client.get_paths(path=directory_name)
print(paths)
@@ -200,12 +229,13 @@ def chunk_data(text):
# # display(df_metadata)
import pandas as pd
+
# Read the CSV file into a Pandas DataFrame
file_path = csv_file_name
print(file_path)
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df_metadata = pd.read_csv(csv_file, encoding='utf-8')
+df_metadata = pd.read_csv(csv_file, encoding="utf-8")
docs = []
counter = 0
@@ -216,49 +246,59 @@ def chunk_data(text):
file_client = file_system_client.get_file_client(path.name)
data_file = file_client.download_file()
data = json.load(data_file)
- text = data['Content']
+ text = data["Content"]
- filename = path.name.split('/')[-1]
- document_id = filename.replace('.json','').replace('convo_','')
+ filename = path.name.split("/")[-1]
+ document_id = filename.replace(".json", "").replace("convo_", "")
# print(document_id)
- df_file_metadata = df_metadata[df_metadata['ConversationId']==str(document_id)].iloc[0]
-
+ df_file_metadata = df_metadata[
+ df_metadata["ConversationId"] == str(document_id)
+ ].iloc[0]
+
chunks = chunk_data(text)
chunk_num = 0
for chunk in chunks:
chunk_num += 1
d = {
- "chunk_id" : document_id + '_' + str(chunk_num).zfill(2),
- "client_id": str(df_file_metadata['ClientId']),
- "content": 'ClientId is ' + str(df_file_metadata['ClientId']) + ' . ' + chunk,
- }
+ "chunk_id": document_id + "_" + str(chunk_num).zfill(2),
+ "client_id": str(df_file_metadata["ClientId"]),
+ "content": "ClientId is "
+ + str(df_file_metadata["ClientId"])
+ + " . "
+ + chunk,
+ }
counter += 1
try:
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
docs.append(
{
- "id": base64.urlsafe_b64encode(bytes(d["chunk_id"], encoding='utf-8')).decode('utf-8'),
- "chunk_id": d["chunk_id"],
- "client_id": d["client_id"],
- "content": d["content"],
- "sourceurl": path.name.split('/')[-1],
- "contentVector": v_contentVector
+ "id": base64.urlsafe_b64encode(
+ bytes(d["chunk_id"], encoding="utf-8")
+ ).decode("utf-8"),
+ "chunk_id": d["chunk_id"],
+ "client_id": d["client_id"],
+ "content": d["content"],
+ "sourceurl": path.name.split("/")[-1],
+ "contentVector": v_contentVector,
}
)
-
+
if counter % 10 == 0:
result = search_client.upload_documents(documents=docs)
docs = []
- print(f' {str(counter)} uploaded')
-
+ print(f" {str(counter)} uploaded")
+
time.sleep(4)
-#upload the last batch
+# upload the last batch
if docs != []:
- search_client.upload_documents(documents=docs)
\ No newline at end of file
+ search_client.upload_documents(documents=docs)
diff --git a/ClientAdvisor/Deployment/scripts/index_scripts/create_sql_tables.py b/ClientAdvisor/Deployment/scripts/index_scripts/create_sql_tables.py
index cb43e8e8b..e84e18758 100644
--- a/ClientAdvisor/Deployment/scripts/index_scripts/create_sql_tables.py
+++ b/ClientAdvisor/Deployment/scripts/index_scripts/create_sql_tables.py
@@ -1,47 +1,51 @@
-key_vault_name = 'kv_to-be-replaced'
+key_vault_name = "kv_to-be-replaced"
-import pandas as pd
-import pymssql
import os
from datetime import datetime
-from azure.keyvault.secrets import SecretClient
-from azure.identity import DefaultAzureCredential
+import pandas as pd
+import pymssql
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+
def get_secrets_from_kv(kv_name, secret_name):
- key_vault_name = kv_name # Set the name of the Azure Key Vault
+ key_vault_name = kv_name # Set the name of the Azure Key Vault
credential = DefaultAzureCredential()
- secret_client = SecretClient(vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential) # Create a secret client object using the credential and Key Vault name
- return(secret_client.get_secret(secret_name).value) # Retrieve the secret value
+ secret_client = SecretClient(
+ vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential
+ ) # Create a secret client object using the credential and Key Vault name
+ return secret_client.get_secret(secret_name).value # Retrieve the secret value
+
-server = get_secrets_from_kv(key_vault_name,"SQLDB-SERVER")
-database = get_secrets_from_kv(key_vault_name,"SQLDB-DATABASE")
-username = get_secrets_from_kv(key_vault_name,"SQLDB-USERNAME")
-password = get_secrets_from_kv(key_vault_name,"SQLDB-PASSWORD")
+server = get_secrets_from_kv(key_vault_name, "SQLDB-SERVER")
+database = get_secrets_from_kv(key_vault_name, "SQLDB-DATABASE")
+username = get_secrets_from_kv(key_vault_name, "SQLDB-USERNAME")
+password = get_secrets_from_kv(key_vault_name, "SQLDB-PASSWORD")
conn = pymssql.connect(server, username, password, database)
cursor = conn.cursor()
-from azure.storage.filedatalake import (
- DataLakeServiceClient
-)
+from azure.storage.filedatalake import DataLakeServiceClient
account_name = get_secrets_from_kv(key_vault_name, "ADLS-ACCOUNT-NAME")
credential = DefaultAzureCredential()
account_url = f"https://{account_name}.dfs.core.windows.net"
-service_client = DataLakeServiceClient(account_url, credential=credential,api_version='2023-01-03')
+service_client = DataLakeServiceClient(
+ account_url, credential=credential, api_version="2023-01-03"
+)
file_system_client_name = "data"
-directory = 'clientdata'
+directory = "clientdata"
-file_system_client = service_client.get_file_system_client(file_system_client_name)
+file_system_client = service_client.get_file_system_client(file_system_client_name)
directory_name = directory
cursor = conn.cursor()
-cursor.execute('DROP TABLE IF EXISTS Clients')
+cursor.execute("DROP TABLE IF EXISTS Clients")
conn.commit()
create_client_sql = """CREATE TABLE Clients (
@@ -56,13 +60,23 @@ def get_secrets_from_kv(kv_name, secret_name):
conn.commit()
# Read the CSV file into a Pandas DataFrame
-file_path = directory + '/Clients.csv'
+file_path = directory + "/Clients.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO Clients (ClientId,Client, Email, Occupation, MaritalStatus, Dependents) VALUES (%s,%s,%s,%s,%s,%s)", (item.ClientId, item.Client, item.Email, item.Occupation, item.MaritalStatus, item.Dependents))
+ cursor.execute(
+ f"INSERT INTO Clients (ClientId,Client, Email, Occupation, MaritalStatus, Dependents) VALUES (%s,%s,%s,%s,%s,%s)",
+ (
+ item.ClientId,
+ item.Client,
+ item.Email,
+ item.Occupation,
+ item.MaritalStatus,
+ item.Dependents,
+ ),
+ )
conn.commit()
@@ -90,15 +104,15 @@ def get_secrets_from_kv(kv_name, secret_name):
# csv_file = file_client.download_file()
# df = pd.read_csv(csv_file, encoding='utf-8')
-# for index, item in df.iterrows():
+# for index, item in df.iterrows():
# cursor.execute(f"INSERT INTO ClientInvestmentPortfolio (ClientId, AssetDate, AssetType, Investment, ROI, RevenueWithoutStrategy) VALUES (%s,%s, %s,%s, %s, %s)", (item.ClientId, item.AssetDate, item.AssetType, item.Investment, item.ROI, item.RevenueWithoutStrategy))
-
+
# conn.commit()
from decimal import Decimal
-cursor.execute('DROP TABLE IF EXISTS Assets')
+cursor.execute("DROP TABLE IF EXISTS Assets")
conn.commit()
create_assets_sql = """CREATE TABLE Assets (
@@ -113,34 +127,44 @@ def get_secrets_from_kv(kv_name, secret_name):
cursor.execute(create_assets_sql)
conn.commit()
-file_path = directory + '/Assets.csv'
+file_path = directory + "/Assets.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
# # to adjust the dates to current date
-df['AssetDate'] = pd.to_datetime(df['AssetDate'])
+df["AssetDate"] = pd.to_datetime(df["AssetDate"])
today = datetime.today()
-days_difference = (today - max(df['AssetDate'])).days - 30
-months_difference = int(days_difference/30)
+days_difference = (today - max(df["AssetDate"])).days - 30
+months_difference = int(days_difference / 30)
# print(months_difference)
# df['AssetDate'] = df['AssetDate'] + pd.Timedelta(days=days_difference)
-df['AssetDate'] = df['AssetDate'] + pd.DateOffset(months=months_difference)
+df["AssetDate"] = df["AssetDate"] + pd.DateOffset(months=months_difference)
-df['AssetDate'] = pd.to_datetime(df['AssetDate'], format='%m/%d/%Y') # %Y-%m-%d')
-df['ClientId'] = df['ClientId'].astype(int)
-df['Investment'] = df['Investment'].astype(float)
-df['ROI'] = df['ROI'].astype(float)
-df['Revenue'] = df['Revenue'].astype(float)
+df["AssetDate"] = pd.to_datetime(df["AssetDate"], format="%m/%d/%Y") # %Y-%m-%d')
+df["ClientId"] = df["ClientId"].astype(int)
+df["Investment"] = df["Investment"].astype(float)
+df["ROI"] = df["ROI"].astype(float)
+df["Revenue"] = df["Revenue"].astype(float)
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO Assets (ClientId,AssetDate, Investment, ROI, Revenue, AssetType) VALUES (%s,%s,%s,%s,%s,%s)", (item.ClientId, item.AssetDate, item.Investment, item.ROI, item.Revenue, item.AssetType))
+ cursor.execute(
+ f"INSERT INTO Assets (ClientId,AssetDate, Investment, ROI, Revenue, AssetType) VALUES (%s,%s,%s,%s,%s,%s)",
+ (
+ item.ClientId,
+ item.AssetDate,
+ item.Investment,
+ item.ROI,
+ item.Revenue,
+ item.AssetType,
+ ),
+ )
conn.commit()
-#InvestmentGoals
-cursor.execute('DROP TABLE IF EXISTS InvestmentGoals')
+# InvestmentGoals
+cursor.execute("DROP TABLE IF EXISTS InvestmentGoals")
conn.commit()
create_ig_sql = """CREATE TABLE InvestmentGoals (
@@ -151,19 +175,22 @@ def get_secrets_from_kv(kv_name, secret_name):
cursor.execute(create_ig_sql)
conn.commit()
-file_path = directory + '/InvestmentGoals.csv'
+file_path = directory + "/InvestmentGoals.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
-df['ClientId'] = df['ClientId'].astype(int)
+df["ClientId"] = df["ClientId"].astype(int)
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO InvestmentGoals (ClientId,InvestmentGoal) VALUES (%s,%s)", (item.ClientId, item.InvestmentGoal))
+ cursor.execute(
+ f"INSERT INTO InvestmentGoals (ClientId,InvestmentGoal) VALUES (%s,%s)",
+ (item.ClientId, item.InvestmentGoal),
+ )
conn.commit()
-cursor.execute('DROP TABLE IF EXISTS InvestmentGoalsDetails')
+cursor.execute("DROP TABLE IF EXISTS InvestmentGoalsDetails")
conn.commit()
create_ig_sql = """CREATE TABLE InvestmentGoalsDetails (
@@ -176,19 +203,22 @@ def get_secrets_from_kv(kv_name, secret_name):
cursor.execute(create_ig_sql)
conn.commit()
-file_path = directory + '/InvestmentGoalsDetails.csv'
+file_path = directory + "/InvestmentGoalsDetails.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
-df['ClientId'] = df['ClientId'].astype(int)
+df["ClientId"] = df["ClientId"].astype(int)
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO InvestmentGoalsDetails (ClientId,InvestmentGoal, TargetAmount, Contribution) VALUES (%s,%s,%s,%s)", (item.ClientId, item.InvestmentGoal, item.TargetAmount, item.Contribution))
+ cursor.execute(
+ f"INSERT INTO InvestmentGoalsDetails (ClientId,InvestmentGoal, TargetAmount, Contribution) VALUES (%s,%s,%s,%s)",
+ (item.ClientId, item.InvestmentGoal, item.TargetAmount, item.Contribution),
+ )
conn.commit()
-#ClientSummaries
-cursor.execute('DROP TABLE IF EXISTS ClientSummaries')
+# ClientSummaries
+cursor.execute("DROP TABLE IF EXISTS ClientSummaries")
conn.commit()
create_cs_sql = """CREATE TABLE ClientSummaries (
@@ -199,19 +229,22 @@ def get_secrets_from_kv(kv_name, secret_name):
cursor.execute(create_cs_sql)
conn.commit()
-file_path = directory + '/ClientSummaries.csv'
+file_path = directory + "/ClientSummaries.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
-df['ClientId'] = df['ClientId'].astype(int)
+df["ClientId"] = df["ClientId"].astype(int)
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO ClientSummaries (ClientId,ClientSummary) VALUES (%s,%s)", (item.ClientId, item.ClientSummary))
+ cursor.execute(
+ f"INSERT INTO ClientSummaries (ClientId,ClientSummary) VALUES (%s,%s)",
+ (item.ClientId, item.ClientSummary),
+ )
conn.commit()
# Retirement
-cursor.execute('DROP TABLE IF EXISTS Retirement')
+cursor.execute("DROP TABLE IF EXISTS Retirement")
conn.commit()
create_cs_sql = """CREATE TABLE Retirement (
@@ -225,30 +258,39 @@ def get_secrets_from_kv(kv_name, secret_name):
conn.commit()
-file_path = directory + '/Retirement.csv'
+file_path = directory + "/Retirement.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
-df['ClientId'] = df['ClientId'].astype(int)
+df["ClientId"] = df["ClientId"].astype(int)
# to adjust the dates to current date
-df['StatusDate'] = pd.to_datetime(df['StatusDate'])
+df["StatusDate"] = pd.to_datetime(df["StatusDate"])
today = datetime.today()
-days_difference = (today - max(df['StatusDate'])).days - 30
-months_difference = int(days_difference/30)
-df['StatusDate'] = df['StatusDate'] + pd.DateOffset(months=months_difference)
-df['StatusDate'] = pd.to_datetime(df['StatusDate']).dt.date
+days_difference = (today - max(df["StatusDate"])).days - 30
+months_difference = int(days_difference / 30)
+df["StatusDate"] = df["StatusDate"] + pd.DateOffset(months=months_difference)
+df["StatusDate"] = pd.to_datetime(df["StatusDate"]).dt.date
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO Retirement (ClientId,StatusDate, RetirementGoalProgress, EducationGoalProgress) VALUES (%s,%s,%s,%s)", (item.ClientId, item.StatusDate, item.RetirementGoalProgress, item.EducationGoalProgress))
+ cursor.execute(
+ f"INSERT INTO Retirement (ClientId,StatusDate, RetirementGoalProgress, EducationGoalProgress) VALUES (%s,%s,%s,%s)",
+ (
+ item.ClientId,
+ item.StatusDate,
+ item.RetirementGoalProgress,
+ item.EducationGoalProgress,
+ ),
+ )
conn.commit()
import pandas as pd
+
cursor = conn.cursor()
-cursor.execute('DROP TABLE IF EXISTS ClientMeetings')
+cursor.execute("DROP TABLE IF EXISTS ClientMeetings")
conn.commit()
create_cs_sql = """CREATE TABLE ClientMeetings (
@@ -265,43 +307,65 @@ def get_secrets_from_kv(kv_name, secret_name):
conn.commit()
-file_path = directory + '/ClientMeetingsMetadata.csv'
+file_path = directory + "/ClientMeetingsMetadata.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
# to adjust the dates to current date
-df['StartTime'] = pd.to_datetime(df['StartTime'])
-df['EndTime'] = pd.to_datetime(df['EndTime'])
+df["StartTime"] = pd.to_datetime(df["StartTime"])
+df["EndTime"] = pd.to_datetime(df["EndTime"])
today = datetime.today()
-days_difference = (today - min(df['StartTime'])).days - 30
+days_difference = (today - min(df["StartTime"])).days - 30
days_difference
-df['StartTime'] = df['StartTime'] + pd.Timedelta(days=days_difference)
-df['EndTime'] = df['EndTime'] + pd.Timedelta(days=days_difference)
+df["StartTime"] = df["StartTime"] + pd.Timedelta(days=days_difference)
+df["EndTime"] = df["EndTime"] + pd.Timedelta(days=days_difference)
for index, item in df.iterrows():
-
- cursor.execute(f"INSERT INTO ClientMeetings (ClientId,ConversationId,Title,StartTime,EndTime,Advisor,ClientEmail) VALUES (%s,%s,%s,%s,%s,%s,%s)", (item.ClientId, item.ConversationId, item.Title, item.StartTime, item.EndTime, item.Advisor, item.ClientEmail))
+
+ cursor.execute(
+ f"INSERT INTO ClientMeetings (ClientId,ConversationId,Title,StartTime,EndTime,Advisor,ClientEmail) VALUES (%s,%s,%s,%s,%s,%s,%s)",
+ (
+ item.ClientId,
+ item.ConversationId,
+ item.Title,
+ item.StartTime,
+ item.EndTime,
+ item.Advisor,
+ item.ClientEmail,
+ ),
+ )
conn.commit()
-file_path = directory + '/ClientFutureMeetings.csv'
+file_path = directory + "/ClientFutureMeetings.csv"
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df = pd.read_csv(csv_file, encoding='utf-8')
+df = pd.read_csv(csv_file, encoding="utf-8")
# to adjust the dates to current date
-df['StartTime'] = pd.to_datetime(df['StartTime'])
-df['EndTime'] = pd.to_datetime(df['EndTime'])
+df["StartTime"] = pd.to_datetime(df["StartTime"])
+df["EndTime"] = pd.to_datetime(df["EndTime"])
today = datetime.today()
-days_difference = (today - min(df['StartTime'])).days + 1
-df['StartTime'] = df['StartTime'] + pd.Timedelta(days=days_difference)
-df['EndTime'] = df['EndTime'] + pd.Timedelta(days=days_difference)
+days_difference = (today - min(df["StartTime"])).days + 1
+df["StartTime"] = df["StartTime"] + pd.Timedelta(days=days_difference)
+df["EndTime"] = df["EndTime"] + pd.Timedelta(days=days_difference)
-df['ClientId'] = df['ClientId'].astype(int)
-df['ConversationId'] = ''
+df["ClientId"] = df["ClientId"].astype(int)
+df["ConversationId"] = ""
for index, item in df.iterrows():
- cursor.execute(f"INSERT INTO ClientMeetings (ClientId,ConversationId,Title,StartTime,EndTime,Advisor,ClientEmail) VALUES (%s,%s,%s,%s,%s,%s,%s)", (item.ClientId, item.ConversationId, item.Title, item.StartTime, item.EndTime, item.Advisor, item.ClientEmail))
-conn.commit()
\ No newline at end of file
+ cursor.execute(
+ f"INSERT INTO ClientMeetings (ClientId,ConversationId,Title,StartTime,EndTime,Advisor,ClientEmail) VALUES (%s,%s,%s,%s,%s,%s,%s)",
+ (
+ item.ClientId,
+ item.ConversationId,
+ item.Title,
+ item.StartTime,
+ item.EndTime,
+ item.Advisor,
+ item.ClientEmail,
+ ),
+ )
+conn.commit()
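
A side note on the seeding loops in this script: each CSV row is inserted with its own `cursor.execute` round trip, which is fine for demo-sized files but slow for anything larger. Below is a minimal sketch (not part of the diff) of the same ClientMeetings insert done as a single DB-API `executemany` batch, assuming the `conn` and `df` objects already prepared above:

```python
# Sketch only: batch variant of the ClientMeetings insert loop above.
# Assumes `conn` is the open pymssql connection and `df` is the dataframe
# already prepared by the script (dates shifted, ConversationId filled in).
insert_sql = (
    "INSERT INTO ClientMeetings "
    "(ClientId, ConversationId, Title, StartTime, EndTime, Advisor, ClientEmail) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s)"
)
columns = ["ClientId", "ConversationId", "Title", "StartTime", "EndTime", "Advisor", "ClientEmail"]
rows = list(df[columns].itertuples(index=False, name=None))

cursor = conn.cursor()
cursor.executemany(insert_sql, rows)  # one batch instead of one round trip per row
conn.commit()
```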
diff --git a/ClientAdvisor/Deployment/scripts/index_scripts/create_update_sql_dates.py b/ClientAdvisor/Deployment/scripts/index_scripts/create_update_sql_dates.py
index d0e8c725c..a9ccdd1bc 100644
--- a/ClientAdvisor/Deployment/scripts/index_scripts/create_update_sql_dates.py
+++ b/ClientAdvisor/Deployment/scripts/index_scripts/create_update_sql_dates.py
@@ -1,48 +1,52 @@
-key_vault_name = 'kv_to-be-replaced'
+key_vault_name = "kv_to-be-replaced"
-import pandas as pd
-import pymssql
import os
from datetime import datetime
-from azure.keyvault.secrets import SecretClient
-from azure.identity import DefaultAzureCredential
+import pandas as pd
+import pymssql
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+
def get_secrets_from_kv(kv_name, secret_name):
- key_vault_name = kv_name # Set the name of the Azure Key Vault
+ key_vault_name = kv_name # Set the name of the Azure Key Vault
credential = DefaultAzureCredential()
- secret_client = SecretClient(vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential) # Create a secret client object using the credential and Key Vault name
- return(secret_client.get_secret(secret_name).value) # Retrieve the secret value
+ secret_client = SecretClient(
+ vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential
+ ) # Create a secret client object using the credential and Key Vault name
+ return secret_client.get_secret(secret_name).value # Retrieve the secret value
-server = get_secrets_from_kv(key_vault_name,"SQLDB-SERVER")
-database = get_secrets_from_kv(key_vault_name,"SQLDB-DATABASE")
-username = get_secrets_from_kv(key_vault_name,"SQLDB-USERNAME")
-password = get_secrets_from_kv(key_vault_name,"SQLDB-PASSWORD")
+
+server = get_secrets_from_kv(key_vault_name, "SQLDB-SERVER")
+database = get_secrets_from_kv(key_vault_name, "SQLDB-DATABASE")
+username = get_secrets_from_kv(key_vault_name, "SQLDB-USERNAME")
+password = get_secrets_from_kv(key_vault_name, "SQLDB-PASSWORD")
conn = pymssql.connect(server, username, password, database)
cursor = conn.cursor()
-from azure.storage.filedatalake import (
- DataLakeServiceClient
-)
+from azure.storage.filedatalake import DataLakeServiceClient
account_name = get_secrets_from_kv(key_vault_name, "ADLS-ACCOUNT-NAME")
credential = DefaultAzureCredential()
account_url = f"https://{account_name}.dfs.core.windows.net"
-service_client = DataLakeServiceClient(account_url, credential=credential,api_version='2023-01-03')
+service_client = DataLakeServiceClient(
+ account_url, credential=credential, api_version="2023-01-03"
+)
file_system_client_name = "data"
-directory = 'clientdata'
+directory = "clientdata"
-file_system_client = service_client.get_file_system_client(file_system_client_name)
+file_system_client = service_client.get_file_system_client(file_system_client_name)
directory_name = directory
cursor = conn.cursor()
-cursor.execute('DROP TABLE IF EXISTS Clients')
+cursor.execute("DROP TABLE IF EXISTS Clients")
conn.commit()
create_client_sql = """CREATE TABLE Clients (
@@ -54,4 +58,4 @@ def get_secrets_from_kv(kv_name, secret_name):
Dependents int
);"""
cursor.execute(create_client_sql)
-conn.commit()
\ No newline at end of file
+conn.commit()
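
For readers skimming the diff, the date arithmetic used by these seeding scripts is easy to misread: it measures how far the newest demo date sits from today, backs off by roughly a month, and shifts every row by that amount so the sample data always looks recent. A minimal standalone sketch of that shift, using a hypothetical two-row dataframe rather than the real CSVs:

```python
# Sketch only: reproduces the StatusDate-shifting pattern from the seeding
# scripts on a tiny hypothetical dataframe (not the actual demo data).
from datetime import datetime

import pandas as pd

df = pd.DataFrame({"StatusDate": pd.to_datetime(["2023-01-15", "2023-06-15"])})

today = datetime.today()
# Days between the newest demo date and a point ~30 days before today,
# converted to whole months.
days_difference = (today - df["StatusDate"].max()).days - 30
months_difference = int(days_difference / 30)
# Shift every row forward by that many months and keep only the date part.
df["StatusDate"] = (df["StatusDate"] + pd.DateOffset(months=months_difference)).dt.date
print(df)
```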
diff --git a/ResearchAssistant/App/.flake8 b/ResearchAssistant/App/.flake8
index c462975ac..bc2f0943d 100644
--- a/ResearchAssistant/App/.flake8
+++ b/ResearchAssistant/App/.flake8
@@ -1,4 +1,4 @@
[flake8]
-max-line-length = 88
-extend-ignore = E501, E203
-exclude = .venv, frontend,
\ No newline at end of file
+max-line-length = 160
+extend-ignore = E203, W503, E501
+exclude = .venv, frontend
\ No newline at end of file
diff --git a/ResearchAssistant/App/.pylintrc b/ResearchAssistant/App/.pylintrc
new file mode 100644
index 000000000..a35c43970
--- /dev/null
+++ b/ResearchAssistant/App/.pylintrc
@@ -0,0 +1,39 @@
+[MASTER]
+ignore=tests ; Ignore the tests folder globally.
+
+[MESSAGES CONTROL]
+disable=
+ invalid-name, # C0103: Ignore naming style errors
+ line-too-long, # C0301: Ignore long lines
+ missing-function-docstring, # C0116: Ignore missing function docstrings
+ missing-class-docstring, # C0115: Ignore missing class docstrings
+ missing-module-docstring, # C0114: Ignore missing module docstrings
+ redefined-outer-name, # W0621: Ignore redefined variables warnings
+ broad-exception-raised, # W0719: Ignore broad exception raised warnings
+ broad-exception-caught, # W0718: Ignore broad exception caught warnings
+ too-many-arguments, # R0913: Ignore too many arguments
+ too-many-locals, # R0914: Ignore too many local variables
+ too-many-return-statements, # R0911: Ignore too many return statements
+ too-many-statements, # R0915: Ignore too many statements in a function
+ too-many-branches, # R0912: Ignore too many branches
+ unused-argument, # W0613: Ignore unused arguments
+ unspecified-encoding, # W1514: Ignore unspecified encoding in open()
+ logging-fstring-interpolation, # W1203: Ignore lazy f-string interpolation
+ missing-timeout, # W3101: Ignore missing timeout in requests.get
+ no-else-return, # R1705: Ignore unnecessary 'else' after return
+ redefined-builtin, # W0622: Ignore redefining built-ins
+ global-statement, # W0603: Ignore global statement usage
+ no-name-in-module, # E0611: Ignore unresolved module names
+ no-member, # E1101: Ignore module has no 'member'
+ pointless-string-statement, # W0105: Ignore pointless string statements
+ unnecessary-comprehension, # R1721: Ignore unnecessary comprehensions
+ simplifiable-if-expression, # R1719: Ignore simplifiable if expressions
+ dangerous-default-value, # W0102: Ignore mutable default arguments
+ consider-using-with # R1732: Ignore suggestions to use 'with' for files and resources
+
+[TYPECHECK]
+generated-members=get_bearer_token_provider
+
+[FORMAT]
+max-module-lines=1700 # Allow large modules up to 1700 lines
+max-line-length=160 # Allow lines up to 160 characters
\ No newline at end of file
diff --git a/ResearchAssistant/Deployment/scripts/aihub_scripts/create_ai_hub.py b/ResearchAssistant/Deployment/scripts/aihub_scripts/create_ai_hub.py
index cf0b8c3a6..5c78fc98d 100644
--- a/ResearchAssistant/Deployment/scripts/aihub_scripts/create_ai_hub.py
+++ b/ResearchAssistant/Deployment/scripts/aihub_scripts/create_ai_hub.py
@@ -1,16 +1,17 @@
# Get Azure Key Vault Client
-key_vault_name = 'kv_to-be-replaced'
+key_vault_name = "kv_to-be-replaced"
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
- Hub,
- Project,
ApiKeyConfiguration,
AzureAISearchConnection,
AzureOpenAIConnection,
+ Hub,
+ Project,
)
-from azure.keyvault.secrets import SecretClient
from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+
def get_secrets_from_kv(kv_name, secret_name):
# Set the name of the Azure Key Vault
@@ -27,15 +28,16 @@ def get_secrets_from_kv(kv_name, secret_name):
# Retrieve the secret value
return secret_client.get_secret(secret_name).value
+
# Azure configuration
-key_vault_name = 'kv_to-be-replaced'
-subscription_id = 'subscription_to-be-replaced'
-resource_group_name = 'rg_to-be-replaced'
-aihub_name = 'ai_hub_' + 'solutionname_to-be-replaced'
-project_name = 'ai_project_' + 'solutionname_to-be-replaced'
-deployment_name = 'draftsinference-' + 'solutionname_to-be-replaced'
-solutionLocation = 'solutionlocation_to-be-replaced'
+key_vault_name = "kv_to-be-replaced"
+subscription_id = "subscription_to-be-replaced"
+resource_group_name = "rg_to-be-replaced"
+aihub_name = "ai_hub_" + "solutionname_to-be-replaced"
+project_name = "ai_project_" + "solutionname_to-be-replaced"
+deployment_name = "draftsinference-" + "solutionname_to-be-replaced"
+solutionLocation = "solutionlocation_to-be-replaced"
# Open AI Details
open_ai_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
@@ -90,7 +92,7 @@ def get_secrets_from_kv(kv_name, secret_name):
api_key=open_ai_key,
api_version=openai_api_version,
azure_endpoint=f"https://{open_ai_res_name}.openai.azure.com/",
- open_ai_resource_id=f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.CognitiveServices/accounts/{open_ai_res_name}"
+ open_ai_resource_id=f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.CognitiveServices/accounts/{open_ai_res_name}",
)
ml_client.connections.create_or_update(open_ai_connection)
@@ -104,7 +106,9 @@ def get_secrets_from_kv(kv_name, secret_name):
credentials=ApiKeyConfiguration(key=ai_search_key),
)
-aisearch_connection.tags["ResourceId"] = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Search/searchServices/{ai_search_res_name}"
+aisearch_connection.tags["ResourceId"] = (
+ f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Search/searchServices/{ai_search_res_name}"
+)
aisearch_connection.tags["ApiVersion"] = "2024-05-01-preview"
ml_client.connections.create_or_update(aisearch_connection)
diff --git a/ResearchAssistant/Deployment/scripts/fabric_scripts/create_fabric_items.py b/ResearchAssistant/Deployment/scripts/fabric_scripts/create_fabric_items.py
index 510cb6699..f77b4a182 100644
--- a/ResearchAssistant/Deployment/scripts/fabric_scripts/create_fabric_items.py
+++ b/ResearchAssistant/Deployment/scripts/fabric_scripts/create_fabric_items.py
@@ -1,61 +1,70 @@
-from azure.identity import DefaultAzureCredential
import base64
import json
-import requests
+
import pandas as pd
+import requests
+from azure.identity import AzureCliCredential, DefaultAzureCredential
# credential = DefaultAzureCredential()
-from azure.identity import AzureCliCredential
credential = AzureCliCredential()
-cred = credential.get_token('https://api.fabric.microsoft.com/.default')
+cred = credential.get_token("https://api.fabric.microsoft.com/.default")
token = cred.token
-key_vault_name = 'kv_to-be-replaced'
+key_vault_name = "kv_to-be-replaced"
workspaceId = "workspaceId_to-be-replaced"
fabric_headers = {"Authorization": "Bearer " + token.strip()}
fabric_base_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/"
-fabric_items_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/"
+fabric_items_url = (
+ f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/items/"
+)
-lakehouse_name = 'Lakehouse1'
+lakehouse_name = "Lakehouse1"
-lakehouse_data = {
- "displayName": lakehouse_name,
- "type": "Lakehouse"
-}
+lakehouse_data = {"displayName": lakehouse_name, "type": "Lakehouse"}
-lakehouse_res = requests.post(fabric_items_url, headers=fabric_headers, json=lakehouse_data)
+lakehouse_res = requests.post(
+ fabric_items_url, headers=fabric_headers, json=lakehouse_data
+)
-notebook_names =['create_articles_index','create_grants_index','create_drafts_index']
+notebook_names = ["create_articles_index", "create_grants_index", "create_drafts_index"]
for notebook_name in notebook_names:
- with open('notebooks/'+ notebook_name +'.ipynb', 'r') as f:
+ with open("notebooks/" + notebook_name + ".ipynb", "r") as f:
notebook_json = json.load(f)
- notebook_json['metadata']['trident']['lakehouse']['default_lakehouse'] = lakehouse_res.json()['id']
- notebook_json['metadata']['trident']['lakehouse']['default_lakehouse_name'] = lakehouse_res.json()['displayName']
- notebook_json['metadata']['trident']['lakehouse']['workspaceId'] = lakehouse_res.json()['workspaceId']
+ notebook_json["metadata"]["trident"]["lakehouse"][
+ "default_lakehouse"
+ ] = lakehouse_res.json()["id"]
+ notebook_json["metadata"]["trident"]["lakehouse"][
+ "default_lakehouse_name"
+ ] = lakehouse_res.json()["displayName"]
+ notebook_json["metadata"]["trident"]["lakehouse"][
+ "workspaceId"
+ ] = lakehouse_res.json()["workspaceId"]
- notebook_base64 = base64.b64encode(json.dumps(notebook_json).encode('utf-8'))
+ notebook_base64 = base64.b64encode(json.dumps(notebook_json).encode("utf-8"))
notebook_data = {
- "displayName":notebook_name,
- "type":"Notebook",
- "definition" : {
+ "displayName": notebook_name,
+ "type": "Notebook",
+ "definition": {
"format": "ipynb",
"parts": [
{
"path": "notebook-content.ipynb",
- "payload": notebook_base64.decode('utf-8'),
- "payloadType": "InlineBase64"
+ "payload": notebook_base64.decode("utf-8"),
+ "payloadType": "InlineBase64",
}
- ]
- }
+ ],
+ },
}
- fabric_response = requests.post(fabric_items_url, headers=fabric_headers, json=notebook_data)
- #print(fabric_response.json())
\ No newline at end of file
+ fabric_response = requests.post(
+ fabric_items_url, headers=fabric_headers, json=notebook_data
+ )
+ # print(fabric_response.json())
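
One thing this reformatting pass does not change is that the lakehouse and notebook POSTs above never check their responses (the diagnostic print stays commented out). As a hedged sketch only, assuming the documented long-running-operation behaviour of the Fabric REST API (202 Accepted plus a Location header to poll) and reusing the script's `fabric_headers` and `fabric_items_url`, a small helper that fails loudly could look like this:

```python
# Sketch only: a defensive wrapper around the item-creation POSTs above.
# Assumes the same fabric_headers/fabric_items_url variables; the 202/Location
# polling follows the documented long-running-operation pattern and may need
# adjusting to the payloads actually returned.
import time

import requests


def create_fabric_item(item_data, timeout=60):
    res = requests.post(fabric_items_url, headers=fabric_headers, json=item_data, timeout=timeout)
    res.raise_for_status()                 # surface 4xx/5xx instead of silently continuing
    if res.status_code == 202:             # long-running operation: poll the Location header
        poll_url = res.headers.get("Location")
        while poll_url:
            time.sleep(int(res.headers.get("Retry-After", 5)))
            res = requests.get(poll_url, headers=fabric_headers, timeout=timeout)
            res.raise_for_status()
            if res.status_code != 202:
                break
            poll_url = res.headers.get("Location", poll_url)
    return res
```

A notebook upload in the loop above would then become `create_fabric_item(notebook_data)`.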
diff --git a/ResearchAssistant/Deployment/scripts/index_scripts/create_articles_index.py b/ResearchAssistant/Deployment/scripts/index_scripts/create_articles_index.py
index 21b4624c5..cd0e16678 100644
--- a/ResearchAssistant/Deployment/scripts/index_scripts/create_articles_index.py
+++ b/ResearchAssistant/Deployment/scripts/index_scripts/create_articles_index.py
@@ -1,140 +1,124 @@
-#Get Azure Key Vault Client
-key_vault_name = 'kv_to-be-replaced'
+# Get Azure Key Vault Client
+key_vault_name = "kv_to-be-replaced"
import time
-
-time.sleep(120) # to fix the issue of the script
-#hardcoded values
+time.sleep(120) # to fix the issue of the script
+
+# hardcoded values
index_name = "articlesindex"
-drafts_index_name = 'draftsindex'
+drafts_index_name = "draftsindex"
file_system_client_name = "data"
-directory = 'demodata/pubmed_articles'
-csv_file_name = '/metadata/pubmed_articles.csv'
+directory = "demodata/pubmed_articles"
+csv_file_name = "/metadata/pubmed_articles.csv"
num_pages = 10
-from azure.keyvault.secrets import SecretClient
-from azure.identity import DefaultAzureCredential
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+
def get_secrets_from_kv(kv_name, secret_name):
-
- # Set the name of the Azure Key Vault
- key_vault_name = kv_name
-
- # Create a credential object using the default Azure credentials
- credential = DefaultAzureCredential()
-
- # Create a secret client object using the credential and Key Vault name
- secret_client = SecretClient(vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential)
-
- # Retrieve the secret value
- return(secret_client.get_secret(secret_name).value)
-
-
-#Utils
- # Import required libraries
-import os
-import json
-import openai
-import os
-from azure.core.credentials import AzureKeyCredential
-from azure.ai.textanalytics import TextAnalyticsClient
+ # Set the name of the Azure Key Vault
+ key_vault_name = kv_name
-from azure.core.credentials import AzureKeyCredential
-from azure.search.documents import SearchClient, SearchIndexingBufferedSender
-from azure.search.documents.indexes import SearchIndexClient
-from azure.search.documents.models import (
- QueryAnswerType,
- QueryCaptionType,
- QueryCaptionResult,
- QueryAnswerResult,
- SemanticErrorMode,
- SemanticErrorReason,
- SemanticSearchResultsType,
- QueryType,
- VectorizedQuery,
- VectorQuery,
- VectorFilterMode,
-)
-from azure.search.documents.indexes.models import (
+ # Create a credential object using the default Azure credentials
+ credential = DefaultAzureCredential()
+
+ # Create a secret client object using the credential and Key Vault name
+ secret_client = SecretClient(
+ vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential
+ )
+
+ # Retrieve the secret value
+ return secret_client.get_secret(secret_name).value
+
+
+# Utils
+# Import required libraries
+import json
+import os
+
+import openai
+from azure.ai.textanalytics import TextAnalyticsClient
+from azure.core.credentials import AzureKeyCredential
+from azure.search.documents import SearchClient, SearchIndexingBufferedSender
+from azure.search.documents.indexes import SearchIndexClient
+from azure.search.documents.indexes.models import (
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
- SearchIndex,
- SearchField,
- SearchFieldDataType,
- SimpleField,
- SearchableField,
- SearchIndex,
- SemanticConfiguration,
- SemanticPrioritizedFields,
- SemanticField,
- SearchField,
- SemanticSearch,
- VectorSearch,
HnswAlgorithmConfiguration,
- HnswParameters,
- VectorSearch,
- VectorSearchAlgorithmConfiguration,
- VectorSearchAlgorithmKind,
- VectorSearchProfile,
- SearchIndex,
+ HnswParameters,
+ SearchableField,
SearchField,
SearchFieldDataType,
+ SearchIndex,
+ SemanticConfiguration,
+ SemanticField,
+ SemanticPrioritizedFields,
+ SemanticSearch,
SimpleField,
- SearchableField,
- VectorSearch,
- ExhaustiveKnnParameters,
- SearchIndex,
- SearchField,
- SearchFieldDataType,
- SimpleField,
- SearchableField,
- SearchIndex,
- SemanticConfiguration,
- SemanticField,
- SearchField,
- VectorSearch,
- HnswParameters,
VectorSearch,
+ VectorSearchAlgorithmConfiguration,
VectorSearchAlgorithmKind,
VectorSearchAlgorithmMetric,
VectorSearchProfile,
-)
-search_endpoint = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-ENDPOINT")
-search_key = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-KEY")
+)
+from azure.search.documents.models import (
+ QueryAnswerResult,
+ QueryAnswerType,
+ QueryCaptionResult,
+ QueryCaptionType,
+ QueryType,
+ SemanticErrorMode,
+ SemanticErrorReason,
+ SemanticSearchResultsType,
+ VectorFilterMode,
+ VectorizedQuery,
+ VectorQuery,
+)
-openai.api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai.api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai.api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+search_endpoint = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-ENDPOINT")
+search_key = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-KEY")
-openai_api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai_api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai_api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+openai.api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai.api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai.api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
-# Set up your Azure Text Analytics service and credentials
-COG_SERVICES_NAME = get_secrets_from_kv(key_vault_name,"COG-SERVICES-NAME")
-COG_SERVICES_ENDPOINT = get_secrets_from_kv(key_vault_name,"COG-SERVICES-ENDPOINT")
-COG_SERVICES_KEY = get_secrets_from_kv(key_vault_name,"COG-SERVICES-KEY")
+openai_api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai_api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai_api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
+
+# Set up your Azure Text Analytics service and credentials
+COG_SERVICES_NAME = get_secrets_from_kv(key_vault_name, "COG-SERVICES-NAME")
+COG_SERVICES_ENDPOINT = get_secrets_from_kv(key_vault_name, "COG-SERVICES-ENDPOINT")
+COG_SERVICES_KEY = get_secrets_from_kv(key_vault_name, "COG-SERVICES-KEY")
-cog_services_credential = AzureKeyCredential(COG_SERVICES_KEY)
+cog_services_credential = AzureKeyCredential(COG_SERVICES_KEY)
+
+# Create a TextAnalyticsClient using your endpoint and credentials
+cog_services_client = TextAnalyticsClient(
+ endpoint=COG_SERVICES_ENDPOINT, credential=cog_services_credential
+)
-# Create a TextAnalyticsClient using your endpoint and credentials
-cog_services_client = TextAnalyticsClient(endpoint=COG_SERVICES_ENDPOINT, credential=cog_services_credential)
-def get_named_entities(cog_services_client,input_text):
- # Call the named entity recognition API to extract named entities from your text
- result = cog_services_client.recognize_entities(documents=[input_text])
-
- # return the named entities for each document
- # full list of categories #https://learn.microsoft.com/en-us/azure/ai-services/language-service/named-entity-recognition/concepts/named-entity-categories?tabs=ga-api
+def get_named_entities(cog_services_client, input_text):
+ # Call the named entity recognition API to extract named entities from your text
+ result = cog_services_client.recognize_entities(documents=[input_text])
- Person = []
+ # return the named entities for each document
+ # full list of categories #https://learn.microsoft.com/en-us/azure/ai-services/language-service/named-entity-recognition/concepts/named-entity-categories?tabs=ga-api
+
+ Person = []
Location = []
- Organization = []
+ Organization = []
DateTime = []
- URL = []
+ URL = []
Email = []
PersonType = []
Event = []
@@ -142,7 +126,7 @@ def get_named_entities(cog_services_client,input_text):
for idx, doc in enumerate(result):
if not doc.is_error:
- for entity in doc.entities:
+ for entity in doc.entities:
if entity.category == "DateTime":
DateTime.append(entity.text)
elif entity.category == "Person":
@@ -162,39 +146,53 @@ def get_named_entities(cog_services_client,input_text):
elif entity.category == "Quantity":
Quantity.append(entity.text)
- else:
- print(" Error: {}".format(doc.error.message))
- return(list(set(DateTime)),list(set(Person)),list(set(Location)),list(set(Organization)),list(set(URL)),list(set(Email)),list(set(PersonType)),list(set(Event)),list(set(Quantity)))
-
+ else:
+ print(" Error: {}".format(doc.error.message))
+ return (
+ list(set(DateTime)),
+ list(set(Person)),
+ list(set(Location)),
+ list(set(Organization)),
+ list(set(URL)),
+ list(set(Email)),
+ list(set(PersonType)),
+ list(set(Event)),
+ list(set(Quantity)),
+ )
+
from openai import AzureOpenAI
+
# Function: Get Embeddings
-def get_embeddings(text: str,openai_api_base,openai_api_version,openai_api_key):
+def get_embeddings(text: str, openai_api_base, openai_api_version, openai_api_key):
model_id = "text-embedding-ada-002"
client = AzureOpenAI(
api_version=openai_api_version,
azure_endpoint=openai_api_base,
- api_key = openai_api_key
+ api_key=openai_api_key,
)
-
+
# embedding = openai.Embedding.create(input=text, deployment_id=model_id)["data"][0]["embedding"]
embedding = client.embeddings.create(input=text, model=model_id).data[0].embedding
return embedding
+
# from langchain.text_splitter import MarkdownTextSplitter, RecursiveCharacterTextSplitter, PythonCodeTextSplitter
# import tiktoken
import re
+
def clean_spaces_with_regex(text):
# Use a regular expression to replace multiple spaces with a single space
- cleaned_text = re.sub(r'\s+', ' ', text)
+ cleaned_text = re.sub(r"\s+", " ", text)
# Use a regular expression to replace consecutive dots with a single dot
- cleaned_text = re.sub(r'\.{2,}', '.', cleaned_text)
+ cleaned_text = re.sub(r"\.{2,}", ".", cleaned_text)
return cleaned_text
+
# def estimate_tokens(text):
# GPT2_TOKENIZER = tiktoken.get_encoding("gpt2")
# return(len(GPT2_TOKENIZER.encode(text)))
@@ -211,27 +209,28 @@ def clean_spaces_with_regex(text):
# return(splitter.split_text(text))
+
def chunk_data(text):
- tokens_per_chunk = 500 #1024
+ tokens_per_chunk = 500 # 1024
text = clean_spaces_with_regex(text)
SENTENCE_ENDINGS = [".", "!", "?"]
- WORDS_BREAKS = ['\n', '\t', '}', '{', ']', '[', ')', '(', ' ', ':', ';', ',']
+ WORDS_BREAKS = ["\n", "\t", "}", "{", "]", "[", ")", "(", " ", ":", ";", ","]
- sentences = text.split('. ') # Split text into sentences
+ sentences = text.split(". ") # Split text into sentences
chunks = []
- current_chunk = ''
+ current_chunk = ""
current_chunk_token_count = 0
-
+
# Iterate through each sentence
for sentence in sentences:
# Split sentence into tokens
tokens = sentence.split()
-
+
# Check if adding the current sentence exceeds tokens_per_chunk
if current_chunk_token_count + len(tokens) <= tokens_per_chunk:
# Add the sentence to the current chunk
if current_chunk:
- current_chunk += '. ' + sentence
+ current_chunk += ". " + sentence
else:
current_chunk += sentence
current_chunk_token_count += len(tokens)
@@ -240,43 +239,114 @@ def chunk_data(text):
chunks.append(current_chunk)
current_chunk = sentence
current_chunk_token_count = len(tokens)
-
+
# Add the last chunk
if current_chunk:
chunks.append(current_chunk)
-
+
return chunks
+
# Create the search index
search_credential = AzureKeyCredential(search_key)
-index_client = SearchIndexClient(
- endpoint=search_endpoint, credential=search_credential)
+index_client = SearchIndexClient(endpoint=search_endpoint, credential=search_credential)
fields = [
- SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
+ SimpleField(
+ name="id",
+ type=SearchFieldDataType.String,
+ key=True,
+ sortable=True,
+ filterable=True,
+ facetable=True,
+ ),
SearchableField(name="chunk_id", type=SearchFieldDataType.String),
SearchableField(name="document_id", type=SearchFieldDataType.String),
SearchableField(name="title", type=SearchFieldDataType.String),
SearchableField(name="content", type=SearchFieldDataType.String),
SearchableField(name="sourceurl", type=SearchFieldDataType.String),
SearchableField(name="publicurl", type=SearchFieldDataType.String),
- SimpleField(name="dateTime", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Person", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Location", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Organization", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="URL", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Email", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="PersonType", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Event", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Quantity", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SearchField(name="titleVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
- SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile")
+ SimpleField(
+ name="dateTime",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Person",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Location",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Organization",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="URL",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Email",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="PersonType",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Event",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Quantity",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SearchField(
+ name="titleVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
+ SearchField(
+ name="contentVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
]
-# Configure the vector search configuration
+# Configure the vector search configuration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
@@ -286,16 +356,16 @@ def chunk_data(text):
m=4,
ef_construction=400,
ef_search=500,
- metric=VectorSearchAlgorithmMetric.COSINE
- )
+ metric=VectorSearchAlgorithmMetric.COSINE,
+ ),
),
ExhaustiveKnnAlgorithmConfiguration(
name="myExhaustiveKnn",
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
parameters=ExhaustiveKnnParameters(
metric=VectorSearchAlgorithmMetric.COSINE
- )
- )
+ ),
+ ),
],
profiles=[
VectorSearchProfile(
@@ -305,60 +375,70 @@ def chunk_data(text):
VectorSearchProfile(
name="myExhaustiveKnnProfile",
algorithm_configuration_name="myExhaustiveKnn",
- )
- ]
+ ),
+ ],
)
semantic_config = SemanticConfiguration(
name="my-semantic-config",
prioritized_fields=SemanticPrioritizedFields(
title_field=SemanticField(field_name="title"),
- content_fields=[SemanticField(field_name="content")]
- )
+ content_fields=[SemanticField(field_name="content")],
+ ),
)
# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])
# Create the search index with the semantic settings
-index = SearchIndex(name=index_name, fields=fields,
- vector_search=vector_search, semantic_search=semantic_search)
+index = SearchIndex(
+ name=index_name,
+ fields=fields,
+ vector_search=vector_search,
+ semantic_search=semantic_search,
+)
result = index_client.create_or_update_index(index)
-print(f' {result.name} created')
+print(f" {result.name} created")
# Create the drafts search index with the semantic settings
-index = SearchIndex(name=drafts_index_name, fields=fields,
- vector_search=vector_search, semantic_search=semantic_search)
+index = SearchIndex(
+ name=drafts_index_name,
+ fields=fields,
+ vector_search=vector_search,
+ semantic_search=semantic_search,
+)
result = index_client.create_or_update_index(index)
-print(f' {result.name} created')
+print(f" {result.name} created")
-#add documents to the index
+# add documents to the index
-from azure.core.credentials import AzureKeyCredential
-from azure.storage.filedatalake import (
- DataLakeServiceClient,
- DataLakeDirectoryClient,
- FileSystemClient
-)
-from azure.identity import ClientSecretCredential
-import pypdf
-from io import BytesIO
import base64
import time
-import pandas as pd
+from io import BytesIO
+import pandas as pd
+import pypdf
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import ClientSecretCredential
+from azure.storage.filedatalake import (
+ DataLakeDirectoryClient,
+ DataLakeServiceClient,
+ FileSystemClient,
+)
account_name = get_secrets_from_kv(key_vault_name, "ADLS-ACCOUNT-NAME")
credential = DefaultAzureCredential()
account_url = f"https://{account_name}.dfs.core.windows.net"
-service_client = DataLakeServiceClient(account_url, credential=credential,api_version='2023-01-03')
+service_client = DataLakeServiceClient(
+ account_url, credential=credential, api_version="2023-01-03"
+)
-file_system_client = service_client.get_file_system_client(file_system_client_name)
-directory_name = directory + '/pdfs'
+file_system_client = service_client.get_file_system_client(file_system_client_name)
+directory_name = directory + "/pdfs"
paths = file_system_client.get_paths(path=directory_name)
# Azure Cognitive Search Vector Index
@@ -375,7 +455,7 @@ def chunk_data(text):
print(file_path)
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df_metadata = pd.read_csv(csv_file, encoding='utf-8')
+df_metadata = pd.read_csv(csv_file, encoding="utf-8")
docs = []
num_pdfs = 0
@@ -387,59 +467,83 @@ def chunk_data(text):
stream = BytesIO()
pdf_file.readinto(stream)
pdf_reader = pypdf.PdfReader(stream)
- filename = path.name.split('/')[-1]
- document_id = filename.replace('.pdf','')
+ filename = path.name.split("/")[-1]
+ document_id = filename.replace(".pdf", "")
+
+ df_file_metadata = df_metadata[df_metadata["pubmed_id"] == int(document_id)].iloc[0]
- df_file_metadata = df_metadata[df_metadata['pubmed_id']==int(document_id)].iloc[0]
-
- text = ""
+ text = ""
- n = num_pages #len(pdf_reader.pages)
+ n = num_pages # len(pdf_reader.pages)
if len(pdf_reader.pages) < n:
n = len(pdf_reader.pages)
- for page_num in range(n): #range(len(pdf_reader.pages)):
- public_url = df_file_metadata['publicurl'] + '#page=' + str(page_num)
+ for page_num in range(n): # range(len(pdf_reader.pages)):
+ public_url = df_file_metadata["publicurl"] + "#page=" + str(page_num)
page = pdf_reader.pages[page_num]
- text = page.extract_text()
-
+ text = page.extract_text()
+
chunks = chunk_data(text)
chunk_num = 0
for chunk in chunks:
chunk_num += 1
d = {
- "chunk_id" : path.name.split('/')[-1] + '_' + str(page_num).zfill(2) + '_' + str(chunk_num).zfill(2),
- "document_id": str(df_file_metadata['pubmed_id']),
- "content": chunk,
- "title": df_file_metadata['title'],
- "abstract": df_file_metadata['abstract'] } #path.name.split('/')[-1] + '_' + str(page_num).zfill(2) + '_' + str(chunk_num).zfill(2)}
-
- d["dateTime"],d["Person"],d["Location"],d["Organization"],d["URL"],d["Email"],d["PersonType"],d["Event"],d["Quantity"] = get_named_entities(cog_services_client,d["content"])
+ "chunk_id": path.name.split("/")[-1]
+ + "_"
+ + str(page_num).zfill(2)
+ + "_"
+ + str(chunk_num).zfill(2),
+ "document_id": str(df_file_metadata["pubmed_id"]),
+ "content": chunk,
+ "title": df_file_metadata["title"],
+ "abstract": df_file_metadata["abstract"],
+ } # path.name.split('/')[-1] + '_' + str(page_num).zfill(2) + '_' + str(chunk_num).zfill(2)}
+
+ (
+ d["dateTime"],
+ d["Person"],
+ d["Location"],
+ d["Organization"],
+ d["URL"],
+ d["Email"],
+ d["PersonType"],
+ d["Event"],
+ d["Quantity"],
+ ) = get_named_entities(cog_services_client, d["content"])
counter += 1
try:
- v_titleVector = get_embeddings(d["title"],openai_api_base,openai_api_version,openai_api_key)
+ v_titleVector = get_embeddings(
+ d["title"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_titleVector = get_embeddings(d["title"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_titleVector = get_embeddings(
+ d["title"], openai_api_base, openai_api_version, openai_api_key
+ )
+
try:
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
docs.append(
- {
- "id": base64.urlsafe_b64encode(bytes(d["chunk_id"], encoding='utf-8')).decode('utf-8'),
+ {
+ "id": base64.urlsafe_b64encode(
+ bytes(d["chunk_id"], encoding="utf-8")
+ ).decode("utf-8"),
"chunk_id": d["chunk_id"],
"document_id": d["document_id"],
"title": d["title"],
"content": d["content"],
- "sourceurl": path.name.split('/')[-1],
- "publicurl": public_url,
+ "sourceurl": path.name.split("/")[-1],
+ "publicurl": public_url,
"dateTime": d["dateTime"],
"Person": d["Person"],
"Location": d["Location"],
@@ -450,18 +554,16 @@ def chunk_data(text):
"Event": d["Event"],
"Quantity": d["Quantity"],
"titleVector": v_titleVector,
- "contentVector": v_contentVector
- }
+ "contentVector": v_contentVector,
+ }
)
-
+
if counter % 10 == 0:
result = client.upload_documents(documents=docs)
result = drafts_client.upload_documents(documents=docs)
docs = []
- print(f' {str(counter)} uploaded')
-#upload the last batch
+ print(f" {str(counter)} uploaded")
+# upload the last batch
if docs != []:
client.upload_documents(documents=docs)
drafts_client.upload_documents(documents=docs)
-
-
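
Two smaller observations on the script above survive the reformatting. First, the capitalized `Filterable`/`Sortable`/`Facetable` keywords on the `SimpleField` entries do not match the lowercase `filterable`/`sortable`/`facetable` parameters the azure-search-documents field helpers read, so those flags are most likely ignored rather than applied. Second, the embedding calls retry once behind a bare `except:` and a fixed 30-second sleep. A minimal sketch, assuming the openai 1.x client already used by `get_embeddings`, of a bounded retry that only catches rate-limit errors:

```python
# Sketch only: wraps the script's existing get_embeddings in a bounded retry.
# RateLimitError comes from the openai 1.x package (the same one that provides
# AzureOpenAI); the attempt count and delays are illustrative, not tuned.
import time

from openai import RateLimitError


def get_embeddings_with_retry(text, api_base, api_version, api_key, attempts=4):
    delay = 5
    for attempt in range(1, attempts + 1):
        try:
            return get_embeddings(text, api_base, api_version, api_key)
        except RateLimitError:
            if attempt == attempts:
                raise                  # give up after the final attempt
            time.sleep(delay)          # back off before retrying
            delay *= 2
```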
diff --git a/ResearchAssistant/Deployment/scripts/index_scripts/create_drafts_index.py b/ResearchAssistant/Deployment/scripts/index_scripts/create_drafts_index.py
index 9acb0a492..2e3c33073 100644
--- a/ResearchAssistant/Deployment/scripts/index_scripts/create_drafts_index.py
+++ b/ResearchAssistant/Deployment/scripts/index_scripts/create_drafts_index.py
@@ -1,138 +1,122 @@
-#Get Azure Key Vault Client
-key_vault_name = 'kv_to-be-replaced'
+# Get Azure Key Vault Client
+key_vault_name = "kv_to-be-replaced"
-#hardcoded values
+# hardcoded values
index_name = "draftsindex"
file_system_client_name = "data"
-directory = 'demodata/completed_grants'
-directory2 = 'demodata2/completed_grants'
-directory3 = 'demodata3/completed_grants'
-csv_file_name = '/metadata/completed_grants.csv'
+directory = "demodata/completed_grants"
+directory2 = "demodata2/completed_grants"
+directory3 = "demodata3/completed_grants"
+csv_file_name = "/metadata/completed_grants.csv"
num_pages = 10
-from azure.keyvault.secrets import SecretClient
-from azure.identity import DefaultAzureCredential
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+
def get_secrets_from_kv(kv_name, secret_name):
-
- # Set the name of the Azure Key Vault
- key_vault_name = kv_name
-
- # Create a credential object using the default Azure credentials
- credential = DefaultAzureCredential()
-
- # Create a secret client object using the credential and Key Vault name
- secret_client = SecretClient(vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential)
-
- # Retrieve the secret value
- return(secret_client.get_secret(secret_name).value)
-
-
-#Utils
- # Import required libraries
-import os
-import json
-import openai
-import os
-from azure.core.credentials import AzureKeyCredential
-from azure.ai.textanalytics import TextAnalyticsClient
+ # Set the name of the Azure Key Vault
+ key_vault_name = kv_name
-from azure.core.credentials import AzureKeyCredential
-from azure.search.documents import SearchClient, SearchIndexingBufferedSender
-from azure.search.documents.indexes import SearchIndexClient
-from azure.search.documents.models import (
- QueryAnswerType,
- QueryCaptionType,
- QueryCaptionResult,
- QueryAnswerResult,
- SemanticErrorMode,
- SemanticErrorReason,
- SemanticSearchResultsType,
- QueryType,
- VectorizedQuery,
- VectorQuery,
- VectorFilterMode,
-)
-from azure.search.documents.indexes.models import (
+ # Create a credential object using the default Azure credentials
+ credential = DefaultAzureCredential()
+
+ # Create a secret client object using the credential and Key Vault name
+ secret_client = SecretClient(
+ vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential
+ )
+
+ # Retrieve the secret value
+ return secret_client.get_secret(secret_name).value
+
+
+# Utils
+# Import required libraries
+import json
+import os
+
+import openai
+from azure.ai.textanalytics import TextAnalyticsClient
+from azure.core.credentials import AzureKeyCredential
+from azure.search.documents import SearchClient, SearchIndexingBufferedSender
+from azure.search.documents.indexes import SearchIndexClient
+from azure.search.documents.indexes.models import (
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
- SearchIndex,
- SearchField,
- SearchFieldDataType,
- SimpleField,
- SearchableField,
- SearchIndex,
- SemanticConfiguration,
- SemanticPrioritizedFields,
- SemanticField,
- SearchField,
- SemanticSearch,
- VectorSearch,
HnswAlgorithmConfiguration,
- HnswParameters,
- VectorSearch,
- VectorSearchAlgorithmConfiguration,
- VectorSearchAlgorithmKind,
- VectorSearchProfile,
- SearchIndex,
+ HnswParameters,
+ SearchableField,
SearchField,
SearchFieldDataType,
+ SearchIndex,
+ SemanticConfiguration,
+ SemanticField,
+ SemanticPrioritizedFields,
+ SemanticSearch,
SimpleField,
- SearchableField,
- VectorSearch,
- ExhaustiveKnnParameters,
- SearchIndex,
- SearchField,
- SearchFieldDataType,
- SimpleField,
- SearchableField,
- SearchIndex,
- SemanticConfiguration,
- SemanticField,
- SearchField,
- VectorSearch,
- HnswParameters,
VectorSearch,
+ VectorSearchAlgorithmConfiguration,
VectorSearchAlgorithmKind,
VectorSearchAlgorithmMetric,
VectorSearchProfile,
-)
-search_endpoint = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-ENDPOINT")
-search_key = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-KEY")
+)
+from azure.search.documents.models import (
+ QueryAnswerResult,
+ QueryAnswerType,
+ QueryCaptionResult,
+ QueryCaptionType,
+ QueryType,
+ SemanticErrorMode,
+ SemanticErrorReason,
+ SemanticSearchResultsType,
+ VectorFilterMode,
+ VectorizedQuery,
+ VectorQuery,
+)
-openai.api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai.api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai.api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+search_endpoint = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-ENDPOINT")
+search_key = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-KEY")
-openai_api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai_api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai_api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+openai.api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai.api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai.api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
+
+openai_api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai_api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai_api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
-# Set up your Azure Text Analytics service and credentials
-COG_SERVICES_NAME = get_secrets_from_kv(key_vault_name,"COG-SERVICES-NAME")
-COG_SERVICES_ENDPOINT = get_secrets_from_kv(key_vault_name,"COG-SERVICES-ENDPOINT")
-COG_SERVICES_KEY = get_secrets_from_kv(key_vault_name,"COG-SERVICES-KEY")
+# Set up your Azure Text Analytics service and credentials
+COG_SERVICES_NAME = get_secrets_from_kv(key_vault_name, "COG-SERVICES-NAME")
+COG_SERVICES_ENDPOINT = get_secrets_from_kv(key_vault_name, "COG-SERVICES-ENDPOINT")
+COG_SERVICES_KEY = get_secrets_from_kv(key_vault_name, "COG-SERVICES-KEY")
-cog_services_credential = AzureKeyCredential(COG_SERVICES_KEY)
+cog_services_credential = AzureKeyCredential(COG_SERVICES_KEY)
-# Create a TextAnalyticsClient using your endpoint and credentials
-cog_services_client = TextAnalyticsClient(endpoint=COG_SERVICES_ENDPOINT, credential=cog_services_credential)
+# Create a TextAnalyticsClient using your endpoint and credentials
+cog_services_client = TextAnalyticsClient(
+ endpoint=COG_SERVICES_ENDPOINT, credential=cog_services_credential
+)
-def get_named_entities(cog_services_client,input_text):
- # Call the named entity recognition API to extract named entities from your text
- input_text = input_text[:5000] #limit to 5000 characters
- result = cog_services_client.recognize_entities(documents=[input_text])
-
- # return the named entities for each document
- # full list of categories #https://learn.microsoft.com/en-us/azure/ai-services/language-service/named-entity-recognition/concepts/named-entity-categories?tabs=ga-api
- Person = []
+def get_named_entities(cog_services_client, input_text):
+ # Call the named entity recognition API to extract named entities from your text
+ input_text = input_text[:5000] # limit to 5000 characters
+ result = cog_services_client.recognize_entities(documents=[input_text])
+
+ # return the named entities for each document
+ # full list of categories #https://learn.microsoft.com/en-us/azure/ai-services/language-service/named-entity-recognition/concepts/named-entity-categories?tabs=ga-api
+
+ Person = []
Location = []
- Organization = []
+ Organization = []
DateTime = []
- URL = []
+ URL = []
Email = []
PersonType = []
Event = []
@@ -140,7 +124,7 @@ def get_named_entities(cog_services_client,input_text):
for idx, doc in enumerate(result):
if not doc.is_error:
- for entity in doc.entities:
+ for entity in doc.entities:
if entity.category == "DateTime":
DateTime.append(entity.text)
elif entity.category == "Person":
@@ -160,39 +144,53 @@ def get_named_entities(cog_services_client,input_text):
elif entity.category == "Quantity":
Quantity.append(entity.text)
- else:
- print(" Error: {}".format(doc.error.message))
- return(list(set(DateTime)),list(set(Person)),list(set(Location)),list(set(Organization)),list(set(URL)),list(set(Email)),list(set(PersonType)),list(set(Event)),list(set(Quantity)))
-
+ else:
+ print(" Error: {}".format(doc.error.message))
+ return (
+ list(set(DateTime)),
+ list(set(Person)),
+ list(set(Location)),
+ list(set(Organization)),
+ list(set(URL)),
+ list(set(Email)),
+ list(set(PersonType)),
+ list(set(Event)),
+ list(set(Quantity)),
+ )
+
from openai import AzureOpenAI
+
# Function: Get Embeddings
-def get_embeddings(text: str,openai_api_base,openai_api_version,openai_api_key):
+def get_embeddings(text: str, openai_api_base, openai_api_version, openai_api_key):
model_id = "text-embedding-ada-002"
client = AzureOpenAI(
api_version=openai_api_version,
azure_endpoint=openai_api_base,
- api_key = openai_api_key
+ api_key=openai_api_key,
)
-
+
# embedding = openai.Embedding.create(input=text, deployment_id=model_id)["data"][0]["embedding"]
embedding = client.embeddings.create(input=text, model=model_id).data[0].embedding
return embedding
+
# from langchain.text_splitter import MarkdownTextSplitter, RecursiveCharacterTextSplitter, PythonCodeTextSplitter
# import tiktoken
import re
+
def clean_spaces_with_regex(text):
# Use a regular expression to replace multiple spaces with a single space
- cleaned_text = re.sub(r'\s+', ' ', text)
+ cleaned_text = re.sub(r"\s+", " ", text)
# Use a regular expression to replace consecutive dots with a single dot
- cleaned_text = re.sub(r'\.{2,}', '.', cleaned_text)
+ cleaned_text = re.sub(r"\.{2,}", ".", cleaned_text)
return cleaned_text
+
# def estimate_tokens(text):
# GPT2_TOKENIZER = tiktoken.get_encoding("gpt2")
# return(len(GPT2_TOKENIZER.encode(text)))
@@ -209,27 +207,28 @@ def clean_spaces_with_regex(text):
# return(splitter.split_text(text))
+
def chunk_data(text):
- tokens_per_chunk = 500 #1024
+ tokens_per_chunk = 500 # 1024
text = clean_spaces_with_regex(text)
SENTENCE_ENDINGS = [".", "!", "?"]
- WORDS_BREAKS = ['\n', '\t', '}', '{', ']', '[', ')', '(', ' ', ':', ';', ',']
+ WORDS_BREAKS = ["\n", "\t", "}", "{", "]", "[", ")", "(", " ", ":", ";", ","]
- sentences = text.split('. ') # Split text into sentences
+ sentences = text.split(". ") # Split text into sentences
chunks = []
- current_chunk = ''
+ current_chunk = ""
current_chunk_token_count = 0
-
+
# Iterate through each sentence
for sentence in sentences:
# Split sentence into tokens
tokens = sentence.split()
-
+
# Check if adding the current sentence exceeds tokens_per_chunk
if current_chunk_token_count + len(tokens) <= tokens_per_chunk:
# Add the sentence to the current chunk
if current_chunk:
- current_chunk += '. ' + sentence
+ current_chunk += ". " + sentence
else:
current_chunk += sentence
current_chunk_token_count += len(tokens)
@@ -238,43 +237,114 @@ def chunk_data(text):
chunks.append(current_chunk)
current_chunk = sentence
current_chunk_token_count = len(tokens)
-
+
# Add the last chunk
if current_chunk:
chunks.append(current_chunk)
-
+
return chunks
+
# Create the search index
search_credential = AzureKeyCredential(search_key)
-index_client = SearchIndexClient(
- endpoint=search_endpoint, credential=search_credential)
+index_client = SearchIndexClient(endpoint=search_endpoint, credential=search_credential)
fields = [
- SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
+ SimpleField(
+ name="id",
+ type=SearchFieldDataType.String,
+ key=True,
+ sortable=True,
+ filterable=True,
+ facetable=True,
+ ),
SearchableField(name="chunk_id", type=SearchFieldDataType.String),
SearchableField(name="document_id", type=SearchFieldDataType.String),
SearchableField(name="title", type=SearchFieldDataType.String),
SearchableField(name="content", type=SearchFieldDataType.String),
SearchableField(name="sourceurl", type=SearchFieldDataType.String),
SearchableField(name="publicurl", type=SearchFieldDataType.String),
- SimpleField(name="dateTime", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Person", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Location", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Organization", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="URL", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Email", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="PersonType", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Event", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Quantity", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SearchField(name="titleVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
- SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile")
+ SimpleField(
+ name="dateTime",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Person",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Location",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Organization",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="URL",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Email",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="PersonType",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Event",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Quantity",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SearchField(
+ name="titleVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
+ SearchField(
+ name="contentVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
]
-# Configure the vector search configuration
+# Configure the vector search configuration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
@@ -284,16 +354,16 @@ def chunk_data(text):
m=4,
ef_construction=400,
ef_search=500,
- metric=VectorSearchAlgorithmMetric.COSINE
- )
+ metric=VectorSearchAlgorithmMetric.COSINE,
+ ),
),
ExhaustiveKnnAlgorithmConfiguration(
name="myExhaustiveKnn",
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
parameters=ExhaustiveKnnParameters(
metric=VectorSearchAlgorithmMetric.COSINE
- )
- )
+ ),
+ ),
],
profiles=[
VectorSearchProfile(
@@ -303,56 +373,62 @@ def chunk_data(text):
VectorSearchProfile(
name="myExhaustiveKnnProfile",
algorithm_configuration_name="myExhaustiveKnn",
- )
- ]
+ ),
+ ],
)
semantic_config = SemanticConfiguration(
name="my-semantic-config",
prioritized_fields=SemanticPrioritizedFields(
title_field=SemanticField(field_name="title"),
- content_fields=[SemanticField(field_name="content")]
- )
+ content_fields=[SemanticField(field_name="content")],
+ ),
)
# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])
# Create the search index with the semantic settings
-index = SearchIndex(name=index_name, fields=fields,
- vector_search=vector_search, semantic_search=semantic_search)
+index = SearchIndex(
+ name=index_name,
+ fields=fields,
+ vector_search=vector_search,
+ semantic_search=semantic_search,
+)
result = index_client.create_or_update_index(index)
-print(f' {result.name} created')
+print(f" {result.name} created")
# #add documents to the index
-from azure.core.credentials import AzureKeyCredential
-from azure.storage.filedatalake import (
- DataLakeServiceClient,
- DataLakeDirectoryClient,
- FileSystemClient
-)
-from azure.identity import ClientSecretCredential
-import pypdf
-from io import BytesIO
import base64
import time
-import pandas as pd
+from io import BytesIO
+import pandas as pd
+import pypdf
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import ClientSecretCredential
+from azure.storage.filedatalake import (
+ DataLakeDirectoryClient,
+ DataLakeServiceClient,
+ FileSystemClient,
+)
account_name = get_secrets_from_kv(key_vault_name, "ADLS-ACCOUNT-NAME")
credential = DefaultAzureCredential()
account_url = f"https://{account_name}.dfs.core.windows.net"
-service_client = DataLakeServiceClient(account_url, credential=credential,api_version='2023-01-03')
+service_client = DataLakeServiceClient(
+ account_url, credential=credential, api_version="2023-01-03"
+)
-file_system_client = service_client.get_file_system_client(file_system_client_name)
-directory_name = directory + '/pdfs'
+file_system_client = service_client.get_file_system_client(file_system_client_name)
+directory_name = directory + "/pdfs"
paths = list(file_system_client.get_paths(path=directory_name))
-paths = paths + list(file_system_client.get_paths(path=directory2 + '/pdfs'))
-paths = paths + list(file_system_client.get_paths(path=directory3 + '/pdfs'))
+paths = paths + list(file_system_client.get_paths(path=directory2 + "/pdfs"))
+paths = paths + list(file_system_client.get_paths(path=directory3 + "/pdfs"))
# Azure Cognitive Search Vector Index
search_credential = AzureKeyCredential(search_key)
@@ -366,7 +442,7 @@ def chunk_data(text):
print(file_path)
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df_metadata = pd.read_csv(csv_file, encoding='utf-8')
+df_metadata = pd.read_csv(csv_file, encoding="utf-8")
docs = []
num_pdfs = 0
@@ -378,57 +454,81 @@ def chunk_data(text):
stream = BytesIO()
pdf_file.readinto(stream)
pdf_reader = pypdf.PdfReader(stream)
- filename = path.name.split('/')[-1]
- document_id = filename.replace('.pdf','')
+ filename = path.name.split("/")[-1]
+ document_id = filename.replace(".pdf", "")
+
+ df_file_metadata = df_metadata[df_metadata["grant_id"] == document_id].iloc[0]
- df_file_metadata = df_metadata[df_metadata['grant_id']==document_id].iloc[0]
-
- text = ""
+ text = ""
- n = num_pages #len(pdf_reader.pages)
+ n = num_pages # len(pdf_reader.pages)
if len(pdf_reader.pages) < n:
n = len(pdf_reader.pages)
- for page_num in range(n): #range(len(pdf_reader.pages)):
- public_url = df_file_metadata['publicurl'] + '#page=' + str(page_num)
+ for page_num in range(n): # range(len(pdf_reader.pages)):
+ public_url = df_file_metadata["publicurl"] + "#page=" + str(page_num)
page = pdf_reader.pages[page_num]
- text = page.extract_text()
-
+ text = page.extract_text()
+
chunks = chunk_data(text)
chunk_num = 0
for chunk in chunks:
chunk_num += 1
d = {
- "chunk_id" : path.name.split('/')[-1] + '_' + str(page_num).zfill(2) + '_' + str(chunk_num).zfill(2),
- "document_id": str(df_file_metadata['grant_id']),
- "content": chunk,
- "title": df_file_metadata['title'] }
+ "chunk_id": path.name.split("/")[-1]
+ + "_"
+ + str(page_num).zfill(2)
+ + "_"
+ + str(chunk_num).zfill(2),
+ "document_id": str(df_file_metadata["grant_id"]),
+ "content": chunk,
+ "title": df_file_metadata["title"],
+ }
- d["dateTime"],d["Person"],d["Location"],d["Organization"],d["URL"],d["Email"],d["PersonType"],d["Event"],d["Quantity"] = get_named_entities(cog_services_client,d["content"])
+ (
+ d["dateTime"],
+ d["Person"],
+ d["Location"],
+ d["Organization"],
+ d["URL"],
+ d["Email"],
+ d["PersonType"],
+ d["Event"],
+ d["Quantity"],
+ ) = get_named_entities(cog_services_client, d["content"])
counter += 1
try:
- v_titleVector = get_embeddings(d["title"],openai_api_base,openai_api_version,openai_api_key)
+ v_titleVector = get_embeddings(
+ d["title"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_titleVector = get_embeddings(d["title"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_titleVector = get_embeddings(
+ d["title"], openai_api_base, openai_api_version, openai_api_key
+ )
+
try:
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
docs.append(
- {
- "id": base64.urlsafe_b64encode(bytes(d["chunk_id"], encoding='utf-8')).decode('utf-8'),
+ {
+ "id": base64.urlsafe_b64encode(
+ bytes(d["chunk_id"], encoding="utf-8")
+ ).decode("utf-8"),
"chunk_id": d["chunk_id"],
"document_id": d["document_id"],
"title": d["title"],
"content": d["content"],
- "sourceurl": path.name.split('/')[-1],
+ "sourceurl": path.name.split("/")[-1],
"publicurl": public_url,
"dateTime": d["dateTime"],
"Person": d["Person"],
@@ -440,14 +540,14 @@ def chunk_data(text):
"Event": d["Event"],
"Quantity": d["Quantity"],
"titleVector": v_titleVector,
- "contentVector": v_contentVector
- }
+ "contentVector": v_contentVector,
+ }
)
-
+
if counter % 10 == 0:
result = client.upload_documents(documents=docs)
docs = []
- print(f' {str(counter)} uploaded')
-#upload the last batch
+ print(f" {str(counter)} uploaded")
+# upload the last batch
if docs != []:
client.upload_documents(documents=docs)
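
For context on the embedding calls reformatted above: both index scripts call get_embeddings, and on failure they sleep 30 seconds and retry exactly once. A minimal sketch of that same pattern factored into a helper is shown below; the name get_embeddings_with_retry and the retries/delay parameters are illustrative additions, not part of the scripts, and the sketch assumes the scripts' existing get_embeddings function is in scope.

import time

def get_embeddings_with_retry(text, api_base, api_version, api_key, retries=1, delay=30):
    # Same behaviour as the inline try/except blocks in the scripts:
    # on failure, wait a fixed delay and try again, re-raising after the last attempt.
    for attempt in range(retries + 1):
        try:
            return get_embeddings(text, api_base, api_version, api_key)
        except Exception:
            if attempt == retries:
                raise
            time.sleep(delay)

# Usage mirroring the script (hypothetical refactor, not applied in this diff):
# v_titleVector = get_embeddings_with_retry(d["title"], openai_api_base, openai_api_version, openai_api_key)
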
diff --git a/ResearchAssistant/Deployment/scripts/index_scripts/create_grants_index.py b/ResearchAssistant/Deployment/scripts/index_scripts/create_grants_index.py
index a59871275..dcb7e2de7 100644
--- a/ResearchAssistant/Deployment/scripts/index_scripts/create_grants_index.py
+++ b/ResearchAssistant/Deployment/scripts/index_scripts/create_grants_index.py
@@ -1,136 +1,120 @@
-#Get Azure Key Vault Client
-key_vault_name = 'kv_to-be-replaced'
+# Get Azure Key Vault Client
+key_vault_name = "kv_to-be-replaced"
-#hardcoded values
+# hardcoded values
index_name = "grantsindex"
-drafts_index_name = 'draftsindex'
+drafts_index_name = "draftsindex"
file_system_client_name = "data"
-directory = 'demodata/nih_grants'
-csv_file_name = '/metadata/nih_grants.csv'
+directory = "demodata/nih_grants"
+csv_file_name = "/metadata/nih_grants.csv"
num_pages = 10
-from azure.keyvault.secrets import SecretClient
-from azure.identity import DefaultAzureCredential
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+
def get_secrets_from_kv(kv_name, secret_name):
-
- # Set the name of the Azure Key Vault
- key_vault_name = kv_name
-
- # Create a credential object using the default Azure credentials
- credential = DefaultAzureCredential()
-
- # Create a secret client object using the credential and Key Vault name
- secret_client = SecretClient(vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential)
-
- # Retrieve the secret value
- return(secret_client.get_secret(secret_name).value)
-
-
-#Utils
- # Import required libraries
-import os
-import json
-import openai
-import os
-from azure.core.credentials import AzureKeyCredential
-from azure.ai.textanalytics import TextAnalyticsClient
+ # Set the name of the Azure Key Vault
+ key_vault_name = kv_name
-from azure.core.credentials import AzureKeyCredential
-from azure.search.documents import SearchClient, SearchIndexingBufferedSender
-from azure.search.documents.indexes import SearchIndexClient
-from azure.search.documents.models import (
- QueryAnswerType,
- QueryCaptionType,
- QueryCaptionResult,
- QueryAnswerResult,
- SemanticErrorMode,
- SemanticErrorReason,
- SemanticSearchResultsType,
- QueryType,
- VectorizedQuery,
- VectorQuery,
- VectorFilterMode,
-)
-from azure.search.documents.indexes.models import (
+ # Create a credential object using the default Azure credentials
+ credential = DefaultAzureCredential()
+
+ # Create a secret client object using the credential and Key Vault name
+ secret_client = SecretClient(
+ vault_url=f"https://{key_vault_name}.vault.azure.net/", credential=credential
+ )
+
+ # Retrieve the secret value
+ return secret_client.get_secret(secret_name).value
+
+
+# Utils
+# Import required libraries
+import json
+import os
+
+import openai
+from azure.ai.textanalytics import TextAnalyticsClient
+from azure.core.credentials import AzureKeyCredential
+from azure.search.documents import SearchClient, SearchIndexingBufferedSender
+from azure.search.documents.indexes import SearchIndexClient
+from azure.search.documents.indexes.models import (
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
- SearchIndex,
- SearchField,
- SearchFieldDataType,
- SimpleField,
- SearchableField,
- SearchIndex,
- SemanticConfiguration,
- SemanticPrioritizedFields,
- SemanticField,
- SearchField,
- SemanticSearch,
- VectorSearch,
HnswAlgorithmConfiguration,
- HnswParameters,
- VectorSearch,
- VectorSearchAlgorithmConfiguration,
- VectorSearchAlgorithmKind,
- VectorSearchProfile,
- SearchIndex,
+ HnswParameters,
+ SearchableField,
SearchField,
SearchFieldDataType,
+ SearchIndex,
+ SemanticConfiguration,
+ SemanticField,
+ SemanticPrioritizedFields,
+ SemanticSearch,
SimpleField,
- SearchableField,
- VectorSearch,
- ExhaustiveKnnParameters,
- SearchIndex,
- SearchField,
- SearchFieldDataType,
- SimpleField,
- SearchableField,
- SearchIndex,
- SemanticConfiguration,
- SemanticField,
- SearchField,
- VectorSearch,
- HnswParameters,
VectorSearch,
+ VectorSearchAlgorithmConfiguration,
VectorSearchAlgorithmKind,
VectorSearchAlgorithmMetric,
VectorSearchProfile,
-)
-search_endpoint = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-ENDPOINT")
-search_key = get_secrets_from_kv(key_vault_name,"AZURE-SEARCH-KEY")
+)
+from azure.search.documents.models import (
+ QueryAnswerResult,
+ QueryAnswerType,
+ QueryCaptionResult,
+ QueryCaptionType,
+ QueryType,
+ SemanticErrorMode,
+ SemanticErrorReason,
+ SemanticSearchResultsType,
+ VectorFilterMode,
+ VectorizedQuery,
+ VectorQuery,
+)
+
+search_endpoint = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-ENDPOINT")
+search_key = get_secrets_from_kv(key_vault_name, "AZURE-SEARCH-KEY")
-openai.api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai.api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai.api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+openai.api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai.api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai.api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
+
+openai_api_key = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-KEY")
+openai_api_base = get_secrets_from_kv(key_vault_name, "AZURE-OPENAI-ENDPOINT")
+openai_api_version = get_secrets_from_kv(
+ key_vault_name, "AZURE-OPENAI-PREVIEW-API-VERSION"
+)
-openai_api_key = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-KEY")
-openai_api_base = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-ENDPOINT")
-openai_api_version = get_secrets_from_kv(key_vault_name,"AZURE-OPENAI-PREVIEW-API-VERSION")
+# Set up your Azure Text Analytics service and credentials
+COG_SERVICES_NAME = get_secrets_from_kv(key_vault_name, "COG-SERVICES-NAME")
+COG_SERVICES_ENDPOINT = get_secrets_from_kv(key_vault_name, "COG-SERVICES-ENDPOINT")
+COG_SERVICES_KEY = get_secrets_from_kv(key_vault_name, "COG-SERVICES-KEY")
-# Set up your Azure Text Analytics service and credentials
-COG_SERVICES_NAME = get_secrets_from_kv(key_vault_name,"COG-SERVICES-NAME")
-COG_SERVICES_ENDPOINT = get_secrets_from_kv(key_vault_name,"COG-SERVICES-ENDPOINT")
-COG_SERVICES_KEY = get_secrets_from_kv(key_vault_name,"COG-SERVICES-KEY")
+cog_services_credential = AzureKeyCredential(COG_SERVICES_KEY)
-cog_services_credential = AzureKeyCredential(COG_SERVICES_KEY)
+# Create a TextAnalyticsClient using your endpoint and credentials
+cog_services_client = TextAnalyticsClient(
+ endpoint=COG_SERVICES_ENDPOINT, credential=cog_services_credential
+)
-# Create a TextAnalyticsClient using your endpoint and credentials
-cog_services_client = TextAnalyticsClient(endpoint=COG_SERVICES_ENDPOINT, credential=cog_services_credential)
-def get_named_entities(cog_services_client,input_text):
- # Call the named entity recognition API to extract named entities from your text
- result = cog_services_client.recognize_entities(documents=[input_text])
-
- # return the named entities for each document
- # full list of categories #https://learn.microsoft.com/en-us/azure/ai-services/language-service/named-entity-recognition/concepts/named-entity-categories?tabs=ga-api
+def get_named_entities(cog_services_client, input_text):
+ # Call the named entity recognition API to extract named entities from your text
+ result = cog_services_client.recognize_entities(documents=[input_text])
- Person = []
+ # return the named entities for each document
+ # full list of categories #https://learn.microsoft.com/en-us/azure/ai-services/language-service/named-entity-recognition/concepts/named-entity-categories?tabs=ga-api
+
+ Person = []
Location = []
- Organization = []
+ Organization = []
DateTime = []
- URL = []
+ URL = []
Email = []
PersonType = []
Event = []
@@ -138,7 +122,7 @@ def get_named_entities(cog_services_client,input_text):
for idx, doc in enumerate(result):
if not doc.is_error:
- for entity in doc.entities:
+ for entity in doc.entities:
if entity.category == "DateTime":
DateTime.append(entity.text)
elif entity.category == "Person":
@@ -158,39 +142,53 @@ def get_named_entities(cog_services_client,input_text):
elif entity.category == "Quantity":
Quantity.append(entity.text)
- else:
- print(" Error: {}".format(doc.error.message))
- return(list(set(DateTime)),list(set(Person)),list(set(Location)),list(set(Organization)),list(set(URL)),list(set(Email)),list(set(PersonType)),list(set(Event)),list(set(Quantity)))
-
+ else:
+ print(" Error: {}".format(doc.error.message))
+ return (
+ list(set(DateTime)),
+ list(set(Person)),
+ list(set(Location)),
+ list(set(Organization)),
+ list(set(URL)),
+ list(set(Email)),
+ list(set(PersonType)),
+ list(set(Event)),
+ list(set(Quantity)),
+ )
+
from openai import AzureOpenAI
+
# Function: Get Embeddings
-def get_embeddings(text: str,openai_api_base,openai_api_version,openai_api_key):
+def get_embeddings(text: str, openai_api_base, openai_api_version, openai_api_key):
model_id = "text-embedding-ada-002"
client = AzureOpenAI(
api_version=openai_api_version,
azure_endpoint=openai_api_base,
- api_key = openai_api_key
+ api_key=openai_api_key,
)
-
+
# embedding = openai.Embedding.create(input=text, deployment_id=model_id)["data"][0]["embedding"]
embedding = client.embeddings.create(input=text, model=model_id).data[0].embedding
return embedding
+
# from langchain.text_splitter import MarkdownTextSplitter, RecursiveCharacterTextSplitter, PythonCodeTextSplitter
# import tiktoken
import re
+
def clean_spaces_with_regex(text):
# Use a regular expression to replace multiple spaces with a single space
- cleaned_text = re.sub(r'\s+', ' ', text)
+ cleaned_text = re.sub(r"\s+", " ", text)
# Use a regular expression to replace consecutive dots with a single dot
- cleaned_text = re.sub(r'\.{2,}', '.', cleaned_text)
+ cleaned_text = re.sub(r"\.{2,}", ".", cleaned_text)
return cleaned_text
+
# def estimate_tokens(text):
# GPT2_TOKENIZER = tiktoken.get_encoding("gpt2")
# return(len(GPT2_TOKENIZER.encode(text)))
@@ -207,27 +205,28 @@ def clean_spaces_with_regex(text):
# return(splitter.split_text(text))
+
def chunk_data(text):
- tokens_per_chunk = 500 #1024
+ tokens_per_chunk = 500 # 1024
text = clean_spaces_with_regex(text)
SENTENCE_ENDINGS = [".", "!", "?"]
- WORDS_BREAKS = ['\n', '\t', '}', '{', ']', '[', ')', '(', ' ', ':', ';', ',']
+ WORDS_BREAKS = ["\n", "\t", "}", "{", "]", "[", ")", "(", " ", ":", ";", ","]
- sentences = text.split('. ') # Split text into sentences
+ sentences = text.split(". ") # Split text into sentences
chunks = []
- current_chunk = ''
+ current_chunk = ""
current_chunk_token_count = 0
-
+
# Iterate through each sentence
for sentence in sentences:
# Split sentence into tokens
tokens = sentence.split()
-
+
# Check if adding the current sentence exceeds tokens_per_chunk
if current_chunk_token_count + len(tokens) <= tokens_per_chunk:
# Add the sentence to the current chunk
if current_chunk:
- current_chunk += '. ' + sentence
+ current_chunk += ". " + sentence
else:
current_chunk += sentence
current_chunk_token_count += len(tokens)
@@ -236,43 +235,114 @@ def chunk_data(text):
chunks.append(current_chunk)
current_chunk = sentence
current_chunk_token_count = len(tokens)
-
+
# Add the last chunk
if current_chunk:
chunks.append(current_chunk)
-
+
return chunks
+
# Create the search index
search_credential = AzureKeyCredential(search_key)
-index_client = SearchIndexClient(
- endpoint=search_endpoint, credential=search_credential)
+index_client = SearchIndexClient(endpoint=search_endpoint, credential=search_credential)
fields = [
- SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
+ SimpleField(
+ name="id",
+ type=SearchFieldDataType.String,
+ key=True,
+ sortable=True,
+ filterable=True,
+ facetable=True,
+ ),
SearchableField(name="chunk_id", type=SearchFieldDataType.String),
SearchableField(name="document_id", type=SearchFieldDataType.String),
SearchableField(name="title", type=SearchFieldDataType.String),
SearchableField(name="content", type=SearchFieldDataType.String),
SearchableField(name="sourceurl", type=SearchFieldDataType.String),
SearchableField(name="publicurl", type=SearchFieldDataType.String),
- SimpleField(name="dateTime", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Person", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Location", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Organization", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="URL", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Email", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="PersonType", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Event", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SimpleField(name="Quantity", type=SearchFieldDataType.Collection(SearchFieldDataType.String),Filterable=True,Sortable=True, Facetable=True),
- SearchField(name="titleVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
- SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
- searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile")
+ SimpleField(
+ name="dateTime",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Person",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Location",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Organization",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="URL",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Email",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="PersonType",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Event",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SimpleField(
+ name="Quantity",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.String),
+ Filterable=True,
+ Sortable=True,
+ Facetable=True,
+ ),
+ SearchField(
+ name="titleVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
+ SearchField(
+ name="contentVector",
+ type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+ searchable=True,
+ vector_search_dimensions=1536,
+ vector_search_profile_name="myHnswProfile",
+ ),
]
-# Configure the vector search configuration
+# Configure the vector search configuration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
@@ -282,16 +352,16 @@ def chunk_data(text):
m=4,
ef_construction=400,
ef_search=500,
- metric=VectorSearchAlgorithmMetric.COSINE
- )
+ metric=VectorSearchAlgorithmMetric.COSINE,
+ ),
),
ExhaustiveKnnAlgorithmConfiguration(
name="myExhaustiveKnn",
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
parameters=ExhaustiveKnnParameters(
metric=VectorSearchAlgorithmMetric.COSINE
- )
- )
+ ),
+ ),
],
profiles=[
VectorSearchProfile(
@@ -301,53 +371,59 @@ def chunk_data(text):
VectorSearchProfile(
name="myExhaustiveKnnProfile",
algorithm_configuration_name="myExhaustiveKnn",
- )
- ]
+ ),
+ ],
)
semantic_config = SemanticConfiguration(
name="my-semantic-config",
prioritized_fields=SemanticPrioritizedFields(
title_field=SemanticField(field_name="title"),
- content_fields=[SemanticField(field_name="content")]
- )
+ content_fields=[SemanticField(field_name="content")],
+ ),
)
# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])
# Create the search index with the semantic settings
-index = SearchIndex(name=index_name, fields=fields,
- vector_search=vector_search, semantic_search=semantic_search)
+index = SearchIndex(
+ name=index_name,
+ fields=fields,
+ vector_search=vector_search,
+ semantic_search=semantic_search,
+)
result = index_client.create_or_update_index(index)
-print(f' {result.name} created')
+print(f" {result.name} created")
-#add documents to the index
+# add documents to the index
-from azure.core.credentials import AzureKeyCredential
-from azure.storage.filedatalake import (
- DataLakeServiceClient,
- DataLakeDirectoryClient,
- FileSystemClient
-)
-from azure.identity import ClientSecretCredential
-import pypdf
-from io import BytesIO
import base64
import time
-import pandas as pd
+from io import BytesIO
+import pandas as pd
+import pypdf
+from azure.core.credentials import AzureKeyCredential
+from azure.identity import ClientSecretCredential
+from azure.storage.filedatalake import (
+ DataLakeDirectoryClient,
+ DataLakeServiceClient,
+ FileSystemClient,
+)
account_name = get_secrets_from_kv(key_vault_name, "ADLS-ACCOUNT-NAME")
credential = DefaultAzureCredential()
account_url = f"https://{account_name}.dfs.core.windows.net"
-service_client = DataLakeServiceClient(account_url, credential=credential,api_version='2023-01-03')
+service_client = DataLakeServiceClient(
+ account_url, credential=credential, api_version="2023-01-03"
+)
-file_system_client = service_client.get_file_system_client(file_system_client_name)
-directory_name = directory + '/pdfs'
+file_system_client = service_client.get_file_system_client(file_system_client_name)
+directory_name = directory + "/pdfs"
paths = file_system_client.get_paths(path=directory_name)
# Azure Cognitive Search Vector Index
@@ -363,7 +439,7 @@ def chunk_data(text):
print(file_path)
file_client = file_system_client.get_file_client(file_path)
csv_file = file_client.download_file()
-df_metadata = pd.read_csv(csv_file, encoding='utf-8')
+df_metadata = pd.read_csv(csv_file, encoding="utf-8")
docs = []
num_pdfs = 0
@@ -375,57 +451,81 @@ def chunk_data(text):
stream = BytesIO()
pdf_file.readinto(stream)
pdf_reader = pypdf.PdfReader(stream)
- filename = path.name.split('/')[-1]
- document_id = filename.replace('.pdf','')
+ filename = path.name.split("/")[-1]
+ document_id = filename.replace(".pdf", "")
- df_file_metadata = df_metadata[df_metadata['grant_id']==document_id].iloc[0]
-
- text = ""
+ df_file_metadata = df_metadata[df_metadata["grant_id"] == document_id].iloc[0]
- n = num_pages #len(pdf_reader.pages)
+ text = ""
+
+ n = num_pages # len(pdf_reader.pages)
if len(pdf_reader.pages) < n:
n = len(pdf_reader.pages)
- for page_num in range(n): #range(len(pdf_reader.pages)):
- public_url = df_file_metadata['publicurl'] + '#page=' + str(page_num)
+ for page_num in range(n): # range(len(pdf_reader.pages)):
+ public_url = df_file_metadata["publicurl"] + "#page=" + str(page_num)
page = pdf_reader.pages[page_num]
- text = page.extract_text()
-
+ text = page.extract_text()
+
chunks = chunk_data(text)
chunk_num = 0
for chunk in chunks:
chunk_num += 1
d = {
- "chunk_id" : path.name.split('/')[-1] + '_' + str(page_num).zfill(2) + '_' + str(chunk_num).zfill(2),
- "document_id": str(df_file_metadata['grant_id']),
- "content": chunk,
- "title": df_file_metadata['title'] }
+ "chunk_id": path.name.split("/")[-1]
+ + "_"
+ + str(page_num).zfill(2)
+ + "_"
+ + str(chunk_num).zfill(2),
+ "document_id": str(df_file_metadata["grant_id"]),
+ "content": chunk,
+ "title": df_file_metadata["title"],
+ }
- d["dateTime"],d["Person"],d["Location"],d["Organization"],d["URL"],d["Email"],d["PersonType"],d["Event"],d["Quantity"] = get_named_entities(cog_services_client,d["content"])
+ (
+ d["dateTime"],
+ d["Person"],
+ d["Location"],
+ d["Organization"],
+ d["URL"],
+ d["Email"],
+ d["PersonType"],
+ d["Event"],
+ d["Quantity"],
+ ) = get_named_entities(cog_services_client, d["content"])
counter += 1
try:
- v_titleVector = get_embeddings(d["title"],openai_api_base,openai_api_version,openai_api_key)
+ v_titleVector = get_embeddings(
+ d["title"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_titleVector = get_embeddings(d["title"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_titleVector = get_embeddings(
+ d["title"], openai_api_base, openai_api_version, openai_api_key
+ )
+
try:
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
except:
time.sleep(30)
- v_contentVector = get_embeddings(d["content"],openai_api_base,openai_api_version,openai_api_key)
-
+ v_contentVector = get_embeddings(
+ d["content"], openai_api_base, openai_api_version, openai_api_key
+ )
docs.append(
- {
- "id": base64.urlsafe_b64encode(bytes(d["chunk_id"], encoding='utf-8')).decode('utf-8'),
+ {
+ "id": base64.urlsafe_b64encode(
+ bytes(d["chunk_id"], encoding="utf-8")
+ ).decode("utf-8"),
"chunk_id": d["chunk_id"],
"document_id": d["document_id"],
"title": d["title"],
"content": d["content"],
- "sourceurl": path.name.split('/')[-1],
+ "sourceurl": path.name.split("/")[-1],
"publicurl": public_url,
"dateTime": d["dateTime"],
"Person": d["Person"],
@@ -437,18 +537,16 @@ def chunk_data(text):
"Event": d["Event"],
"Quantity": d["Quantity"],
"titleVector": v_titleVector,
- "contentVector": v_contentVector
- }
+ "contentVector": v_contentVector,
+ }
)
-
+
if counter % 10 == 0:
result = client.upload_documents(documents=docs)
result = drafts_client.upload_documents(documents=docs)
docs = []
- print(f' {str(counter)} uploaded')
-#upload the last batch
+ print(f" {str(counter)} uploaded")
+# upload the last batch
if docs != []:
client.upload_documents(documents=docs)
drafts_client.upload_documents(documents=docs)
-
-
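
A note on the entity fields reformatted in both scripts: the azure-search-documents SimpleField helper documents lowercase keyword arguments (filterable, sortable, facetable), and in the SDK versions I am aware of, keywords it does not recognize are simply not applied, so the capitalized Filterable/Sortable/Facetable flags carried over from the original files most likely leave those collection fields without filter or facet support. A minimal sketch of one such field using the documented keywords follows; it is illustrative only, and sortable is omitted because Azure AI Search does not allow sorting on collection fields.

from azure.search.documents.indexes.models import SearchFieldDataType, SimpleField

# Illustrative only: one of the entity collection fields declared with the
# lowercase keyword names the SDK helper actually recognizes.
person_field = SimpleField(
    name="Person",
    type=SearchFieldDataType.Collection(SearchFieldDataType.String),
    filterable=True,
    facetable=True,
)
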