11# -*- coding: utf-8 -*-
2+ # pylint: disable=E0611,E1101
23"""A class to manage the lifecycle of Pinecone vector database indexes."""
34
45# document loading
910import logging
1011import os
1112
12- # pinecone integration
13- import pinecone
14- from langchain .document_loaders import PyPDFLoader
15- from langchain .embeddings import OpenAIEmbeddings
16- from langchain .text_splitter import Document
13+ # from langchain.text_splitter import Document
14+ from langchain .text_splitter import RecursiveCharacterTextSplitter
1715from langchain .vectorstores .pinecone import Pinecone as LCPinecone
16+ from langchain_community .document_loaders import PyPDFLoader
17+ from langchain_community .embeddings import OpenAIEmbeddings
18+
19+ # pinecone integration
20+ # import pinecone
21+ from pinecone import Pinecone , ServerlessSpec
22+ from pinecone .core .client .exceptions import PineconeApiException
1823
1924# this project
2025from models .conf import settings
2429
2530
2631# pylint: disable=too-few-public-methods
27- class TextSplitter :
28- """
29- Custom text splitter that adds metadata to the Document object
30- which is required by PineconeHybridSearchRetriever.
31- """
32-
33- def create_documents (self , texts ):
34- """Create documents"""
35- documents = []
36- for text in texts :
37- # Create a Document object with the text and metadata
38- document = Document (page_content = text , metadata = {"context" : text })
39- documents .append (document )
40- return documents
32+ # class TextSplitter:
33+ # """
34+ # Custom text splitter that adds metadata to the Document object
35+ # which is required by PineconeHybridSearchRetriever.
36+ # """
37+
38+ # def create_documents(self, texts):
39+ # """Create documents"""
40+ # documents = []
41+ # for text in texts:
42+ # # Create a Document object with the text and metadata
43+ # document = Document(page_content=text, metadata={"context": text})
44+ # documents.append(document)
45+ # return documents
4146
4247
4348class PineconeIndex :
4449 """Pinecone helper class."""
4550
46- _index : pinecone .Index = None
51+ _pinecone = None
52+ _index : Pinecone .Index = None
4753 _index_name : str = None
48- _text_splitter : TextSplitter = None
54+ _text_splitter : RecursiveCharacterTextSplitter = None
4955 _openai_embeddings : OpenAIEmbeddings = None
5056 _vector_store : LCPinecone = None
5157
@@ -69,11 +75,11 @@ def index_name(self, value: str) -> None:
6975 self .init_index ()
7076
@property
def index(self) -> Pinecone.Index:
    """Return the Pinecone index handle for ``self.index_name``.

    The handle is built lazily. On the first access ``init_index()`` is
    called, then the client exposed by ``self.pinecone`` is asked for the
    index by name. The result is cached in ``self._index`` and returned
    directly on later accesses.
    """
    if self._index is not None:
        return self._index
    # Cache miss: run init_index() first, then fetch the handle.
    self.init_index()
    self._index = self.pinecone.Index(name=self.index_name)
    return self._index
7884
7985 @property
@@ -85,7 +91,7 @@ def index_stats(self) -> dict:
@property
def initialized(self) -> bool:
    """Return True when an index named ``self.index_name`` exists.

    The list of indexes is fetched from ``self.pinecone`` on every access;
    nothing is cached here.
    """
    indexes = self.pinecone.list_indexes()
    # The class-based Pinecone client returns a listing of index
    # descriptions rather than bare strings, so a plain ``name in indexes``
    # test never matches. Compare against names() when the listing offers
    # it, and fall back to the raw result (e.g. a list of str) otherwise.
    names = indexes.names() if hasattr(indexes, "names") else indexes
    return self.index_name in names
9096
9197 @property
@@ -113,23 +119,31 @@ def openai_embeddings(self) -> OpenAIEmbeddings:
113119 return self ._openai_embeddings
114120
@property
def pinecone(self):
    """Return the Pinecone client, constructing it on first access.

    The client is created with the API key read from ``settings`` and is
    cached in ``self._pinecone`` for later accesses.
    """
    client = self._pinecone
    if client is None:
        api_key = settings.pinecone_api_key.get_secret_value()
        client = Pinecone(api_key=api_key)
        self._pinecone = client
    return client
127+
@property
def text_splitter(self) -> RecursiveCharacterTextSplitter:
    """Return the text splitter, constructing it on first access.

    A ``RecursiveCharacterTextSplitter`` with default arguments is created
    once and cached in ``self._text_splitter``.
    """
    splitter = self._text_splitter
    if splitter is None:
        splitter = RecursiveCharacterTextSplitter()
        self._text_splitter = splitter
    return splitter
121134
def init_index(self):
    """Verify that an index named self.index_name exists in Pinecone. If not, create it."""
    indexes = self.pinecone.list_indexes()
    # The class-based Pinecone client returns a listing of index
    # descriptions rather than bare strings, so a plain membership test
    # never matches and every call would try to create the index again.
    # Compare against names() when the listing offers it.
    names = indexes.names() if hasattr(indexes, "names") else indexes
    if self.index_name not in names:
        logging.debug("Index does not exist.")
        self.create()
128141
129142 def init (self ):
130143 """Initialize Pinecone."""
131144 # pylint: disable=no-member
132- pinecone .init (api_key = settings .pinecone_api_key .get_secret_value (), environment = settings .pinecone_environment )
145+
146+ # pinecone.init(api_key=settings.pinecone_api_key.get_secret_value(), environment=settings.pinecone_environment)
133147 self ._index = None
134148 self ._index_name = None
135149 self ._text_splitter = None
@@ -142,23 +156,30 @@ def delete(self):
142156 logging .debug ("Index does not exist. Nothing to delete." )
143157 return
144158 print ("Deleting index..." )
145- pinecone .delete_index (self .index_name )
159+ self . pinecone .delete_index (self .index_name )
146160
def create(self):
    """Create a serverless Pinecone index named ``self.index_name``.

    Dimension and metric are read from ``settings``. The index is requested
    with a ``ServerlessSpec`` on AWS in ``us-west-2``.

    Creation is best effort: a ``PineconeApiException`` raised by the client
    is logged and not re-raised, so callers never see it.
    """
    # NOTE(review): the former ``metadata_config`` argument is no longer
    # passed now that a ServerlessSpec is used. Presumably selective
    # metadata indexing only applies to pod-based indexes -- confirm.
    print("Creating index. This may take a few minutes...")
    serverless_spec = ServerlessSpec(cloud="aws", region="us-west-2")
    try:
        self.pinecone.create_index(
            name=self.index_name,
            dimension=settings.pinecone_dimensions,
            metric=settings.pinecone_metric,
            spec=serverless_spec,
        )
    except PineconeApiException as err:
        # Stay best effort, but leave a trace: a silently swallowed
        # auth, quota or "already exists" error is very hard to diagnose.
        logging.warning(
            "Pinecone rejected create_index for %r (status=%s): %s",
            self.index_name,
            getattr(err, "status", None),
            err,
        )
    else:
        print("Index created.")
162183
163184 def initialize (self ):
164185 """Initialize index."""
0 commit comments