fromllama_index.vector_stores.tencentvectordbimportTencentVectorDB,CollectionParams# Setupurl="http://10.0.X.X"key="eC4bLRy2va******************************"collection_params=CollectionParams(dimension=1536,drop_exists=True)# Create an instance of TencentVectorDBvector_store=TencentVectorDB(url=url,key=key,collection_params=collection_params)
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/llama_index/vector_stores/tencentvectordb/base.py
class TencentVectorDB(BasePydanticVectorStore):
    """
    Tencent Vector Store.

    In this vector store, embeddings and docs are stored within a Collection.
    If the Collection does not exist, it will be automatically created.

    In order to use this you need to have a database instance.
    See the following documentation for details:
    https://cloud.tencent.com/document/product/1709/94951

    Args:
        url (Optional[str]): url of Tencent vector database
        username (Optional[str]): The username for Tencent vector database.
            Default value is "root"
        key (Optional[str]): The Api-Key for Tencent vector database
        collection_params (Optional[CollectionParams]): The collection parameters
            for vector database

    Examples:
        `pip install llama-index-vector-stores-tencentvectordb`

        ```python
        from llama_index.vector_stores.tencentvectordb import TencentVectorDB, CollectionParams

        # Setup
        url = "http://10.0.X.X"
        key = "eC4bLRy2va******************************"
        collection_params = CollectionParams(dimension=1536, drop_exists=True)

        # Create an instance of TencentVectorDB
        vector_store = TencentVectorDB(url=url, key=key, collection_params=collection_params)
        ```

    """

    # Text content is stored alongside embeddings in the collection.
    stores_text: bool = True
    filter_fields: List[FilterField] = []
    batch_size: int

    # Handles to the tcvectordb SDK client / database / collection objects.
    _tencent_client: Any = PrivateAttr()
    _database: Any = PrivateAttr()
    _collection: Any = PrivateAttr()
    _filter_fields: List[FilterField] = PrivateAttr()

    def __init__(
        self,
        url: str,
        key: str,
        username: str = DEFAULT_USERNAME,
        database_name: str = DEFAULT_DATABASE_NAME,
        read_consistency: str = READ_EVENTUAL_CONSISTENCY,
        collection_params: CollectionParams = CollectionParams(dimension=1536),
        batch_size: int = 512,
        **kwargs: Any,
    ):
        """Init params."""
        super().__init__(batch_size=batch_size)
        # Connect, then make sure the database and collection exist before use.
        self._init_client(url, username, key, read_consistency)
        self._create_database_if_not_exists(database_name)
        self._create_collection(database_name, collection_params)
        self._init_filter_fields()

    def _init_filter_fields(self) -> None:
        """Mirror the collection's filter indexes into self._filter_fields.

        Every index except the id, doc-id and vector fields is treated as a
        filterable metadata field.
        """
        # NOTE(review): appends to self._filter_fields; relies on the
        # PrivateAttr providing a default list — confirm against full source.
        fields = vars(self._collection).get("indexes", [])
        for field in fields:
            if field["fieldName"] not in [FIELD_ID, DEFAULT_DOC_ID_KEY, FIELD_VECTOR]:
                self._filter_fields.append(
                    FilterField(name=field["fieldName"], data_type=field["fieldType"])
                )

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used for (de)serialization."""
        return "TencentVectorDB"

    @classmethod
    def from_params(
        cls,
        url: str,
        key: str,
        username: str = DEFAULT_USERNAME,
        database_name: str = DEFAULT_DATABASE_NAME,
        read_consistency: str = READ_EVENTUAL_CONSISTENCY,
        collection_params: CollectionParams = CollectionParams(dimension=1536),
        batch_size: int = 512,
        **kwargs: Any,
    ) -> "TencentVectorDB":
        """Alternate constructor; verifies the tcvectordb SDK is importable first."""
        _try_import()
        return cls(
            url=url,
            username=username,
            key=key,
            database_name=database_name,
            read_consistency=read_consistency,
            collection_params=collection_params,
            batch_size=batch_size,
            **kwargs,
        )

    def _init_client(self, url: str, username: str, key: str, read_consistency: str) -> None:
        """Create the tcvectordb client with a validated read-consistency level."""
        import tcvectordb
        from tcvectordb.model.enum import ReadConsistency

        if read_consistency is None:
            # NOTE(review): VALUE_RANGE_ERROR is formatted with one argument
            # here but with two below — verify the template's placeholders.
            raise ValueError(VALUE_RANGE_ERROR.format(read_consistency))
        try:
            v_read_consistency = ReadConsistency(read_consistency)
        except ValueError:
            raise ValueError(
                VALUE_RANGE_ERROR.format(READ_CONSISTENCY, READ_CONSISTENCY_VALUES)
            )
        self._tencent_client = tcvectordb.VectorDBClient(
            url=url,
            username=username,
            key=key,
            read_consistency=v_read_consistency,
            timeout=DEFAULT_TIMEOUT,
        )

    def _create_database_if_not_exists(self, database_name: str) -> None:
        """Open the named database, creating it when absent."""
        db_list = self._tencent_client.list_databases()
        if database_name in [db.database_name for db in db_list]:
            self._database = self._tencent_client.database(database_name)
        else:
            self._database = self._tencent_client.create_database(database_name)

    def _create_collection(self, database_name: str, collection_params: CollectionParams) -> None:
        """Open (or create) the collection described by collection_params."""
        import tcvectordb

        collection_name: str = self._compute_collection_name(database_name, collection_params)
        collection_description = collection_params._collection_description
        # NOTE(review): collection_params is already dereferenced above, so
        # this None check can never fire — confirm the intended order.
        if collection_params is None:
            raise ValueError(VALUE_NONE_ERROR.format("collection_params"))
        try:
            self._collection = self._database.describe_collection(collection_name)
            if collection_params.drop_exists:
                # Caller asked to discard existing data: drop and recreate.
                self._database.drop_collection(collection_name)
                self._create_collection_in_db(
                    collection_name, collection_description, collection_params
                )
        except tcvectordb.exceptions.VectorDBException:
            # describe_collection raises when the collection does not exist yet.
            self._create_collection_in_db(
                collection_name, collection_description, collection_params
            )

    @staticmethod
    def _compute_collection_name(database_name: str, collection_params: CollectionParams) -> str:
        """Derive the collection name; non-default DBs prefix the default name."""
        if database_name == DEFAULT_DATABASE_NAME:
            return collection_params._collection_name
        if collection_params._collection_name != DEFAULT_COLLECTION_NAME:
            return collection_params._collection_name
        else:
            return database_name + "_" + DEFAULT_COLLECTION_NAME

    def _create_collection_in_db(
        self,
        collection_name: str,
        collection_description: str,
        collection_params: CollectionParams,
    ) -> None:
        """Create the collection with primary-key, doc-id and vector indexes."""
        from tcvectordb.model.enum import FieldType, IndexType
        from tcvectordb.model.index import FilterIndex, Index, VectorIndex

        index_type = self._get_index_type(collection_params.index_type)
        metric_type = self._get_metric_type(collection_params.metric_type)
        index_param = self._get_index_params(index_type, collection_params)
        index = Index(
            FilterIndex(
                name=FIELD_ID,
                field_type=FieldType.String,
                index_type=IndexType.PRIMARY_KEY,
            ),
            FilterIndex(
                name=DEFAULT_DOC_ID_KEY,
                field_type=FieldType.String,
                index_type=IndexType.FILTER,
            ),
            VectorIndex(
                name=FIELD_VECTOR,
                dimension=collection_params.dimension,
                index_type=index_type,
                metric_type=metric_type,
                params=index_param,
            ),
        )
        # Add any user-requested filterable metadata fields.
        for field in collection_params.filter_fields:
            index.add(field.to_vdb_filter())
        self._collection = self._database.create_collection(
            name=collection_name,
            shard=collection_params.shard,
            replicas=collection_params.replicas,
            description=collection_description,
            index=index,
        )

    @staticmethod
    def _get_index_params(index_type: Any, collection_params: CollectionParams) -> Any:
        """Build the SDK params object for the chosen index type (None when unmatched)."""
        from tcvectordb.model.enum import IndexType
        from tcvectordb.model.index import (
            HNSWParams,
            IVFFLATParams,
            IVFPQParams,
            IVFSQ4Params,
            IVFSQ8Params,
            IVFSQ16Params,
        )

        # Fall back to an empty mapping so .get() defaults apply below.
        vector_params = (
            {}
            if collection_params.vector_params is None
            else collection_params.vector_params
        )

        if index_type == IndexType.HNSW:
            return HNSWParams(
                m=vector_params.get("M", DEFAULT_HNSW_M),
                efconstruction=vector_params.get("efConstruction", DEFAULT_HNSW_EF),
            )
        elif index_type == IndexType.IVF_FLAT:
            return IVFFLATParams(nlist=vector_params.get("nlist", DEFAULT_IVF_NLIST))
        elif index_type == IndexType.IVF_PQ:
            return IVFPQParams(
                m=vector_params.get("M", DEFAULT_IVF_PQ_M),
                nlist=vector_params.get("nlist", DEFAULT_IVF_NLIST),
            )
        elif index_type == IndexType.IVF_SQ4:
            return IVFSQ4Params(nlist=vector_params.get("nlist", DEFAULT_IVF_NLIST))
        elif index_type == IndexType.IVF_SQ8:
            return IVFSQ8Params(nlist=vector_params.get("nlist", DEFAULT_IVF_NLIST))
        elif index_type == IndexType.IVF_SQ16:
            return IVFSQ16Params(nlist=vector_params.get("nlist", DEFAULT_IVF_NLIST))
        return None

    @staticmethod
    def _get_index_type(index_type_value: str) -> Any:
        """Map a string to an IndexType member (default HNSW); raise on unknown values."""
        from tcvectordb.model.enum import IndexType

        index_type_value = index_type_value or IndexType.HNSW
        try:
            return IndexType(index_type_value)
        except ValueError:
            support_index_types = [d.value for d in IndexType.__members__.values()]
            raise ValueError(
                NOT_SUPPORT_INDEX_TYPE_ERROR.format(index_type_value, support_index_types)
            )

    @staticmethod
    def _get_metric_type(metric_type_value: str) -> Any:
        """Map a string to a MetricType member (default COSINE); raise on unknown values."""
        from tcvectordb.model.enum import MetricType

        metric_type_value = metric_type_value or MetricType.COSINE
        try:
            return MetricType(metric_type_value.upper())
        except ValueError:
            support_metric_types = [d.value for d in MetricType.__members__.values()]
            raise ValueError(
                NOT_SUPPORT_METRIC_TYPE_ERROR.format(metric_type_value, support_metric_types)
            )

    @property
    def client(self) -> Any:
        """Get client."""
        return self._tencent_client

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """
        Add nodes to index.

        Args:
            nodes: List[BaseNode]: list of nodes with embeddings

        """
        from tcvectordb.model.document import Document

        ids = []
        entries = []
        for node in nodes:
            document = Document(id=node.node_id, vector=node.get_embedding())
            if node.ref_doc_id is not None:
                document.__dict__[DEFAULT_DOC_ID_KEY] = node.ref_doc_id
            if node.metadata is not None:
                # Metadata is stored JSON-serialized; matching values are also
                # promoted into dedicated filterable fields.
                document.__dict__[FIELD_METADATA] = json.dumps(node.metadata)
                for field in self._filter_fields:
                    v = node.metadata.get(field.name)
                    if field.match_value(v):
                        document.__dict__[field.name] = v
            if isinstance(node, TextNode) and node.text is not None:
                document.__dict__[DEFAULT_TEXT_KEY] = node.text

            entries.append(document)
            ids.append(node.node_id)

            # Flush full batches to the collection.
            if len(entries) >= self.batch_size:
                self._collection.upsert(
                    documents=entries, build_index=True, timeout=DEFAULT_TIMEOUT
                )
                entries = []

        # Flush the final partial batch, if any.
        if len(entries) > 0:
            self._collection.upsert(
                documents=entries, build_index=True, timeout=DEFAULT_TIMEOUT
            )
        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete nodes using with ref_doc_id or ids.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.

        """
        if ref_doc_id is None or len(ref_doc_id) == 0:
            return
        from tcvectordb.model.document import Filter

        # Accept either a single doc id or a list of them.
        delete_ids = ref_doc_id if isinstance(ref_doc_id, list) else [ref_doc_id]
        self._collection.delete(filter=Filter(Filter.In(DEFAULT_DOC_ID_KEY, delete_ids)))

    def query_by_ids(self, ids: List[str]) -> List[Dict]:
        """Fetch raw documents by their ids."""
        return self._collection.query(document_ids=ids, limit=len(ids))

    def truncate(self) -> None:
        """Remove all documents from the collection, keeping its schema."""
        self._database.truncate_collection(self._collection.collection_name)

    def describe_collection(self) -> Any:
        """Return the server-side description of the collection."""
        return self._database.describe_collection(self._collection.collection_name)

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Query index for top k most similar nodes.

        Args:
            query (VectorStoreQuery): contains
                query_embedding (List[float]): query embedding
                similarity_top_k (int): top k most similar nodes
                doc_ids (Optional[List[str]]): filter by doc_id
                filters (Optional[MetadataFilters]): filter result
            kwargs.filter (Optional[str|Filter]):

                if `filter` in kwargs:
                   using filter: `age > 20 and author in (...) and ...`
                elif query.filters:
                   using filter: " and ".join([f'{f.key} = "{f.value}"' for f in query.filters.filters])
                elif query.doc_ids:
                   using filter: `doc_id in (query.doc_ids)`

        """
        search_filter = self._to_vdb_filter(query, **kwargs)
        results = self._collection.search(
            vectors=[query.query_embedding],
            limit=query.similarity_top_k,
            retrieve_vector=True,
            output_fields=query.output_fields,
            filter=search_filter,
        )
        if len(results) == 0:
            return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])

        nodes = []
        similarities = []
        ids = []
        # Only one query vector was sent, so only results[0] is relevant.
        for doc in results[0]:
            ids.append(doc.get(FIELD_ID))
            similarities.append(doc.get("score"))
            meta_str = doc.get(FIELD_METADATA)
            meta = {} if meta_str is None else json.loads(meta_str)
            doc_id = doc.get(DEFAULT_DOC_ID_KEY)
            node = TextNode(
                id_=doc.get(FIELD_ID),
                text=doc.get(DEFAULT_TEXT_KEY),
                embedding=doc.get(FIELD_VECTOR),
                metadata=meta,
            )
            if doc_id is not None:
                # Link the node back to its source document.
                node.relationships = {
                    NodeRelationship.SOURCE: RelatedNodeInfo(node_id=doc_id)
                }
            nodes.append(node)
        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

    @staticmethod
    def _to_vdb_filter(query: VectorStoreQuery, **kwargs: Any) -> Any:
        """Translate the query/kwargs into a tcvectordb Filter (or None)."""
        from tcvectordb.model.document import Filter

        search_filter = None
        if "filter" in kwargs:
            # An explicit filter wins; accept a raw string or a Filter object.
            search_filter = kwargs.pop("filter")
            search_filter = (
                search_filter if type(search_filter) is Filter else Filter(search_filter)
            )
        elif query.filters is not None and len(query.filters.legacy_filters()) > 0:
            # Build an AND-joined equality expression from metadata filters.
            search_filter = " and ".join(
                [f'{f.key} = "{f.value}"' for f in query.filters.legacy_filters()]
            )
            search_filter = Filter(search_filter)
        elif query.doc_ids is not None:
            search_filter = Filter(Filter.In(DEFAULT_DOC_ID_KEY, query.doc_ids))
        return search_filter
def add(
    self,
    nodes: List[BaseNode],
    **add_kwargs: Any,
) -> List[str]:
    """
    Add nodes to index.

    Args:
        nodes: List[BaseNode]: list of nodes with embeddings

    """
    from tcvectordb.model.document import Document

    def to_document(node: BaseNode) -> Any:
        # Convert one llama-index node into a tcvectordb Document.
        doc = Document(id=node.node_id, vector=node.get_embedding())
        if node.ref_doc_id is not None:
            doc.__dict__[DEFAULT_DOC_ID_KEY] = node.ref_doc_id
        if node.metadata is not None:
            doc.__dict__[FIELD_METADATA] = json.dumps(node.metadata)
            # Promote matching metadata values into indexed filter fields.
            for filter_field in self._filter_fields:
                value = node.metadata.get(filter_field.name)
                if filter_field.match_value(value):
                    doc.__dict__[filter_field.name] = value
        if isinstance(node, TextNode) and node.text is not None:
            doc.__dict__[DEFAULT_TEXT_KEY] = node.text
        return doc

    inserted_ids: List[str] = []
    pending: List[Any] = []
    for node in nodes:
        pending.append(to_document(node))
        inserted_ids.append(node.node_id)
        # Flush a full batch to the collection.
        if len(pending) >= self.batch_size:
            self._collection.upsert(
                documents=pending, build_index=True, timeout=DEFAULT_TIMEOUT
            )
            pending = []

    # Flush the final partial batch, if any.
    if pending:
        self._collection.upsert(
            documents=pending, build_index=True, timeout=DEFAULT_TIMEOUT
        )
    return inserted_ids
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Delete nodes using with ref_doc_id or ids.

    Args:
        ref_doc_id (str): The doc_id of the document to delete.

    """
    # Nothing to delete for a missing or empty id (empty string or empty list).
    if ref_doc_id is None or len(ref_doc_id) == 0:
        return

    from tcvectordb.model.document import Filter

    # Accept either one doc id or a list of them.
    if isinstance(ref_doc_id, list):
        target_ids = ref_doc_id
    else:
        target_ids = [ref_doc_id]
    self._collection.delete(filter=Filter(Filter.In(DEFAULT_DOC_ID_KEY, target_ids)))
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """
    Query index for top k most similar nodes.

    Args:
        query (VectorStoreQuery): contains
            query_embedding (List[float]): query embedding
            similarity_top_k (int): top k most similar nodes
            doc_ids (Optional[List[str]]): filter by doc_id
            filters (Optional[MetadataFilters]): filter result
        kwargs.filter (Optional[str|Filter]):

            if `filter` in kwargs:
               using filter: `age > 20 and author in (...) and ...`
            elif query.filters:
               using filter: " and ".join([f'{f.key} = "{f.value}"' for f in query.filters.filters])
            elif query.doc_ids:
               using filter: `doc_id in (query.doc_ids)`

    """
    hits = self._collection.search(
        vectors=[query.query_embedding],
        limit=query.similarity_top_k,
        retrieve_vector=True,
        output_fields=query.output_fields,
        filter=self._to_vdb_filter(query, **kwargs),
    )
    if len(hits) == 0:
        # Server returned no result sets at all.
        return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])

    result_nodes = []
    scores = []
    node_ids = []
    # A single query vector was sent, so only the first result set matters.
    for hit in hits[0]:
        raw_meta = hit.get(FIELD_METADATA)
        node = TextNode(
            id_=hit.get(FIELD_ID),
            text=hit.get(DEFAULT_TEXT_KEY),
            embedding=hit.get(FIELD_VECTOR),
            metadata=json.loads(raw_meta) if raw_meta is not None else {},
        )
        source_id = hit.get(DEFAULT_DOC_ID_KEY)
        if source_id is not None:
            # Link the node back to its source document.
            node.relationships = {
                NodeRelationship.SOURCE: RelatedNodeInfo(node_id=source_id)
            }
        node_ids.append(hit.get(FIELD_ID))
        scores.append(hit.get("score"))
        result_nodes.append(node)

    return VectorStoreQueryResult(nodes=result_nodes, similarities=scores, ids=node_ids)