class TiDBVectorStore(BasePydanticVectorStore):
    # Vector store that delegates all storage and search operations to a
    # ``TiDBVectorClient`` (from the ``tidb-vector`` package) held in ``_tidb``.

    # Node text is stored alongside the embedding (see ``add``).
    stores_text: bool = True
    flat_metadata: bool = False

    # Connection string exactly as passed to ``__init__``.
    _connection_string: str = PrivateAttr()
    # Engine arguments passed to ``__init__``; ``{}`` when none were given.
    _engine_args: Dict[str, Any] = PrivateAttr()
    # The underlying ``TiDBVectorClient`` instance.
    _tidb: Any = PrivateAttr()

    def __init__(
        self,
        connection_string: str,
        table_name: str = DEFAULT_VECTOR_TABLE_NAME,
        distance_strategy: str = DEFAULT_DISTANCE_STRATEGY,
        vector_dimension: int = 1536,
        *,
        engine_args: Optional[Dict[str, Any]] = None,
        drop_existing_table: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Initialize a TiDB Vector Store in Llama Index.

        The store uses a flexible and standardized table structure for
        storing vector data which remains fixed regardless of the dynamic
        table name setting.

        The vector table schema includes:
        - 'id': a UUID for each entry.
        - 'embedding': stores vector data in a VectorType column.
        - 'document': a Text column for the original data or additional
          information.
        - 'meta': a JSON column for flexible metadata storage.
        - 'create_time' and 'update_time': timestamp columns for tracking
          data changes.

        This table structure caters to general use cases and complex
        scenarios where the table serves as a semantic layer for advanced
        data integration and analysis, leveraging SQL for join queries.

        Args:
            connection_string (str): The connection string for the TiDB
                database, format:
                "mysql+pymysql://<user>:<password>@<host>:4000/<database>".
            table_name (str, optional): The name of the table that will be
                used to store vector data. If you do not provide a table
                name, a default table named `llama_index_vector_store` will
                be created automatically.
            distance_strategy: The strategy used for similarity search,
                defaults to "cosine", valid values: "l2", "cosine".
            vector_dimension: The dimension of the vector, defaults to 1536.
            engine_args (Optional[Dict[str, Any]], optional): Additional
                engine arguments. Defaults to None.
            drop_existing_table: Drop the existing TiDB table before
                initializing, defaults to False.
            **kwargs (Any): Additional keyword arguments. They are forwarded
                to both the base class initializer and ``TiDBVectorClient``.

        Raises:
            ImportError: If the tidb-vector python package is not installed.
        """
        super().__init__(**kwargs)
        self._connection_string = connection_string
        self._engine_args = engine_args or {}
        try:
            # Imported lazily so the module can be imported without the
            # optional dependency installed.
            from tidb_vector.integrations import TiDBVectorClient
        except ImportError as err:
            # Chain the original error so the real import failure (for
            # example a missing transitive dependency) stays visible.
            raise ImportError(
                "Could not import tidbvec python package. "
                "Please install it with `pip install tidb-vector`."
            ) from err

        self._tidb = TiDBVectorClient(
            connection_string=connection_string,
            table_name=table_name,
            distance_strategy=distance_strategy,
            vector_dimension=vector_dimension,
            engine_args=engine_args,
            drop_existing_table=drop_existing_table,
            **kwargs,
        )

    @property
    def client(self) -> Any:
        """Get client."""
        return self._tidb

    def drop_vectorstore(self) -> None:
        """Drop the tidb vector store from the TiDB database."""
        self._tidb.drop_table()

    @classmethod
    def class_name(cls) -> str:
        """Return the name of this vector store class."""
        return "TiDBVectorStore"

    def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
        """
        Add nodes to the vector store.

        Args:
            nodes (List[BaseNode]): List of nodes to be added.
            **add_kwargs: Additional keyword arguments to be passed to the
                underlying storage.

        Returns:
            List[str]: List of node IDs that were added.
        """
        ids = []
        metadatas = []
        embeddings = []
        texts = []
        for node in nodes:
            ids.append(node.node_id)
            # The text goes into its own column, so it is stripped from the
            # serialized metadata.
            metadatas.append(node_to_metadata_dict(node, remove_text=True))
            embeddings.append(node.get_embedding())
            texts.append(node.get_content(metadata_mode=MetadataMode.NONE))

        self._tidb.insert(
            texts=texts,
            embeddings=embeddings,
            metadatas=metadatas,
            ids=ids,
            **add_kwargs,
        )
        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete all nodes of a document from the vector store.

        Args:
            ref_doc_id (str): The reference document ID to be deleted.
            **delete_kwargs: Additional keyword arguments to be passed to
                the delete method. A caller-supplied ``filter`` is
                overwritten with the ``doc_id`` filter.

        Returns:
            None
        """
        delete_kwargs["filter"] = {"doc_id": ref_doc_id}
        self._tidb.delete(**delete_kwargs)

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Perform a similarity search with the given query embedding.

        Args:
            query (VectorStoreQuery): The query object containing the query
                data.
            **kwargs: Additional keyword arguments.

        Returns:
            VectorStoreQueryResult: The result of the similarity search.

        Raises:
            ValueError: If the query embedding is not provided.
        """
        if query.query_embedding is None:
            raise ValueError("Query embedding must be provided.")
        return self._similarity_search_with_score(
            query.query_embedding,
            query.similarity_top_k,
            query.filters,
            **kwargs,
        )

    def _similarity_search_with_score(
        self,
        embedding: List[float],
        limit: int = 10,
        metadata_filters: Optional[MetadataFilters] = None,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """
        Performs a similarity search with scores based on the given condition.

        Args:
            embedding (List[float]): The embedding vector for similarity
                search.
            limit (int, optional): The maximum number of results to return.
                Defaults to 10.
            metadata_filters (Optional[MetadataFilters], optional): Filters
                to apply on metadata. Defaults to None.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            VectorStoreQueryResult: The result of the similarity search,
                including nodes, similarities, and ids.
        """
        filters = self._to_tidb_filters(metadata_filters)
        results = self._tidb.query(
            query_vector=embedding, k=limit, filter=filters, **kwargs
        )

        nodes = []
        similarities = []
        ids = []
        for row in results:
            try:
                node = metadata_dict_to_node(row.metadata)
                node.set_content(str(row.document))
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                _logger.warning(
                    "Failed to parse metadata dict, falling back to legacy logic."
                )
                node = TextNode(
                    id_=row.id,
                    text=row.document,
                    metadata=row.metadata,
                )
            # Similarity is reported as ``1 - distance``; rows without a
            # distance get a similarity of 0.
            similarities.append((1 - row.distance) if row.distance is not None else 0)
            ids.append(row.id)
            nodes.append(node)

        return VectorStoreQueryResult(
            nodes=nodes,
            similarities=similarities,
            ids=ids,
        )

    def _to_tidb_filters(
        self, metadata_filters: Optional[MetadataFilters] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Converts metadata filters to TiDB filters.

        Args:
            metadata_filters (Optional[MetadataFilters]): The metadata
                filters to be converted.

        Returns:
            Optional[Dict[str, Any]]: The converted TiDB filters, shaped as
                ``{"$and" | "$or": [{key: {"$op": value}}, ...]}``, or None
                when no filters were given.

        Raises:
            ValueError: If an unsupported operator is encountered.
        """
        if metadata_filters is None:
            return None

        condition = "$and"
        if metadata_filters.condition == FilterCondition.OR:
            condition = "$or"

        # Supported operators and the token each one maps to in the TiDB
        # filter dictionary.
        operator_tokens = {
            FilterOperator.EQ: "$eq",
            FilterOperator.NE: "$ne",
            FilterOperator.GT: "$gt",
            FilterOperator.GTE: "$gte",
            FilterOperator.LT: "$lt",
            FilterOperator.LTE: "$lte",
            FilterOperator.IN: "$in",
            FilterOperator.NIN: "$nin",
        }

        clauses = []
        for metadata_filter in metadata_filters.filters:
            token = operator_tokens.get(metadata_filter.operator)
            if token is None:
                raise ValueError(f"Unsupported operator: {metadata_filter.operator}")
            clauses.append({metadata_filter.key: {token: metadata_filter.value}})

        return {condition: clauses}
def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
    """
    Insert the given nodes into the TiDB-backed vector store.

    Each node contributes its ID, its metadata dictionary (with the text
    removed), its embedding and its plain content.

    Args:
        nodes (List[BaseNode]): Nodes to store.
        **add_kwargs: Extra keyword arguments forwarded unchanged to the
            underlying client's ``insert`` call.

    Returns:
        List[str]: The IDs of the nodes, in input order.
    """
    # One pass over the nodes, so the per-node accessors are invoked in the
    # same order as a plain loop would invoke them.
    records = [
        (
            item.node_id,
            node_to_metadata_dict(item, remove_text=True),
            item.get_embedding(),
            item.get_content(metadata_mode=MetadataMode.NONE),
        )
        for item in nodes
    ]
    node_ids = [record[0] for record in records]

    self._tidb.insert(
        texts=[record[3] for record in records],
        embeddings=[record[2] for record in records],
        metadatas=[record[1] for record in records],
        ids=node_ids,
        **add_kwargs,
    )
    return node_ids
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-tidbvector/llama_index/vector_stores/tidbvector/base.py
146 147 148 149 150 151 152 153 154 155 156 157 158 159
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Remove every node belonging to one source document.

    Args:
        ref_doc_id (str): ID of the reference document whose nodes are
            deleted.
        **delete_kwargs: Extra keyword arguments forwarded to the
            underlying client's ``delete`` call. Any ``filter`` entry
            supplied here is replaced by the ``doc_id`` filter.

    Returns:
        None
    """
    doc_filter = {"doc_id": ref_doc_id}
    # Merge so the doc_id filter always wins over a caller-supplied one.
    self._tidb.delete(**{**delete_kwargs, "filter": doc_filter})
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """
    Run a similarity search for the embedding carried by ``query``.

    Args:
        query (VectorStoreQuery): Query object; its ``query_embedding``,
            ``similarity_top_k`` and ``filters`` are used.
        **kwargs: Extra keyword arguments forwarded to
            ``_similarity_search_with_score``.

    Returns:
        VectorStoreQueryResult: The result of the similarity search.

    Raises:
        ValueError: If the query embedding is not provided.
    """
    query_vector = query.query_embedding
    if query_vector is None:
        raise ValueError("Query embedding must be provided.")

    top_k = query.similarity_top_k
    metadata_filters = query.filters
    return self._similarity_search_with_score(
        query_vector, top_k, metadata_filters, **kwargs
    )