Please refer to the official Vector Search documentation for more details:
https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/llama_index/vector_stores/awsdocdb/base.py
class AWSDocDbVectorStore(BasePydanticVectorStore):
    """
    AWS DocumentDB Vector Store.

    To use, you should have both:
    - the ``pymongo`` python package installed
    - a connection string associated with a DocumentDB Instance

    Please refer to the official Vector Search documentation for more details:
    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html
    """

    stores_text: bool = True
    flat_metadata: bool = True

    _docdb_client: MongoClient = PrivateAttr()
    _similarity_score: AWSDocDbVectorStoreSimilarityType = PrivateAttr()
    _collection: Any = PrivateAttr()
    _embedding_key: str = PrivateAttr()
    _id_key: str = PrivateAttr()
    _text_key: str = PrivateAttr()
    _metadata_key: str = PrivateAttr()
    _insert_kwargs: Dict = PrivateAttr()
    _index_crud: DocDbIndex = PrivateAttr()

    def __init__(
        self,
        docdb_client: Optional[Any] = None,
        db_name: str = "default_db",
        index_name: str = "default_index",
        collection_name: str = "default_collection",
        id_key: str = "id",
        embedding_key: str = "embedding",
        text_key: str = "text",
        metadata_key: str = "metadata",
        insert_kwargs: Optional[Dict] = None,
        similarity_score="cosine",
        **kwargs: Any,
    ) -> None:
        """
        Initialize the vector store.

        Args:
            docdb_client: A DocumentDB client.
            db_name: A DocumentDB database name.
            index_name: Name handed to the ``DocDbIndex`` helper that
                creates and deletes the vector index.
            collection_name: A DocumentDB collection name.
            id_key: The data field to use as the id.
            embedding_key: A DocumentDB field that will contain the embedding
                for each document.
            text_key: A DocumentDB field that will contain the text for each
                document.
            metadata_key: A DocumentDB field that will contain the metadata
                for each document.
            insert_kwargs: The kwargs used during `insert`.
            similarity_score: Similarity measure sent with every vector
                search and used as the default for ``create_index``.
            **kwargs: Accepted for interface compatibility; not used here.

        Raises:
            ValueError: If ``docdb_client`` is None.

        """
        super().__init__()

        if docdb_client is None:
            raise ValueError("Must specify connection string to DocumentDB instance ")
        self._docdb_client = cast(MongoClient, docdb_client)

        self._similarity_score = similarity_score
        self._collection = self._docdb_client[db_name][collection_name]
        self._embedding_key = embedding_key
        self._id_key = id_key
        self._text_key = text_key
        self._metadata_key = metadata_key
        self._insert_kwargs = insert_kwargs or {}
        self._index_crud = DocDbIndex(index_name, self._embedding_key, self._collection)

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used to identify this vector store."""
        return "AWSDocDbVectorStore"

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """
        Add nodes to index.

        Args:
            nodes: List[BaseNode]: list of nodes with embeddings
            **add_kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A List of ids for successfully added nodes.

        """
        # pymongo's insert_many rejects an empty document list, so an empty
        # batch is treated as a no-op instead of an error.
        if not nodes:
            return []

        ids = []
        data_to_insert = []
        for node in nodes:
            metadata = node_to_metadata_dict(
                node, remove_text=True, flat_metadata=self.flat_metadata
            )
            entry = {
                self._id_key: node.node_id,
                self._embedding_key: node.get_embedding(),
                self._text_key: node.get_content(metadata_mode=MetadataMode.NONE)
                or "",
                self._metadata_key: metadata,
            }
            data_to_insert.append(entry)
            ids.append(node.node_id)

        logger.debug("Inserting data into DocumentDB: %s", data_to_insert)
        insert_result = self._collection.insert_many(
            data_to_insert, **self._insert_kwargs
        )
        logger.debug("Result of insert: %s", insert_result)
        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete every node that belongs to the given source document.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.
            **delete_kwargs: Accepted for interface compatibility; not used
                here.

        Raises:
            ValueError: If ``ref_doc_id`` is None.

        """
        if ref_doc_id is None:
            raise ValueError("No document id provided to delete.")
        # One source document can be stored as several nodes that share the
        # same ref_doc_id, so remove all matches rather than only the first.
        self._collection.delete_many({self._metadata_key + ".ref_doc_id": ref_doc_id})

    @property
    def client(self) -> Any:
        """Return DocDB client."""
        return self._docdb_client

    def _query(
        self, query: VectorStoreQuery, projection: Optional[Dict[str, int]] = None
    ) -> VectorStoreQueryResult:
        """
        Run a vector search and convert the hits into a query result.

        Args:
            query: Supplies the query embedding, top-k and optional filters.
            projection: Optional ``$project`` stage appended to the pipeline.

        Returns:
            A VectorStoreQueryResult with the nodes, scores and ids of the
            hits, in the order the cursor yields them.

        """
        params: Dict[str, Any] = {
            "vector": query.query_embedding,
            "path": self._embedding_key,
            "similarity": self._similarity_score,
            "k": query.similarity_top_k,
        }
        if query.filters:
            params["filter"] = _to_mongodb_filter(query.filters)

        pipeline: List[Dict[str, Any]] = [{"$search": {"vectorSearch": params}}]
        if projection is not None:
            pipeline.append({"$project": projection})

        logger.debug("Running query pipeline: %s", pipeline)
        cursor = self._collection.aggregate(pipeline)  # type: ignore

        top_k_nodes = []
        top_k_ids = []
        top_k_scores = []
        for res in cursor:
            # NOTE(review): these pops have no default, so a projection that
            # drops any of the four fields will presumably raise KeyError here.
            text = res.pop(self._text_key)
            vector = res.pop(self._embedding_key)
            node_id = res.pop(self._id_key)
            metadata_dict = res.pop(self._metadata_key)
            # The score is recomputed locally from the stored vector.
            score = similarity(query.query_embedding, vector, self._similarity_score)

            try:
                node = metadata_dict_to_node(metadata_dict)
                node.set_content(text)
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                metadata, node_info, relationships = legacy_metadata_dict_to_node(
                    metadata_dict
                )
                node = TextNode(
                    text=text,
                    id_=node_id,
                    metadata=metadata,
                    start_char_idx=node_info.get("start", None),
                    end_char_idx=node_info.get("end", None),
                    relationships=relationships,
                )

            top_k_ids.append(node_id)
            top_k_nodes.append(node)
            top_k_scores.append(score)

        result = VectorStoreQueryResult(
            nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
        )
        logger.debug("Result of query: %s", result)
        return result

    def query(
        self,
        query: VectorStoreQuery,
        projection: Optional[Dict[str, int]] = None,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """
        Query index for top k most similar nodes.

        Args:
            query: a VectorStoreQuery object.
            projection: a dictionary specifying which fields to return after
                the search
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A VectorStoreQueryResult containing the results of the query.

        """
        return self._query(query, projection=projection)

    def create_index(self, dimensions, similarity_score=None):
        """
        Create the vector index through the index helper.

        Args:
            dimensions: Forwarded unchanged to the helper's ``create_index``.
            similarity_score: Optional override; when None, the similarity
                configured on this store is used.

        Returns:
            Whatever the helper's ``create_index`` returns.

        """
        score = self._similarity_score
        if similarity_score is not None:
            # Use the caller's override (previously the module-level
            # ``similarity`` function was assigned here by mistake).
            score = similarity_score
        return self._index_crud.create_index(dimensions, score)

    def delete_index(self):
        """Delete the vector index through the index helper."""
        return self._index_crud.delete_index()

    def __del__(self) -> None:
        """Close the DocumentDB client when the store is garbage collected."""
        # __init__ can raise before the client is assigned; in that case
        # there is nothing to close.
        client = getattr(self, "_docdb_client", None)
        if client is not None:
            client.close()
def add(
    self,
    nodes: List[BaseNode],
    **add_kwargs: Any,
) -> List[str]:
    """
    Add nodes to index.

    Args:
        nodes: List[BaseNode]: list of nodes with embeddings
        **add_kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A List of ids for successfully added nodes.

    """
    # pymongo's insert_many rejects an empty document list, so an empty
    # batch is treated as a no-op instead of an error.
    if not nodes:
        return []

    ids = []
    data_to_insert = []
    for node in nodes:
        metadata = node_to_metadata_dict(
            node, remove_text=True, flat_metadata=self.flat_metadata
        )
        entry = {
            self._id_key: node.node_id,
            self._embedding_key: node.get_embedding(),
            self._text_key: node.get_content(metadata_mode=MetadataMode.NONE) or "",
            self._metadata_key: metadata,
        }
        data_to_insert.append(entry)
        ids.append(node.node_id)

    logger.debug("Inserting data into DocumentDB: %s", data_to_insert)
    insert_result = self._collection.insert_many(
        data_to_insert, **self._insert_kwargs
    )
    logger.debug("Result of insert: %s", insert_result)
    return ids
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/llama_index/vector_stores/awsdocdb/base.py
Lines 225–235
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Delete every node that belongs to the given source document.

    Args:
        ref_doc_id (str): The doc_id of the document to delete.
        **delete_kwargs: Accepted for interface compatibility; not used here.

    Raises:
        ValueError: If ``ref_doc_id`` is None.

    """
    if ref_doc_id is None:
        raise ValueError("No document id provided to delete.")
    # One source document can be stored as several nodes that share the same
    # ref_doc_id, so remove all matches rather than only the first.
    self._collection.delete_many({self._metadata_key + ".ref_doc_id": ref_doc_id})
def query(
    self,
    query: VectorStoreQuery,
    projection: Optional[Dict[str, int]] = None,
    **kwargs: Any,
) -> VectorStoreQueryResult:
    """
    Return the top-k nodes most similar to the query embedding.

    Args:
        query: The VectorStoreQuery describing the search.
        projection: Optional mapping of the fields to return after the
            search.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The VectorStoreQueryResult produced by the internal ``_query`` call.

    """
    # Public entry point: all the work happens in the private helper.
    result = self._query(query, projection=projection)
    return result