Awadb

AwaDBVectorStore #

Bases: BasePydanticVectorStore

AwaDB向量存储。

在这个向量存储中，嵌入被存储在AwaDB表中。

在查询时，索引使用AwaDB查询前k个最相似的节点。

示例

pip install llama-index-vector-stores-awadb

from llama_index.vector_stores.awadb import AwaDBVectorStore

vector_store = AwaDBVectorStore(table_name="llamaindex")

Source code in llama_index/vector_stores/awadb/base.py

class AwaDBVectorStore(BasePydanticVectorStore):
    """AwaDB vector store.

    In this vector store, embeddings are stored within an AwaDB table.

    During query time, the index uses AwaDB to query for the top-k most
    similar nodes.

    Examples:
        `pip install llama-index-vector-stores-awadb`

        ```python
        from llama_index.vector_stores.awadb import AwaDBVectorStore

        vector_store = AwaDBVectorStore(table_name="llamaindex")
        ```
    """

    flat_metadata: bool = True
    stores_text: bool = True
    DEFAULT_TABLE_NAME = "llamaindex_awadb"

    _awadb_client: Any = PrivateAttr()

    @property
    def client(self) -> Any:
        """Return the underlying AwaDB client."""
        return self._awadb_client

    def __init__(
        self,
        table_name: str = DEFAULT_TABLE_NAME,
        log_and_data_dir: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the store with an AwaDB client and create its table.

        When ``table_name`` equals ``DEFAULT_TABLE_NAME`` (i.e. it was not
        customized), a suffix made of the last segment of a fresh uuid4 is
        appended so that each store gets its own table.

        Args:
            table_name: Name of the table to create. Defaults to
                ``DEFAULT_TABLE_NAME``.
            log_and_data_dir: Optional root directory for logs and data; when
                given it is passed to ``awadb.Client``.
            **kwargs: Reserved for future extensions; currently unused.

        Raises:
            ImportError: If the ``awadb`` package is not installed.
        """
        super().__init__()

        import_err_msg = "`awadb` package not found, please run `pip install awadb`"
        try:
            import awadb
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(import_err_msg) from err

        if log_and_data_dir is not None:
            self._awadb_client = awadb.Client(log_and_data_dir)
        else:
            self._awadb_client = awadb.Client()

        if table_name == self.DEFAULT_TABLE_NAME:
            unique_suffix = str(uuid.uuid4()).split("-")[-1]
            table_name = f"{table_name}_{unique_suffix}"

        self._awadb_client.Create(table_name)

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used to identify this store."""
        return "AwaDBVectorStore"

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes to AwaDB.

        Args:
            nodes: Nodes with embeddings to insert.
            **add_kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The ids of the added nodes, in input order.

        Raises:
            ValueError: If the AwaDB client has not been initialized.
        """
        if not self._awadb_client:
            raise ValueError("AwaDB client not initialized")

        # Nothing to insert: avoid sending an empty batch to the client.
        if not nodes:
            return []

        embeddings = []
        metadatas = []
        ids = []
        texts = []
        for node in nodes:
            embeddings.append(node.get_embedding())
            metadatas.append(
                node_to_metadata_dict(
                    node, remove_text=True, flat_metadata=self.flat_metadata
                )
            )
            ids.append(node.node_id)
            texts.append(node.get_content(metadata_mode=MetadataMode.NONE) or "")

        # "embedding_text" is the key query() reads back as node content and
        # "text_embedding" is the field query() excludes from results.
        self._awadb_client.AddTexts(
            "embedding_text",
            "text_embedding",
            texts,
            embeddings,
            metadatas,
            is_duplicate_texts=False,
            ids=ids,
        )

        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using ``ref_doc_id``.

        An empty (or missing) ``ref_doc_id`` is a no-op.

        NOTE(review): the id is handed to the client's ``Delete`` as-is, while
        ``add`` stores records under ``node.node_id`` -- confirm that callers
        pass an id that matches what was stored.

        Args:
            ref_doc_id: The doc_id of the document to delete.
            **delete_kwargs: Accepted for interface compatibility; unused here.
        """
        if not ref_doc_id:
            return
        self._awadb_client.Delete([ref_doc_id])

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query the index for the top-k most similar nodes.

        Args:
            query: The vector store query; its ``query_embedding``,
                ``similarity_top_k`` and ``filters`` are used.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The matching nodes with their similarities and ids.
        """
        meta_filters = {}
        if query.filters is not None:
            for metadata_filter in query.filters.legacy_filters():
                meta_filters[metadata_filter.key] = metadata_filter.value

        not_include_fields: Set[str] = {"text_embedding"}
        results = self._awadb_client.Search(
            query=query.query_embedding,
            topn=query.similarity_top_k,
            meta_filter=meta_filters,
            not_include_fields=not_include_fields,
        )

        nodes = []
        similarities = []
        ids = []

        # An empty search response has no first entry to read items from.
        if not results:
            return VectorStoreQueryResult(
                nodes=nodes, similarities=similarities, ids=ids
            )

        for item_detail in results[0]["ResultItems"]:
            content = ""
            meta_data = {}
            node_id = ""
            # Split the reserved keys (text, id, score) from node metadata.
            for item_key in item_detail:
                item_value = item_detail[item_key]
                if item_key == "embedding_text":
                    content = item_value
                elif item_key == "_id":
                    node_id = item_value
                    ids.append(node_id)
                elif item_key == "score":
                    similarities.append(item_value)
                else:
                    meta_data[item_key] = item_value

            try:
                node = metadata_dict_to_node(meta_data)
                node.set_content(content)
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                metadata, node_info, relationships = legacy_metadata_dict_to_node(
                    meta_data
                )

                node = TextNode(
                    text=content,
                    id_=node_id,
                    metadata=metadata,
                    start_char_idx=node_info.get("start", None),
                    end_char_idx=node_info.get("end", None),
                    relationships=relationships,
                )

            nodes.append(node)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

client `property` #

client: Any

获取AwaDB客户端。

add #

add(nodes: List[BaseNode], **add_kwargs: Any) -> List[str]

将节点添加到AwaDB。

Returns:

Type	Description
`List[str]`	已添加的节点ID

Source code in llama_index/vector_stores/awadb/base.py

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes to AwaDB.

        Args:
            nodes: Nodes with embeddings to insert.
            **add_kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The ids of the added nodes, in input order.

        Raises:
            ValueError: If the AwaDB client has not been initialized.
        """
        if not self._awadb_client:
            raise ValueError("AwaDB client not initialized")

        # Nothing to insert: avoid sending an empty batch to the client.
        if not nodes:
            return []

        embeddings = []
        metadatas = []
        ids = []
        texts = []
        for node in nodes:
            embeddings.append(node.get_embedding())
            metadatas.append(
                node_to_metadata_dict(
                    node, remove_text=True, flat_metadata=self.flat_metadata
                )
            )
            ids.append(node.node_id)
            # Store the raw text only; fall back to "" when there is none.
            texts.append(node.get_content(metadata_mode=MetadataMode.NONE) or "")

        # NOTE(review): the first two arguments look like the text field name
        # and the vector field name -- confirm against the AwaDB client API.
        self._awadb_client.AddTexts(
            "embedding_text",
            "text_embedding",
            texts,
            embeddings,
            metadatas,
            is_duplicate_texts=False,
            ids=ids,
        )

        return ids

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

使用ref_doc_id删除节点。

Returns:

Type	Description
`None`	无

Source code in llama_index/vector_stores/awadb/base.py

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using ``ref_doc_id``.

        An empty (or missing) ``ref_doc_id`` is a no-op.

        NOTE(review): the id is handed to the client's ``Delete`` as-is --
        confirm that callers pass an id matching what was stored on insert.

        Args:
            ref_doc_id: The doc_id of the document to delete.
            **delete_kwargs: Accepted for interface compatibility; unused here.
        """
        # Truthiness check also covers None, which len() would reject.
        if not ref_doc_id:
            return
        self._awadb_client.Delete([ref_doc_id])

query #

query(
    query: VectorStoreQuery, **kwargs: Any
) -> VectorStoreQueryResult

在索引中查询前k个最相似的节点。

Returns:

Type	Description
`VectorStoreQueryResult`	查询结果

Source code in llama_index/vector_stores/awadb/base.py

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query the index for the top-k most similar nodes.

        Args:
            query: The vector store query; its ``query_embedding``,
                ``similarity_top_k`` and ``filters`` are used.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The matching nodes with their similarities and ids.
        """
        meta_filters = {}
        if query.filters is not None:
            for metadata_filter in query.filters.legacy_filters():
                meta_filters[metadata_filter.key] = metadata_filter.value

        not_include_fields: Set[str] = {"text_embedding"}
        results = self._awadb_client.Search(
            query=query.query_embedding,
            topn=query.similarity_top_k,
            meta_filter=meta_filters,
            not_include_fields=not_include_fields,
        )

        nodes = []
        similarities = []
        ids = []

        # An empty search response has no first entry to read items from.
        if not results:
            return VectorStoreQueryResult(
                nodes=nodes, similarities=similarities, ids=ids
            )

        for item_detail in results[0]["ResultItems"]:
            content = ""
            meta_data = {}
            node_id = ""
            # Split the reserved keys (text, id, score) from node metadata.
            for item_key in item_detail:
                item_value = item_detail[item_key]
                if item_key == "embedding_text":
                    content = item_value
                elif item_key == "_id":
                    node_id = item_value
                    ids.append(node_id)
                elif item_key == "score":
                    similarities.append(item_value)
                else:
                    meta_data[item_key] = item_value

            try:
                node = metadata_dict_to_node(meta_data)
                node.set_content(content)
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                metadata, node_info, relationships = legacy_metadata_dict_to_node(
                    meta_data
                )

                node = TextNode(
                    text=content,
                    id_=node_id,
                    metadata=metadata,
                    start_char_idx=node_info.get("start", None),
                    end_char_idx=node_info.get("end", None),
                    relationships=relationships,
                )

            nodes.append(node)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

Awadb

AwaDBVectorStore #

client property #

add #

delete #

query #

client `property` #