Bagel

BagelReader #

Bases: BaseReader

Bagel文件的读取器。

Source code in llama_index/readers/bagel/base.py

class BagelReader(BaseReader):
    """Reader that loads documents from a Bagel cluster.

    Construction connects to the Bagel REST API at ``api.bageldb.ai`` and
    looks up the named cluster; ``load_data`` then queries that cluster and
    wraps every hit in a ``Document``.
    """

    def __init__(self, collection_name: str) -> None:
        """Connect to Bagel and resolve the cluster to read from.

        Args:
            collection_name: Name of the cluster to load; must be non-empty.

        Raises:
            ImportError: If the ``bagel`` package cannot be imported.
            ValueError: If ``collection_name`` is empty.
        """
        try:
            import bagel
        except ImportError as err:
            # Chain the original error so the real import failure stays
            # visible in the traceback.
            raise ImportError(
                "`bagel` package not found, please run `pip install bagel`"
            ) from err
        from bagel.config import Settings

        if not collection_name:
            raise ValueError("collection_name cannot be empty")

        self.collection_name = collection_name

        server_settings = Settings(
            bagel_api_impl="rest", bagel_server_host="api.bageldb.ai"
        )

        self.client = bagel.Client(server_settings)

        self._collection = self.client.get_cluster(collection_name)

    def create_documents(self, results: Any) -> Any:
        """Convert a Bagel query result into a list of ``Document`` objects.

        Only the first batch (index 0) of every result column is read.

        Args:
            results: Query result indexed by "ids", "documents",
                "embeddings" and "metadatas". "ids" is required; the other
                columns may be missing or ``None``.

        Returns:
            A list with one ``Document`` per id in the first batch.
        """
        ids = results["ids"][0]

        def first_batch(field: str) -> Any:
            # NOTE(review): presumably a column left out of `include` comes
            # back as None (or is absent) -- confirm against the Bagel
            # client. Substituting one None per id keeps zip() aligned
            # instead of failing on `None[0]`.
            try:
                batch = results[field][0]
            except (KeyError, IndexError, TypeError):
                batch = None
            return [None] * len(ids) if batch is None else batch

        documents = []
        for doc_id, text, embedding, metadata in zip(
            ids,
            first_batch("documents"),
            first_batch("embeddings"),
            first_batch("metadatas"),
        ):
            optional_fields = {
                "text": text,
                "embedding": embedding,
                "metadata": metadata,
            }
            # Pass only the values that are present, so a missing column is
            # simply not set on the Document rather than forced to None.
            present = {
                key: value
                for key, value in optional_fields.items()
                if value is not None
            }
            documents.append(Document(doc_id=doc_id, **present))

        return documents

    def load_data(
        self,
        query_vector: Optional[OneOrMany[Embedding]] = None,
        query_texts: Optional[OneOrMany[Doc]] = None,
        limit: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents", "embeddings", "distances"],
    ) -> Any:
        """Query the cluster and return the matches as documents.

        Exactly one of ``query_vector`` and ``query_texts`` must be given.

        Args:
            query_vector: Embedding(s) to search with; forwarded to the
                cluster query as ``query_embeddings``.
            query_texts: Text(s) to search with.
            limit: Forwarded to the cluster query as ``n_results``.
            where: Filter forwarded as ``where``; ``None`` becomes ``{}``.
            where_document: Filter forwarded as ``where_document``; ``None``
                becomes ``{}``.
            include: Names of the result fields to request.

        Returns:
            The documents built by ``create_documents`` from the query result.

        Raises:
            ValueError: If both or neither of ``query_vector`` and
                ``query_texts`` are given, if the query returns nothing, or
                if the result has neither embeddings nor documents.
        """
        # Exactly one query source is allowed: reject "neither" and "both".
        if (query_vector is None) == (query_texts is None):
            raise ValueError(
                "You must provide either embeddings or texts to find, but not both"
            )

        if where is None:
            where = {}

        if where_document is None:
            where_document = {}

        results = self._collection.find(
            query_embeddings=query_vector,
            query_texts=query_texts,
            n_results=limit,
            where=where,
            where_document=where_document,
            # Hand over a copy: the default list is shared between calls and
            # must not be exposed to mutation by the client.
            include=list(include),
        )

        if not results:
            raise ValueError("No results found")

        # Only fail when both content columns are empty; a result with just
        # one of them is still passed on.
        if not results["embeddings"] and not results["documents"]:
            raise ValueError("No embeddings or documents found")

        return self.create_documents(results)

create_documents #

create_documents(results: Any) -> Any

根据结果创建文档。

Parameters:

Name	Type	Description	Default
`results`	`Any`	查询结果。	required

Returns:

Type	Description
`Any`	文档列表。

Source code in llama_index/readers/bagel/base.py

    def create_documents(self, results: Any) -> Any:
        """Convert a Bagel query result into a list of ``Document`` objects.

        Only the first batch (index 0) of every result column is read.

        Args:
            results: Query result indexed by "ids", "documents",
                "embeddings" and "metadatas". "ids" is required; the other
                columns may be missing or ``None``.

        Returns:
            A list with one ``Document`` per id in the first batch.
        """
        ids = results["ids"][0]

        def first_batch(field: str) -> Any:
            # NOTE(review): presumably a column left out of the query's
            # requested fields comes back as None (or is absent) -- confirm
            # against the Bagel client. Substituting one None per id keeps
            # zip() aligned instead of failing on `None[0]`.
            try:
                batch = results[field][0]
            except (KeyError, IndexError, TypeError):
                batch = None
            return [None] * len(ids) if batch is None else batch

        documents = []
        for doc_id, text, embedding, metadata in zip(
            ids,
            first_batch("documents"),
            first_batch("embeddings"),
            first_batch("metadatas"),
        ):
            optional_fields = {
                "text": text,
                "embedding": embedding,
                "metadata": metadata,
            }
            # Pass only the values that are present, so a missing column is
            # simply not set on the Document rather than forced to None.
            present = {
                key: value
                for key, value in optional_fields.items()
                if value is not None
            }
            documents.append(Document(doc_id=doc_id, **present))

        return documents

load_data #

load_data(
    query_vector: Optional[OneOrMany[Embedding]] = None,
    query_texts: Optional[OneOrMany[Doc]] = None,
    limit: int = 10,
    where: Optional[Where] = None,
    where_document: Optional[WhereDocument] = None,
    include: Include = [
        "metadatas",
        "documents",
        "embeddings",
        "distances",
    ],
) -> Any

获取与所提供的查询嵌入或查询文本最接近的前 `limit` 个文档。

Returns:

Type	Description
`Any`	嵌入与查询嵌入或查询文本最接近的 LlamaIndex 文档列表。

Source code in llama_index/readers/bagel/base.py

    def load_data(
        self,
        query_vector: Optional[OneOrMany[Embedding]] = None,
        query_texts: Optional[OneOrMany[Doc]] = None,
        limit: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents", "embeddings", "distances"],
    ) -> Any:
        """Query the cluster and return the matches as documents.

        Exactly one of ``query_vector`` and ``query_texts`` must be given.

        Args:
            query_vector: Embedding(s) to search with; forwarded to the
                cluster query as ``query_embeddings``.
            query_texts: Text(s) to search with.
            limit: Forwarded to the cluster query as ``n_results``.
            where: Filter forwarded as ``where``; ``None`` becomes ``{}``.
            where_document: Filter forwarded as ``where_document``; ``None``
                becomes ``{}``.
            include: Names of the result fields to request.

        Returns:
            The documents built by ``create_documents`` from the query result.

        Raises:
            ValueError: If both or neither of ``query_vector`` and
                ``query_texts`` are given, if the query returns nothing, or
                if the result has neither embeddings nor documents.
        """
        # Exactly one query source is allowed: reject "neither" and "both".
        if (query_vector is None) == (query_texts is None):
            raise ValueError(
                "You must provide either embeddings or texts to find, but not both"
            )

        if where is None:
            where = {}

        if where_document is None:
            where_document = {}

        results = self._collection.find(
            query_embeddings=query_vector,
            query_texts=query_texts,
            n_results=limit,
            where=where,
            where_document=where_document,
            # Hand over a copy: the default list is shared between calls and
            # must not be exposed to mutation by the client.
            include=list(include),
        )

        if not results:
            raise ValueError("No results found")

        # Only fail when both content columns are empty; a result with just
        # one of them is still passed on.
        if not results["embeddings"] and not results["documents"]:
            raise ValueError("No embeddings or documents found")

        return self.create_documents(results)