Skip to content

Bagel

BagelReader #

Bases: BaseReader

Bagel文件的读取器。

Source code in llama_index/readers/bagel/base.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
class BagelReader(BaseReader):
    """Reader that loads documents from a Bagel cluster.

    On construction the reader creates a Bagel REST client pointed at
    ``api.bageldb.ai`` and opens one cluster by name.  ``load_data`` runs a
    nearest-neighbour query against that cluster and ``create_documents``
    converts the query result into ``Document`` objects.
    """

    def __init__(self, collection_name: str) -> None:
        """Create the Bagel client and open the named cluster.

        Args:
            collection_name: Name of the cluster to load from. Must be
                non-empty.

        Raises:
            ImportError: If the ``bagel`` package cannot be imported.
            ValueError: If ``collection_name`` is empty.
        """
        try:
            import bagel
        except ImportError as err:
            # Chain the original failure so the root cause stays in the traceback.
            raise ImportError(
                "`bagel` package not found, please run `pip install bagel`"
            ) from err
        from bagel.config import Settings

        if not collection_name:
            raise ValueError("collection_name cannot be empty")

        self.collection_name = collection_name

        server_settings = Settings(
            bagel_api_impl="rest", bagel_server_host="api.bageldb.ai"
        )

        self.client = bagel.Client(server_settings)

        self._collection = self.client.get_cluster(collection_name)

    def create_documents(self, results: Any) -> Any:
        """Build documents from a query result.

        Only the first batch (index ``0``) of each result field is converted,
        i.e. the matches of the first query.

        Fields that the query did not return (missing key, ``None`` or empty)
        fall back to defaults instead of raising: empty text, no embedding
        and empty metadata.  ``None`` text or metadata entries inside a
        returned batch are replaced the same way.

        Args:
            results: Query result, indexable by ``"ids"`` and optionally by
                ``"documents"``, ``"embeddings"`` and ``"metadatas"``.

        Returns:
            A list of ``Document`` objects, one per returned id.
        """
        id_batches = results["ids"]
        if not id_batches:
            return []
        ids = id_batches[0]
        count = len(ids)

        def first_batch(key: str) -> Any:
            # A field left out of `include` may be absent, None or empty;
            # substitute placeholders so the zip below still lines up with ids.
            try:
                batches = results[key]
            except KeyError:
                return [None] * count
            if not batches or batches[0] is None:
                return [None] * count
            return batches[0]

        return [
            Document(
                doc_id=doc_id,
                text=text if text is not None else "",
                embedding=embedding,
                # A fresh dict per document avoids sharing one mutable default.
                metadata=metadata if metadata is not None else {},
            )
            for doc_id, text, embedding, metadata in zip(
                ids,
                first_batch("documents"),
                first_batch("embeddings"),
                first_batch("metadatas"),
            )
        ]

    def load_data(
        self,
        query_vector: Optional[OneOrMany[Embedding]] = None,
        query_texts: Optional[OneOrMany[Doc]] = None,
        limit: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents", "embeddings", "distances"],
    ) -> Any:
        """Query the cluster and return the closest matches as documents.

        Exactly one of ``query_vector`` and ``query_texts`` must be given.

        Args:
            query_vector: Embedding(s) to search neighbours for; forwarded to
                the cluster as ``query_embeddings``.
            query_texts: Document text(s) to search neighbours for.
            limit: Number of neighbours to request; forwarded to the cluster
                as ``n_results``.
            where: Metadata filter. ``None`` is sent as an empty dict.
            where_document: Document-content filter. ``None`` is sent as an
                empty dict.
            include: Result fields to request. The default list is shared
                between calls; this method only forwards it and never
                mutates it.

        Returns:
            The documents built by ``create_documents`` from the query result.

        Raises:
            ValueError: If both or neither of ``query_vector`` and
                ``query_texts`` are given, if the query returns nothing, or
                if the result has neither embeddings nor documents.
        """
        # Exactly one of the two query inputs may be set.
        if (query_vector is None) == (query_texts is None):
            raise ValueError(
                "You must provide either embeddings or texts to find, but not both"
            )

        results = self._collection.find(
            query_embeddings=query_vector,
            query_texts=query_texts,
            n_results=limit,
            where={} if where is None else where,
            where_document={} if where_document is None else where_document,
            include=include,
        )

        if not results:
            raise ValueError("No results found")

        # A result carrying neither embeddings nor documents has nothing to load.
        if not results["embeddings"] and not results["documents"]:
            raise ValueError("No embeddings or documents found")

        return self.create_documents(results)

create_documents #

create_documents(results: Any) -> Any

根据结果创建文档。

Parameters:

Name Type Description Default
results Any

查询结果。

required

Returns:

Type Description
Any

文档列表。

Source code in llama_index/readers/bagel/base.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
    def create_documents(self, results: Any) -> Any:
        """Build documents from a query result.

        Only the first batch (index ``0``) of each result field is converted,
        i.e. the matches of the first query.

        Fields that the query did not return (missing key, ``None`` or empty)
        fall back to defaults instead of raising: empty text, no embedding
        and empty metadata.  ``None`` text or metadata entries inside a
        returned batch are replaced the same way.

        Args:
            results: Query result, indexable by ``"ids"`` and optionally by
                ``"documents"``, ``"embeddings"`` and ``"metadatas"``.

        Returns:
            A list of ``Document`` objects, one per returned id.
        """
        id_batches = results["ids"]
        if not id_batches:
            return []
        ids = id_batches[0]
        count = len(ids)

        def first_batch(key: str) -> Any:
            # A field that was not requested may be absent, None or empty;
            # substitute placeholders so the zip below still lines up with ids.
            try:
                batches = results[key]
            except KeyError:
                return [None] * count
            if not batches or batches[0] is None:
                return [None] * count
            return batches[0]

        return [
            Document(
                doc_id=doc_id,
                text=text if text is not None else "",
                embedding=embedding,
                # A fresh dict per document avoids sharing one mutable default.
                metadata=metadata if metadata is not None else {},
            )
            for doc_id, text, embedding, metadata in zip(
                ids,
                first_batch("documents"),
                first_batch("embeddings"),
                first_batch("metadatas"),
            )
        ]

load_data #

load_data(
    query_vector: Optional[OneOrMany[Embedding]] = None,
    query_texts: Optional[OneOrMany[Doc]] = None,
    limit: int = 10,
    where: Optional[Where] = None,
    where_document: Optional[WhereDocument] = None,
    include: Include = [
        "metadatas",
        "documents",
        "embeddings",
        "distances",
    ],
) -> Any

获取与提供的查询嵌入或查询文本最接近的前 `limit` 个文档。

Returns:

Type Description
Any

具有与查询嵌入或查询文本最接近的嵌入的Llama索引文档。

Source code in llama_index/readers/bagel/base.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
    def load_data(
        self,
        query_vector: Optional[OneOrMany[Embedding]] = None,
        query_texts: Optional[OneOrMany[Doc]] = None,
        limit: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents", "embeddings", "distances"],
    ) -> Any:
        """Query the cluster and return the closest matches as documents.

        Exactly one of ``query_vector`` and ``query_texts`` must be given.

        Args:
            query_vector: Embedding(s) to search neighbours for; forwarded to
                the cluster as ``query_embeddings``.
            query_texts: Document text(s) to search neighbours for.
            limit: Number of neighbours to request; forwarded to the cluster
                as ``n_results``.
            where: Metadata filter. ``None`` is sent as an empty dict.
            where_document: Document-content filter. ``None`` is sent as an
                empty dict.
            include: Result fields to request; forwarded unchanged.

        Returns:
            Whatever ``create_documents`` builds from the query result.

        Raises:
            ValueError: If both or neither of ``query_vector`` and
                ``query_texts`` are given, if the query returns nothing, or
                if the result has neither embeddings nor documents.
        """
        # Exactly one of the two query inputs may be set.
        vector_missing = query_vector is None
        texts_missing = query_texts is None
        if vector_missing == texts_missing:
            raise ValueError(
                "You must provide either embeddings or texts to find, but not both"
            )

        # The cluster is always handed dict filters, never None.
        metadata_filter = {} if where is None else where
        content_filter = {} if where_document is None else where_document

        found = self._collection.find(
            query_embeddings=query_vector,
            query_texts=query_texts,
            n_results=limit,
            where=metadata_filter,
            where_document=content_filter,
            include=include,
        )

        if not found:
            raise ValueError("No results found")

        # A result carrying neither embeddings nor documents has nothing to load.
        if not (found["embeddings"] or found["documents"]):
            raise ValueError("No embeddings or documents found")

        return self.create_documents(found)