Property graph

LlamaIndex data structures.

PropertyGraphIndex #

Bases: BaseIndex[IndexLPG]

An index for a property graph.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `nodes` | `Optional[Sequence[BaseNode]]` | A list of nodes to insert into the index. | `None` |
| `llm` | `Optional[BaseLLM]` | The language model to use for extracting triplets. Defaults to `Settings.llm`. | `None` |
| `kg_extractors` | `Optional[List[TransformComponent]]` | A list of transformations to apply to the nodes to extract triplets. Defaults to `[SimpleLLMPathExtractor(llm=llm), ImplicitPathExtractor()]`. | `None` |
| `property_graph_store` | `Optional[PropertyGraphStore]` | The property graph store to use. If not provided, a new `SimplePropertyGraphStore` will be created. | `None` |
| `vector_store` | `Optional[BasePydanticVectorStore]` | The vector store to use, if the graph store does not support vector queries. | `None` |
| `use_async` | `bool` | Whether to use async for the transformations. Defaults to `True`. | `True` |
| `embed_model` | `Optional[EmbedType]` | The embedding model to use for embedding nodes. If not provided and `embed_kg_nodes=True`, `Settings.embed_model` will be used. | `None` |
| `embed_kg_nodes` | `bool` | Whether to embed the KG nodes. Defaults to `True`. | `True` |
| `callback_manager` | `Optional[CallbackManager]` | The callback manager to use. | `None` |
| `transformations` | `Optional[List[TransformComponent]]` | A list of transformations to apply to the nodes before inserting them into the index. These are applied prior to the `kg_extractors`. | `None` |
| `storage_context` | `Optional[StorageContext]` | The storage context to use. | `None` |
| `show_progress` | `bool` | Whether to show progress bars for the transformations. Defaults to `False`. | `False` |
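For orientation, here is a minimal sketch of building an index with these parameters. The `./data` directory is a placeholder, and `from_documents` is inherited from `BaseIndex`; defaults from the table above are spelled out explicitly.

```python
from llama_index.core import PropertyGraphIndex, SimpleDirectoryReader

# Hypothetical input directory -- swap in your own documents.
documents = SimpleDirectoryReader("./data").load_data()

index = PropertyGraphIndex.from_documents(
    documents,
    use_async=True,        # default: run kg_extractors asynchronously
    embed_kg_nodes=True,   # default: embed extracted KG nodes
    show_progress=True,    # show progress bars for the transformations
)
```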
Source code in llama_index/core/indices/property_graph/base.py
class PropertyGraphIndex(BaseIndex[IndexLPG]):
    """属性图的索引。

    Args:
        nodes (Optional[Sequence[BaseNode]]):
            要插入索引的节点列表。
        llm (Optional[BaseLLM]):
            用于提取三元组的语言模型。默认为 `Settings.llm`。
        kg_extractors (Optional[List[TransformComponent]]):
            要应用于节点以提取三元组的转换列表。
            默认为 `[SimpleLLMPathExtractor(llm=llm), ImplicitEdgeExtractor()]`。
        property_graph_store (Optional[PropertyGraphStore]):
            要使用的属性图存储。如果未提供,将创建一个新的 `SimplePropertyGraphStore`。
        vector_store (Optional[BasePydanticVectorStore]):
            要使用的向量存储索引,如果图存储不支持向量查询。
        use_async (bool):
            是否使用异步进行转换。默认为 `True`。
        embed_model (Optional[EmbedType]):
            用于嵌入节点的嵌入模型。
            如果未提供,且 `embed_kg_nodes=True`,则将使用 `Settings.embed_model`。
        embed_kg_nodes (bool):
            是否嵌入属性图节点。默认为 `True`。
        callback_manager (Optional[CallbackManager]):
            要使用的回调管理器。
        transformations (Optional[List[TransformComponent]]):
            要应用于节点的转换列表,然后再将其插入索引。
            这些转换将在 `kg_extractors` 之前应用。
        storage_context (Optional[StorageContext]):
            要使用的存储上下文。
        show_progress (bool):
            是否显示转换的进度条。默认为 `False`。"""

    index_struct_cls = IndexLPG

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        llm: Optional[BaseLLM] = None,
        kg_extractors: Optional[List[TransformComponent]] = None,
        property_graph_store: Optional[PropertyGraphStore] = None,
        # vector related params
        vector_store: Optional[BasePydanticVectorStore] = None,
        use_async: bool = True,
        embed_model: Optional[EmbedType] = None,
        embed_kg_nodes: bool = True,
        # parent class params
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        storage_context: Optional[StorageContext] = None,
        show_progress: bool = False,
        **kwargs: Any,
    ) -> None:
        """初始化参数。"""
        storage_context = storage_context or StorageContext.from_defaults(
            property_graph_store=property_graph_store
        )

        # lazily initialize the graph store on the storage context
        if property_graph_store is not None:
            storage_context.property_graph_store = property_graph_store
        else:
            storage_context.property_graph_store = SimplePropertyGraphStore()

        if vector_store is not None:
            storage_context.vector_store = vector_store

        if embed_kg_nodes and (
            storage_context.property_graph_store.supports_vector_queries
            or embed_kg_nodes
        ):
            self._embed_model = (
                resolve_embed_model(embed_model)
                if embed_model
                else Settings.embed_model
            )
        else:
            self._embed_model = None

        self._kg_extractors = kg_extractors or [
            SimpleLLMPathExtractor(llm=llm or Settings.llm),
            ImplicitPathExtractor(),
        ]
        self._use_async = use_async
        self._llm = llm
        self._embed_kg_nodes = embed_kg_nodes
        self._override_vector_store = (
            vector_store is not None
            or not storage_context.property_graph_store.supports_vector_queries
        )

        super().__init__(
            nodes=nodes,
            callback_manager=callback_manager,
            storage_context=storage_context,
            transformations=transformations,
            show_progress=show_progress,
            **kwargs,
        )

    @classmethod
    def from_existing(
        cls: "PropertyGraphIndex",
        property_graph_store: PropertyGraphStore,
        vector_store: Optional[BasePydanticVectorStore] = None,
        # general params
        llm: Optional[BaseLLM] = None,
        kg_extractors: Optional[List[TransformComponent]] = None,
        # vector related params
        use_async: bool = True,
        embed_model: Optional[EmbedType] = None,
        embed_kg_nodes: bool = True,
        # parent class params
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        storage_context: Optional[StorageContext] = None,
        show_progress: bool = False,
        **kwargs: Any,
    ) -> "PropertyGraphIndex":
        """从现有的属性图存储(和可选的向量存储)中创建一个索引。"""
        return cls(
            nodes=[],  # no nodes to insert
            property_graph_store=property_graph_store,
            vector_store=vector_store,
            llm=llm,
            kg_extractors=kg_extractors,
            use_async=use_async,
            embed_model=embed_model,
            embed_kg_nodes=embed_kg_nodes,
            callback_manager=callback_manager,
            transformations=transformations,
            storage_context=storage_context,
            show_progress=show_progress,
            **kwargs,
        )

    @property
    def property_graph_store(self) -> PropertyGraphStore:
        """获取带有标签的属性图存储。"""
        return self.storage_context.property_graph_store

    @property
    def vector_store(self) -> Optional[BasePydanticVectorStore]:
        if self._embed_kg_nodes and self._override_vector_store:
            return self.storage_context.vector_store
        else:
            return None

    def _insert_nodes(self, nodes: Sequence[BaseNode]) -> Sequence[BaseNode]:
        """向索引结构中插入节点。"""
        # run transformations on nodes to extract triplets
        if self._use_async:
            nodes = asyncio.run(
                arun_transformations(
                    nodes, self._kg_extractors, show_progress=self._show_progress
                )
            )
        else:
            nodes = run_transformations(
                nodes, self._kg_extractors, show_progress=self._show_progress
            )

        # ensure all nodes have nodes and/or relations in metadata
        assert all(
            node.metadata.get(KG_NODES_KEY) is not None
            or node.metadata.get(KG_RELATIONS_KEY) is not None
            for node in nodes
        )

        kg_nodes_to_insert: List[LabelledNode] = []
        kg_rels_to_insert: List[Relation] = []
        for node in nodes:
            # remove nodes and relations from metadata
            kg_nodes = node.metadata.pop(KG_NODES_KEY, [])
            kg_rels = node.metadata.pop(KG_RELATIONS_KEY, [])

            # add source id to properties
            for kg_node in kg_nodes:
                kg_node.properties[TRIPLET_SOURCE_KEY] = node.id_
            for kg_rel in kg_rels:
                kg_rel.properties[TRIPLET_SOURCE_KEY] = node.id_

            # add nodes and relations to insert lists
            kg_nodes_to_insert.extend(kg_nodes)
            kg_rels_to_insert.extend(kg_rels)

        # embed nodes (if needed)
        if self._embed_kg_nodes:
            # embed llama-index nodes
            node_texts = [
                node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes
            ]

            if self._use_async:
                embeddings = asyncio.run(
                    self._embed_model.aget_text_embedding_batch(
                        node_texts, show_progress=self._show_progress
                    )
                )
            else:
                embeddings = self._embed_model.get_text_embedding_batch(
                    node_texts, show_progress=self._show_progress
                )

            for node, embedding in zip(nodes, embeddings):
                node.embedding = embedding

            # embed kg nodes
            kg_node_texts = [str(kg_node) for kg_node in kg_nodes_to_insert]

            if self._use_async:
                kg_embeddings = asyncio.run(
                    self._embed_model.aget_text_embedding_batch(
                        kg_node_texts, show_progress=self._show_progress
                    )
                )
            else:
                kg_embeddings = self._embed_model.get_text_embedding_batch(
                    kg_node_texts,
                    show_progress=self._show_progress,
                )

            for kg_node, embedding in zip(kg_nodes_to_insert, kg_embeddings):
                kg_node.embedding = embedding

        # if graph store doesn't support vectors, or the vector index was provided, use it
        if self.vector_store is not None:
            self._insert_nodes_to_vector_index(kg_nodes_to_insert)

        self.property_graph_store.upsert_llama_nodes(nodes)
        self.property_graph_store.upsert_nodes(kg_nodes_to_insert)

        # important: upsert relations after nodes
        self.property_graph_store.upsert_relations(kg_rels_to_insert)

        # refresh schema if needed
        if self.property_graph_store.supports_structured_queries:
            self.property_graph_store.get_schema(refresh=True)

        return nodes

    def _insert_nodes_to_vector_index(self, nodes: List[LabelledNode]) -> None:
        """插入向量节点。"""
        llama_nodes = []
        for node in nodes:
            if node.embedding is not None:
                llama_nodes.append(
                    TextNode(
                        id_=node.id,
                        text=str(node),
                        metadata=node.properties,
                        embedding=[*node.embedding],
                    )
                )

            # clear the embedding to save memory, it's not used now
            node.embedding = None

        self.vector_store.add(llama_nodes)

    def _build_index_from_nodes(self, nodes: Optional[Sequence[BaseNode]]) -> IndexLPG:
        """从节点构建索引。"""
        nodes = self._insert_nodes(nodes or [])

        # this isn't really used or needed
        return IndexLPG()

    def as_retriever(
        self,
        sub_retrievers: Optional[List["BasePGRetriever"]] = None,
        include_text: bool = True,
        **kwargs: Any,
    ) -> BaseRetriever:
        """返回索引的检索器。

Args:
    sub_retrievers(可选[List[BasePGRetriever]]):
        要使用的子检索器列表。如果未提供,则将使用默认列表:
        如果图存储支持向量查询,则为`[LLMSynonymRetriever, VectorContextRetriever]`。
    include_text(布尔值):
        是否在检索结果中包含源文本。
    **kwargs:
        传递给检索器的额外kwargs。
"""
        from llama_index.core.indices.property_graph.retriever import (
            PGRetriever,
        )
        from llama_index.core.indices.property_graph.sub_retrievers.vector import (
            VectorContextRetriever,
        )
        from llama_index.core.indices.property_graph.sub_retrievers.llm_synonym import (
            LLMSynonymRetriever,
        )

        if sub_retrievers is None:
            sub_retrievers = [
                LLMSynonymRetriever(
                    graph_store=self.property_graph_store,
                    include_text=include_text,
                    llm=self._llm,
                    **kwargs,
                ),
            ]

            if self._embed_model and (
                self.property_graph_store.supports_vector_queries or self.vector_store
            ):
                sub_retrievers.append(
                    VectorContextRetriever(
                        graph_store=self.property_graph_store,
                        vector_store=self.vector_store,
                        include_text=include_text,
                        **kwargs,
                    )
                )

        return PGRetriever(sub_retrievers, use_async=self._use_async, **kwargs)

    def _delete_node(self, node_id: str, **delete_kwargs: Any) -> None:
        """删除一个节点。"""
        self.property_graph_store.delete(ids=[node_id])

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """索引结构中特定于索引的逻辑,用于插入节点。"""
        self._insert_nodes(nodes)

    def ref_doc_info(self) -> Dict[str, RefDocInfo]:
        """获取已摄取文档及其节点和元数据的字典映射。"""
        raise NotImplementedError(
            "Ref doc info not implemented for PropertyGraphIndex. "
            "All inserts are already upserts."
        )

property_graph_store property #

property_graph_store: PropertyGraphStore

Get the labelled property graph store.
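Given an `index` built as above with the default `SimplePropertyGraphStore`, the store returned here can be persisted or visualized directly; a minimal sketch (paths are placeholders):

```python
# Assumes the default SimplePropertyGraphStore backend; hosted backends
# (e.g. Neo4j) manage their own persistence.
index.property_graph_store.persist(persist_path="./storage/graph.json")
index.property_graph_store.save_networkx_graph(name="./kg.html")
```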

from_existing classmethod #

from_existing(
    property_graph_store: PropertyGraphStore,
    vector_store: Optional[BasePydanticVectorStore] = None,
    llm: Optional[BaseLLM] = None,
    kg_extractors: Optional[
        List[TransformComponent]
    ] = None,
    use_async: bool = True,
    embed_model: Optional[EmbedType] = None,
    embed_kg_nodes: bool = True,
    callback_manager: Optional[CallbackManager] = None,
    transformations: Optional[
        List[TransformComponent]
    ] = None,
    storage_context: Optional[StorageContext] = None,
    show_progress: bool = False,
    **kwargs: Any
) -> PropertyGraphIndex

Create an index from an existing property graph store (and optionally a vector store).

Source code in llama_index/core/indices/property_graph/base.py
@classmethod
def from_existing(
    cls: "PropertyGraphIndex",
    property_graph_store: PropertyGraphStore,
    vector_store: Optional[BasePydanticVectorStore] = None,
    # general params
    llm: Optional[BaseLLM] = None,
    kg_extractors: Optional[List[TransformComponent]] = None,
    # vector related params
    use_async: bool = True,
    embed_model: Optional[EmbedType] = None,
    embed_kg_nodes: bool = True,
    # parent class params
    callback_manager: Optional[CallbackManager] = None,
    transformations: Optional[List[TransformComponent]] = None,
    storage_context: Optional[StorageContext] = None,
    show_progress: bool = False,
    **kwargs: Any,
) -> "PropertyGraphIndex":
    """从现有的属性图存储(和可选的向量存储)中创建一个索引。"""
    return cls(
        nodes=[],  # no nodes to insert
        property_graph_store=property_graph_store,
        vector_store=vector_store,
        llm=llm,
        kg_extractors=kg_extractors,
        use_async=use_async,
        embed_model=embed_model,
        embed_kg_nodes=embed_kg_nodes,
        callback_manager=callback_manager,
        transformations=transformations,
        storage_context=storage_context,
        show_progress=show_progress,
        **kwargs,
    )
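
For example, a sketch of reconnecting to an already-populated graph without re-running extraction. The Neo4j backend and credentials are assumptions for illustration (any `PropertyGraphStore` works) and require the separate `llama-index-graph-stores-neo4j` package.

```python
from llama_index.core import PropertyGraphIndex
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Placeholder credentials -- point these at your own Neo4j instance.
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="password",
    url="bolt://localhost:7687",
)

# Wrap the existing graph in an index; nodes=[] means nothing is re-inserted.
index = PropertyGraphIndex.from_existing(
    property_graph_store=graph_store,
)
```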

as_retriever #

as_retriever(
    sub_retrievers: Optional[List[BasePGRetriever]] = None,
    include_text: bool = True,
    **kwargs: Any
) -> BaseRetriever

Return a retriever for the index.

Source code in llama_index/core/indices/property_graph/base.py
    def as_retriever(
        self,
        sub_retrievers: Optional[List["BasePGRetriever"]] = None,
        include_text: bool = True,
        **kwargs: Any,
    ) -> BaseRetriever:
        """返回索引的检索器。

Args:
    sub_retrievers(可选[List[BasePGRetriever]]):
        要使用的子检索器列表。如果未提供,则将使用默认列表:
        如果图存储支持向量查询,则为`[LLMSynonymRetriever, VectorContextRetriever]`。
    include_text(布尔值):
        是否在检索结果中包含源文本。
    **kwargs:
        传递给检索器的额外kwargs。
"""
        from llama_index.core.indices.property_graph.retriever import (
            PGRetriever,
        )
        from llama_index.core.indices.property_graph.sub_retrievers.vector import (
            VectorContextRetriever,
        )
        from llama_index.core.indices.property_graph.sub_retrievers.llm_synonym import (
            LLMSynonymRetriever,
        )

        if sub_retrievers is None:
            sub_retrievers = [
                LLMSynonymRetriever(
                    graph_store=self.property_graph_store,
                    include_text=include_text,
                    llm=self._llm,
                    **kwargs,
                ),
            ]

            if self._embed_model and (
                self.property_graph_store.supports_vector_queries or self.vector_store
            ):
                sub_retrievers.append(
                    VectorContextRetriever(
                        graph_store=self.property_graph_store,
                        vector_store=self.vector_store,
                        include_text=include_text,
                        **kwargs,
                    )
                )

        return PGRetriever(sub_retrievers, use_async=self._use_async, **kwargs)
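
A minimal usage sketch, given an `index` from above; the query string is a placeholder:

```python
retriever = index.as_retriever(
    include_text=True,  # return source chunks alongside the matched graph paths
)

# The default sub-retrievers return NodeWithScore results.
nodes = retriever.retrieve("What happened at Interleaf?")
for node in nodes:
    print(node.text)
```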

ref_doc_info #

ref_doc_info() -> Dict[str, RefDocInfo]

Retrieve a dict mapping of ingested documents and their nodes+metadata.

Source code in llama_index/core/indices/property_graph/base.py
def ref_doc_info(self) -> Dict[str, RefDocInfo]:
    """获取已摄取文档及其节点和元数据的字典映射。"""
    raise NotImplementedError(
        "Ref doc info not implemented for PropertyGraphIndex. "
        "All inserts are already upserts."
    )