Skip to content

Tree

LlamaIndex数据结构。

TreeIndex #

Bases: BaseIndex[IndexGraph]

树索引。

树索引是一种树形结构的索引,其中每个节点都是子节点的摘要。在构建索引过程中,树是自底向上构建的,直到最终得到一组根节点。

在查询时有几种不同的选项(参见 Ref-Query)。主要选项是从根节点向下遍历树。次要选项是直接从根节点合成答案。

Parameters:

Name Type Description Default
summary_template Optional[BasePromptTemplate]

摘要提示(参见 Prompt-Templates)。

None
insert_prompt Optional[BasePromptTemplate]

树插入提示(参见 Prompt-Templates)。

None
num_children int

每个节点应该具有的子节点数。

10
build_tree bool

是否在构建索引时构建树。

True
show_progress bool

是否显示进度条。默认为False。

False
Source code in llama_index/core/indices/tree/base.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
class TreeIndex(BaseIndex[IndexGraph]):
    """Tree-structured index in which every node summarizes its children.

    The tree is assembled bottom-up while the index is being built, until
    only a set of root nodes remains.

    Several strategies are available at query time (see :ref:`Ref-Query`).
    The primary one walks down the tree starting at the root nodes; a
    secondary one synthesizes the answer directly from the root nodes.

    Args:
        summary_template (Optional[BasePromptTemplate]): Summarization prompt
            (see :ref:`Prompt-Templates`). ``DEFAULT_SUMMARY_PROMPT`` is used
            when omitted.
        insert_prompt (Optional[BasePromptTemplate]): Tree insertion prompt
            (see :ref:`Prompt-Templates`). ``DEFAULT_INSERT_PROMPT`` is used
            when omitted.
        num_children (int): Number of children each node should have.
        build_tree (bool): Whether to build the tree while constructing the
            index.
        show_progress (bool): Whether to show a progress bar. Defaults to
            False.
    """

    index_struct_cls = IndexGraph

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        objects: Optional[Sequence[IndexNode]] = None,
        index_struct: Optional[IndexGraph] = None,
        llm: Optional[LLM] = None,
        summary_template: Optional[BasePromptTemplate] = None,
        insert_prompt: Optional[BasePromptTemplate] = None,
        num_children: int = 10,
        build_tree: bool = True,
        use_async: bool = False,
        show_progress: bool = False,
        # deprecated
        service_context: Optional[ServiceContext] = None,
        **kwargs: Any,
    ) -> None:
        """Store the tree settings, then delegate to the base initializer.

        ``nodes``, ``objects``, ``index_struct``, ``service_context``,
        ``show_progress`` and any extra ``kwargs`` are forwarded unchanged to
        ``BaseIndex.__init__``. ``service_context`` is deprecated.
        """
        # Every attribute below must exist before the base class runs, because
        # the base initializer may build the index and read them.
        self.num_children = num_children
        self.summary_template = (
            summary_template if summary_template else DEFAULT_SUMMARY_PROMPT
        )
        self.insert_prompt: BasePromptTemplate = (
            insert_prompt if insert_prompt else DEFAULT_INSERT_PROMPT
        )
        self.build_tree = build_tree
        self._use_async = use_async
        # An explicit LLM wins; otherwise resolve one from the global settings
        # or the (deprecated) service context.
        if llm:
            self._llm = llm
        else:
            self._llm = llm_from_settings_or_context(Settings, service_context)

        super().__init__(
            nodes=nodes,
            index_struct=index_struct,
            service_context=service_context,
            show_progress=show_progress,
            objects=objects,
            **kwargs,
        )

    def as_retriever(
        self,
        retriever_mode: Union[str, TreeRetrieverMode] = TreeRetrieverMode.SELECT_LEAF,
        embed_model: Optional[BaseEmbedding] = None,
        **kwargs: Any,
    ) -> BaseRetriever:
        """Create the retriever that corresponds to ``retriever_mode``.

        Args:
            retriever_mode: Retrieval strategy, given either as a
                ``TreeRetrieverMode`` member or as its value.
            embed_model: Embedding model, only used by the
                ``SELECT_LEAF_EMBEDDING`` mode. When falsy, one is resolved
                from the global settings or the service context.
            **kwargs: Extra keyword arguments handed to the retriever
                constructor.

        Returns:
            The retriever instance for the requested mode.

        Raises:
            ValueError: If the mode needs a built tree but the index was
                created with ``build_tree=False``, or if the mode matches none
                of the handled modes.
        """
        # NOTE: lazy import
        from llama_index.core.indices.tree.all_leaf_retriever import (
            TreeAllLeafRetriever,
        )
        from llama_index.core.indices.tree.select_leaf_embedding_retriever import (
            TreeSelectLeafEmbeddingRetriever,
        )
        from llama_index.core.indices.tree.select_leaf_retriever import (
            TreeSelectLeafRetriever,
        )
        from llama_index.core.indices.tree.tree_root_retriever import (
            TreeRootRetriever,
        )

        # Coerce to the enum for the tree-requirement check only; dispatch
        # below still compares the value the caller passed in.
        requested_mode = TreeRetrieverMode(retriever_mode)
        self._validate_build_tree_required(requested_mode)

        if retriever_mode == TreeRetrieverMode.SELECT_LEAF:
            return TreeSelectLeafRetriever(self, object_map=self._object_map, **kwargs)

        if retriever_mode == TreeRetrieverMode.SELECT_LEAF_EMBEDDING:
            if not embed_model:
                embed_model = embed_model_from_settings_or_context(
                    Settings, self._service_context
                )
            return TreeSelectLeafEmbeddingRetriever(
                self, embed_model=embed_model, object_map=self._object_map, **kwargs
            )

        if retriever_mode == TreeRetrieverMode.ROOT:
            return TreeRootRetriever(self, object_map=self._object_map, **kwargs)

        if retriever_mode == TreeRetrieverMode.ALL_LEAF:
            return TreeAllLeafRetriever(self, object_map=self._object_map, **kwargs)

        raise ValueError(f"Unknown retriever mode: {retriever_mode}")

    def _validate_build_tree_required(self, retriever_mode: TreeRetrieverMode) -> None:
        """Reject modes listed in ``REQUIRE_TREE_MODES`` when no tree was built.

        Raises:
            ValueError: If ``retriever_mode`` requires a tree and
                ``self.build_tree`` is falsy.
        """
        if retriever_mode not in REQUIRE_TREE_MODES or self.build_tree:
            return
        raise ValueError(
            "Index was constructed without building trees, "
            f"but retriever mode {retriever_mode} requires trees."
        )

    def _build_index_from_nodes(self, nodes: Sequence[BaseNode]) -> IndexGraph:
        """Build the index structure from ``nodes``.

        Delegates to ``GPTTreeIndexBuilder``, passing along this index's
        ``build_tree`` flag.
        """
        builder = GPTTreeIndexBuilder(
            self.num_children,
            self.summary_template,
            service_context=self.service_context,
            llm=self._llm,
            use_async=self._use_async,
            show_progress=self._show_progress,
            docstore=self._docstore,
        )
        graph = builder.build_from_nodes(nodes, build_tree=self.build_tree)
        return graph

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """Insert ``nodes`` into the existing tree via ``TreeIndexInserter``.

        ``insert_kwargs`` is accepted but not used here.
        """
        # TODO: allow to customize insert prompt
        tree_inserter = TreeIndexInserter(
            self.index_struct,
            service_context=self.service_context,
            llm=self._llm,
            num_children=self.num_children,
            insert_prompt=self.insert_prompt,
            summary_prompt=self.summary_template,
            docstore=self._docstore,
        )
        tree_inserter.insert(nodes)

    def _delete_node(self, node_id: str, **delete_kwargs: Any) -> None:
        """Delete a node; not supported by the tree index.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Delete not implemented for tree index.")

    @property
    def ref_doc_info(self) -> Dict[str, RefDocInfo]:
        """Map each ingested source document id to its ``RefDocInfo``.

        Nodes without a source node, and source ids for which the docstore
        returns no info, are left out.
        """
        stored_ids = list(self.index_struct.all_nodes.values())
        collected = {}
        for stored_node in self.docstore.get_nodes(stored_ids):
            source = stored_node.source_node
            if source:
                doc_info = self.docstore.get_ref_doc_info(source.node_id)
                if doc_info:
                    collected[source.node_id] = doc_info
        return collected

ref_doc_info property #

ref_doc_info: Dict[str, RefDocInfo]

获取已摄取文档及其节点和元数据的字典映射。