32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class SummaryIndex(BaseIndex[IndexList]):
    """Summary Index.

    The summary index is a simple data structure where nodes are stored in
    sequence. During index construction, the document texts are chunked up,
    converted to nodes, and stored in a list.

    At query time, the summary index iterates through the nodes with some
    optional filter parameters and synthesizes an answer from all the nodes.

    Args:
        text_qa_template (Optional[BasePromptTemplate]): A question-answer
            prompt (see :ref:`Prompt-Templates`).
            NOTE: this is a deprecated field.
        show_progress (bool): Whether to show tqdm progress bars.
            Defaults to False.
    """

    # Index-struct type associated with this index class.
    index_struct_cls = IndexList

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        objects: Optional[Sequence[IndexNode]] = None,
        index_struct: Optional[IndexList] = None,
        show_progress: bool = False,
        # deprecated
        service_context: Optional[ServiceContext] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params.

        Every argument is forwarded unchanged to the base index constructor.

        Args:
            nodes: Optional nodes passed to the base constructor.
            objects: Optional index nodes passed to the base constructor.
            index_struct: Optional existing ``IndexList`` passed to the base
                constructor.
            show_progress: Whether to show tqdm progress bars.
            service_context: Deprecated; passed through to the base
                constructor.
            **kwargs: Extra keyword arguments passed to the base constructor.
        """
        super().__init__(
            nodes=nodes,
            index_struct=index_struct,
            service_context=service_context,
            show_progress=show_progress,
            objects=objects,
            **kwargs,
        )

    def as_retriever(
        self,
        retriever_mode: Union[str, ListRetrieverMode] = ListRetrieverMode.DEFAULT,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        **kwargs: Any,
    ) -> BaseRetriever:
        """Build a retriever over this index for the requested mode.

        Args:
            retriever_mode: Which retriever to build. ``DEFAULT`` yields a
                ``SummaryIndexRetriever``, ``EMBEDDING`` a
                ``SummaryIndexEmbeddingRetriever`` and ``LLM`` a
                ``SummaryIndexLLMRetriever``.
            llm: LLM used in ``LLM`` mode. When omitted it is resolved via
                ``llm_from_settings_or_context``.
            embed_model: Embedding model used in ``EMBEDDING`` mode. When
                omitted it is resolved via
                ``embed_model_from_settings_or_context``.
            **kwargs: Extra keyword arguments forwarded to the retriever
                constructor.

        Returns:
            The retriever matching ``retriever_mode``.

        Raises:
            ValueError: If ``retriever_mode`` matches none of the modes above.
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import -- TODO confirm).
        from llama_index.core.indices.list.retrievers import (
            SummaryIndexEmbeddingRetriever,
            SummaryIndexLLMRetriever,
            SummaryIndexRetriever,
        )

        if retriever_mode == ListRetrieverMode.DEFAULT:
            return SummaryIndexRetriever(self, object_map=self._object_map, **kwargs)
        elif retriever_mode == ListRetrieverMode.EMBEDDING:
            embed_model = embed_model or embed_model_from_settings_or_context(
                Settings, self.service_context
            )
            return SummaryIndexEmbeddingRetriever(
                self, object_map=self._object_map, embed_model=embed_model, **kwargs
            )
        elif retriever_mode == ListRetrieverMode.LLM:
            llm = llm or llm_from_settings_or_context(Settings, self.service_context)
            return SummaryIndexLLMRetriever(
                self, object_map=self._object_map, llm=llm, **kwargs
            )
        else:
            raise ValueError(f"Unknown retriever mode: {retriever_mode}")

    def _build_index_from_nodes(
        self, nodes: Sequence[BaseNode], show_progress: bool = False
    ) -> IndexList:
        """Build the index struct from nodes.

        Args:
            nodes: Nodes to add, in order, to a fresh ``IndexList``.
            show_progress: Whether to wrap the iteration in a tqdm progress
                bar.

        Returns:
            IndexList: The created summary index struct.
        """
        index_struct = IndexList()
        nodes_with_progress = get_tqdm_iterable(
            nodes, show_progress, "Processing nodes"
        )
        for n in nodes_with_progress:
            index_struct.add_node(n)
        return index_struct

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """Insert nodes by appending each one to the index struct.

        Args:
            nodes: Nodes to add to the existing index struct.
            **insert_kwargs: Accepted for interface compatibility; unused here.
        """
        for n in nodes:
            self._index_struct.add_node(n)

    def _delete_node(self, node_id: str, **delete_kwargs: Any) -> None:
        """Delete a node from the index struct.

        Args:
            node_id: Id of the node to remove from the index struct.
            **delete_kwargs: Accepted for interface compatibility; unused here.
        """
        cur_node_ids = self._index_struct.nodes
        # NOTE(review): this docstore round trip looks avoidable -- filtering
        # ``cur_node_ids`` directly should give the same list if ``get_nodes``
        # returns nodes carrying the requested ids. Kept as-is because the
        # lookup may also surface missing nodes; confirm before changing.
        cur_nodes = self._docstore.get_nodes(cur_node_ids)
        nodes_to_keep = [n for n in cur_nodes if n.node_id != node_id]
        self._index_struct.nodes = [n.node_id for n in nodes_to_keep]

    @property
    def ref_doc_info(self) -> Dict[str, RefDocInfo]:
        """Retrieve a dict mapping of ingested documents and their info.

        Returns:
            A dict keyed by source-document id. Nodes without a source node,
            and source documents the docstore has no info for, are skipped.
        """
        node_doc_ids = self._index_struct.nodes
        nodes = self.docstore.get_nodes(node_doc_ids)

        all_ref_doc_info: Dict[str, RefDocInfo] = {}
        for node in nodes:
            ref_node = node.source_node
            if not ref_node:
                continue

            ref_doc_id = ref_node.node_id
            # Many nodes can share one source document; resolve each source
            # document once instead of querying the docstore per node.
            if ref_doc_id in all_ref_doc_info:
                continue

            ref_doc_info = self.docstore.get_ref_doc_info(ref_doc_id)
            if not ref_doc_info:
                continue

            all_ref_doc_info[ref_doc_id] = ref_doc_info
        return all_ref_doc_info
|