Skip to content

Index

响应构建器类。

该类提供通用函数,用于接收一组文本作为输入并生成响应。

支持多种模式,包括:1)将文本块填充到提示中;2)逐个文本块创建并细化答案;3)树状摘要。

BaseSynthesizer #

Bases: ChainableMixin, PromptMixin

响应构建器类。

Source code in llama_index/core/response_synthesizers/base.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
class BaseSynthesizer(ChainableMixin, PromptMixin):
    """Base class for response synthesizers (response builders).

    Subclasses implement :meth:`get_response` and :meth:`aget_response`,
    which turn a query string plus a sequence of text chunks into a raw
    response. :meth:`synthesize` / :meth:`asynthesize` wrap those hooks:
    they extract the text of each node, call the hook inside a
    ``CBEventType.SYNTHESIZE`` callback event, and wrap the raw result in
    the matching response object together with the source nodes.
    """

    def __init__(
        self,
        llm: Optional[LLMPredictorType] = None,
        callback_manager: Optional[CallbackManager] = None,
        prompt_helper: Optional[PromptHelper] = None,
        streaming: bool = False,
        output_cls: Optional[BaseModel] = None,
        # deprecated
        service_context: Optional[ServiceContext] = None,
    ) -> None:
        """Initialize the synthesizer.

        Args:
            llm: LLM to use. When falsy, it is resolved with
                ``llm_from_settings_or_context(Settings, service_context)``.
            callback_manager: Callback manager to use. When given it is also
                assigned to the LLM; otherwise it is resolved with
                ``callback_manager_from_settings_or_context``.
            prompt_helper: Prompt helper to use. Falls back to
                ``Settings._prompt_helper`` and then to a helper built from
                the LLM metadata.
            streaming: Stored flag; :meth:`synthesize` and
                :meth:`asynthesize` use it to pick the empty-response type.
            output_cls: Optional class used to recognise structured
                responses in :meth:`_prepare_response_output`.
            service_context: Deprecated. Only used as a fallback source for
                the LLM and the callback manager.
        """
        self._llm = llm or llm_from_settings_or_context(Settings, service_context)

        if callback_manager:
            self._llm.callback_manager = callback_manager

        self._callback_manager = (
            callback_manager
            or callback_manager_from_settings_or_context(Settings, service_context)
        )

        self._prompt_helper = (
            prompt_helper
            or Settings._prompt_helper
            or PromptHelper.from_llm_metadata(
                self._llm.metadata,
            )
        )

        self._streaming = streaming
        self._output_cls = output_cls

    def _get_prompt_modules(self) -> Dict[str, Any]:
        """Return the prompt sub-modules of this synthesizer (none here)."""
        # TODO: keep this for now since response synthesizers don't generally have sub-modules
        return {}

    @property
    def callback_manager(self) -> CallbackManager:
        """Return the callback manager used by this synthesizer."""
        return self._callback_manager

    @callback_manager.setter
    def callback_manager(self, callback_manager: CallbackManager) -> None:
        """Set the callback manager on this synthesizer and on its LLM."""
        self._callback_manager = callback_manager
        # TODO: please fix this later
        self._llm.callback_manager = callback_manager

    @abstractmethod
    def get_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Produce the raw response for ``query_str`` from ``text_chunks``.

        Args:
            query_str: The query text.
            text_chunks: Text chunks to build the response from.
            **response_kwargs: Extra keyword arguments forwarded by
                :meth:`synthesize`.

        Returns:
            The raw response, later wrapped by
            :meth:`_prepare_response_output`.
        """
        ...

    @abstractmethod
    async def aget_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Asynchronous counterpart of :meth:`get_response`.

        Args:
            query_str: The query text.
            text_chunks: Text chunks to build the response from.
            **response_kwargs: Extra keyword arguments forwarded by
                :meth:`asynthesize`.

        Returns:
            The raw response, later wrapped by
            :meth:`_prepare_response_output`.
        """
        ...

    def _log_prompt_and_response(
        self,
        formatted_prompt: str,
        response: RESPONSE_TEXT_TYPE,
        log_prefix: str = "",
    ) -> None:
        """Log the formatted prompt and the LLM response at debug level."""
        logger.debug(f"> {log_prefix} prompt template: {formatted_prompt}")
        logger.debug(f"> {log_prefix} response: {response}")

    def _get_metadata_for_response(
        self,
        nodes: List[BaseNode],
    ) -> Optional[Dict[str, Any]]:
        """Return a mapping of ``node_id`` to node metadata for ``nodes``."""
        return {node.node_id: node.metadata for node in nodes}

    def _prepare_response_output(
        self,
        response_str: Optional[RESPONSE_TEXT_TYPE],
        source_nodes: List[NodeWithScore],
    ) -> RESPONSE_TYPE:
        """Wrap a raw response in the matching response object.

        Args:
            response_str: Raw result of :meth:`get_response` or
                :meth:`aget_response`.
            source_nodes: Nodes attached to the response; their metadata is
                collected into the response metadata.

        Returns:
            ``Response`` for a string, ``StreamingResponse`` for a
            generator, ``AsyncStreamingResponse`` for an async generator, or
            ``PydanticResponse`` for an instance of the configured
            ``output_cls``.

        Raises:
            ValueError: If ``response_str`` is none of the supported types.
        """
        response_metadata = self._get_metadata_for_response(
            [node_with_score.node for node_with_score in source_nodes]
        )

        if isinstance(response_str, str):
            return Response(
                response_str,
                source_nodes=source_nodes,
                metadata=response_metadata,
            )
        if isinstance(response_str, Generator):
            return StreamingResponse(
                response_str,
                source_nodes=source_nodes,
                metadata=response_metadata,
            )
        if isinstance(response_str, AsyncGenerator):
            return AsyncStreamingResponse(
                response_str,
                source_nodes=source_nodes,
                metadata=response_metadata,
            )

        # ``output_cls`` defaults to None; isinstance(x, None) raises
        # TypeError, which would mask the ValueError below.
        if self._output_cls is not None and isinstance(
            response_str, self._output_cls
        ):
            return PydanticResponse(
                response_str, source_nodes=source_nodes, metadata=response_metadata
            )

        raise ValueError(
            f"Response must be a string or a generator. Found {type(response_str)}"
        )

    @dispatcher.span
    def synthesize(
        self,
        query: QueryTextType,
        nodes: List[NodeWithScore],
        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
        **response_kwargs: Any,
    ) -> RESPONSE_TYPE:
        """Synthesize a response to ``query`` from ``nodes``.

        Args:
            query: Query as a plain string or a ``QueryBundle``; a string is
                wrapped in a ``QueryBundle`` before use.
            nodes: Nodes whose content (``MetadataMode.LLM``) is passed to
                :meth:`get_response` as text chunks.
            additional_source_nodes: Extra nodes appended to the response's
                source nodes; they are not passed to :meth:`get_response`.
            **response_kwargs: Forwarded to :meth:`get_response`.

        Returns:
            The wrapped response. When ``nodes`` is empty, an empty
            ``StreamingResponse`` (streaming) or ``Response("Empty
            Response")`` is returned without calling :meth:`get_response`.
        """
        dispatch_event = dispatcher.get_dispatch_event()

        dispatch_event(
            SynthesizeStartEvent(
                query=query,
            )
        )

        if len(nodes) == 0:
            # Nothing to synthesize from: short-circuit with a placeholder.
            empty_response: RESPONSE_TYPE
            if self._streaming:
                empty_response = StreamingResponse(
                    response_gen=empty_response_generator()
                )
            else:
                empty_response = Response("Empty Response")
            dispatch_event(
                SynthesizeEndEvent(
                    query=query,
                    response=empty_response,
                )
            )
            return empty_response

        if isinstance(query, str):
            query = QueryBundle(query_str=query)

        with self._callback_manager.event(
            CBEventType.SYNTHESIZE,
            payload={EventPayload.QUERY_STR: query.query_str},
        ) as event:
            response_str = self.get_response(
                query_str=query.query_str,
                text_chunks=[
                    n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes
                ],
                **response_kwargs,
            )

            additional_source_nodes = additional_source_nodes or []
            source_nodes = list(nodes) + list(additional_source_nodes)

            response = self._prepare_response_output(response_str, source_nodes)

            event.on_end(payload={EventPayload.RESPONSE: response})

        dispatch_event(
            SynthesizeEndEvent(
                query=query,
                response=response,
            )
        )
        return response

    @dispatcher.span
    async def asynthesize(
        self,
        query: QueryTextType,
        nodes: List[NodeWithScore],
        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
        **response_kwargs: Any,
    ) -> RESPONSE_TYPE:
        """Asynchronously synthesize a response to ``query`` from ``nodes``.

        Args:
            query: Query as a plain string or a ``QueryBundle``; a string is
                wrapped in a ``QueryBundle`` before use.
            nodes: Nodes whose content (``MetadataMode.LLM``) is passed to
                :meth:`aget_response` as text chunks.
            additional_source_nodes: Extra nodes appended to the response's
                source nodes; they are not passed to :meth:`aget_response`.
            **response_kwargs: Forwarded to :meth:`aget_response`.

        Returns:
            The wrapped response. When ``nodes`` is empty, an empty
            ``AsyncStreamingResponse`` (streaming) or ``Response("Empty
            Response")`` is returned without calling :meth:`aget_response`.
        """
        dispatch_event = dispatcher.get_dispatch_event()

        dispatch_event(
            SynthesizeStartEvent(
                query=query,
            )
        )
        if len(nodes) == 0:
            # Nothing to synthesize from: short-circuit with a placeholder.
            empty_response: RESPONSE_TYPE
            if self._streaming:
                empty_response = AsyncStreamingResponse(
                    response_gen=empty_response_agenerator()
                )
            else:
                empty_response = Response("Empty Response")
            dispatch_event(
                SynthesizeEndEvent(
                    query=query,
                    response=empty_response,
                )
            )
            return empty_response

        if isinstance(query, str):
            query = QueryBundle(query_str=query)

        with self._callback_manager.event(
            CBEventType.SYNTHESIZE,
            payload={EventPayload.QUERY_STR: query.query_str},
        ) as event:
            response_str = await self.aget_response(
                query_str=query.query_str,
                text_chunks=[
                    n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes
                ],
                **response_kwargs,
            )

            additional_source_nodes = additional_source_nodes or []
            source_nodes = list(nodes) + list(additional_source_nodes)

            response = self._prepare_response_output(response_str, source_nodes)

            event.on_end(payload={EventPayload.RESPONSE: response})

        dispatch_event(
            SynthesizeEndEvent(
                query=query,
                response=response,
            )
        )
        return response

    def _as_query_component(self, **kwargs: Any) -> QueryComponent:
        """Return a ``SynthesizerComponent`` wrapping this synthesizer."""
        return SynthesizerComponent(synthesizer=self)

get_response abstractmethod #

get_response(
    query_str: str,
    text_chunks: Sequence[str],
    **response_kwargs: Any
) -> RESPONSE_TEXT_TYPE

获取响应。

Source code in llama_index/core/response_synthesizers/base.py
120
121
122
123
124
125
126
127
128
@abstractmethod
def get_response(
    self,
    query_str: str,
    text_chunks: Sequence[str],
    **response_kwargs: Any,
) -> RESPONSE_TEXT_TYPE:
    """Produce the raw response for ``query_str`` from ``text_chunks``.

    Abstract hook; concrete synthesizers must implement it.

    Args:
        query_str: The query text.
        text_chunks: Text chunks to build the response from.
        **response_kwargs: Extra, implementation-specific keyword arguments.

    Returns:
        The raw response as a ``RESPONSE_TEXT_TYPE`` value.
    """
    ...

aget_response abstractmethod async #

aget_response(
    query_str: str,
    text_chunks: Sequence[str],
    **response_kwargs: Any
) -> RESPONSE_TEXT_TYPE

获取响应。

Source code in llama_index/core/response_synthesizers/base.py
130
131
132
133
134
135
136
137
138
@abstractmethod
async def aget_response(
    self,
    query_str: str,
    text_chunks: Sequence[str],
    **response_kwargs: Any,
) -> RESPONSE_TEXT_TYPE:
    """Asynchronously produce the raw response for ``query_str``.

    Abstract hook; concrete synthesizers must implement it.

    Args:
        query_str: The query text.
        text_chunks: Text chunks to build the response from.
        **response_kwargs: Extra, implementation-specific keyword arguments.

    Returns:
        The raw response as a ``RESPONSE_TEXT_TYPE`` value.
    """
    ...

get_response_synthesizer #

get_response_synthesizer(
    llm: Optional[LLMPredictorType] = None,
    prompt_helper: Optional[PromptHelper] = None,
    service_context: Optional[ServiceContext] = None,
    text_qa_template: Optional[BasePromptTemplate] = None,
    refine_template: Optional[BasePromptTemplate] = None,
    summary_template: Optional[BasePromptTemplate] = None,
    simple_template: Optional[BasePromptTemplate] = None,
    response_mode: ResponseMode = ResponseMode.COMPACT,
    callback_manager: Optional[CallbackManager] = None,
    use_async: bool = False,
    streaming: bool = False,
    structured_answer_filtering: bool = False,
    output_cls: Optional[BaseModel] = None,
    program_factory: Optional[
        Callable[[PromptTemplate], BasePydanticProgram]
    ] = None,
    verbose: bool = False,
) -> BaseSynthesizer

获取一个响应合成器。

Source code in llama_index/core/response_synthesizers/factory.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def get_response_synthesizer(
    llm: Optional[LLMPredictorType] = None,
    prompt_helper: Optional[PromptHelper] = None,
    service_context: Optional[ServiceContext] = None,
    text_qa_template: Optional[BasePromptTemplate] = None,
    refine_template: Optional[BasePromptTemplate] = None,
    summary_template: Optional[BasePromptTemplate] = None,
    simple_template: Optional[BasePromptTemplate] = None,
    response_mode: ResponseMode = ResponseMode.COMPACT,
    callback_manager: Optional[CallbackManager] = None,
    use_async: bool = False,
    streaming: bool = False,
    structured_answer_filtering: bool = False,
    output_cls: Optional[BaseModel] = None,
    program_factory: Optional[Callable[[PromptTemplate], BasePydanticProgram]] = None,
    verbose: bool = False,
) -> BaseSynthesizer:
    """Build the response synthesizer that corresponds to ``response_mode``.

    Missing templates fall back to the module defaults; a missing LLM or
    callback manager is resolved from ``Settings`` / ``service_context``.
    When ``service_context`` is given, its ``prompt_helper`` replaces the
    ``prompt_helper`` argument.

    Args:
        llm: LLM handed to the synthesizer.
        prompt_helper: Prompt helper handed to the synthesizer.
        service_context: Deprecated context, also forwarded as-is.
        text_qa_template: Question-answering prompt template.
        refine_template: Refine prompt template.
        summary_template: Template used for ``TREE_SUMMARIZE``.
        simple_template: Template used for ``GENERATION``.
        response_mode: Selects the synthesizer class.
        callback_manager: Callback manager handed to the synthesizer.
        use_async: Forwarded to the tree-summarize and accumulate variants.
        streaming: Forwarded to every synthesizer.
        structured_answer_filtering: Forwarded to the refine variants.
        output_cls: Forwarded to the refine, tree-summarize and accumulate
            variants.
        program_factory: Forwarded to the refine variants.
        verbose: Forwarded to the refine and tree-summarize variants.

    Returns:
        The synthesizer instance for the requested mode.

    Raises:
        ValueError: If ``response_mode`` matches no known mode.
    """
    # Fill in default prompt templates for anything not supplied.
    if not text_qa_template:
        text_qa_template = DEFAULT_TEXT_QA_PROMPT_SEL
    if not refine_template:
        refine_template = DEFAULT_REFINE_PROMPT_SEL
    if not simple_template:
        simple_template = DEFAULT_SIMPLE_INPUT_PROMPT
    if not summary_template:
        summary_template = DEFAULT_TREE_SUMMARIZE_PROMPT_SEL

    if not callback_manager:
        callback_manager = callback_manager_from_settings_or_context(
            Settings, service_context
        )
    if not llm:
        llm = llm_from_settings_or_context(Settings, service_context)

    # A service context, when present, dictates the prompt helper.
    if service_context is not None:
        prompt_helper = service_context.prompt_helper
    elif not prompt_helper:
        prompt_helper = Settings._prompt_helper or PromptHelper.from_llm_metadata(
            llm.metadata,
        )

    # Keyword arguments accepted by every synthesizer class.
    common = dict(
        llm=llm,
        callback_manager=callback_manager,
        prompt_helper=prompt_helper,
        streaming=streaming,
        # deprecated
        service_context=service_context,
    )
    # Extras shared by the two refine-style synthesizers.
    refine_extras = dict(
        text_qa_template=text_qa_template,
        refine_template=refine_template,
        output_cls=output_cls,
        structured_answer_filtering=structured_answer_filtering,
        program_factory=program_factory,
        verbose=verbose,
    )
    # Extras shared by the two accumulate-style synthesizers.
    accumulate_extras = dict(
        text_qa_template=text_qa_template,
        output_cls=output_cls,
        use_async=use_async,
    )

    if response_mode == ResponseMode.REFINE:
        return Refine(**common, **refine_extras)
    if response_mode == ResponseMode.COMPACT:
        return CompactAndRefine(**common, **refine_extras)
    if response_mode == ResponseMode.TREE_SUMMARIZE:
        return TreeSummarize(
            summary_template=summary_template,
            output_cls=output_cls,
            use_async=use_async,
            verbose=verbose,
            **common,
        )
    if response_mode == ResponseMode.SIMPLE_SUMMARIZE:
        return SimpleSummarize(text_qa_template=text_qa_template, **common)
    if response_mode == ResponseMode.GENERATION:
        return Generation(simple_template=simple_template, **common)
    if response_mode == ResponseMode.ACCUMULATE:
        return Accumulate(**common, **accumulate_extras)
    if response_mode == ResponseMode.COMPACT_ACCUMULATE:
        return CompactAndAccumulate(**common, **accumulate_extras)
    if response_mode == ResponseMode.NO_TEXT:
        return NoText(**common)

    raise ValueError(f"Unknown mode: {response_mode}")

ResponseMode #

Bases: str, Enum

响应生成器(和合成器)的响应模式。

Source code in llama_index/core/response_synthesizers/type.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class ResponseMode(str, Enum):
    """Response modes of the response builder (and synthesizer)."""

    REFINE = "refine"
    """Iterative response generation.

    An initial answer is generated from the query and the context of the
    first node. That answer, the query and the context of the second node
    are then fed into a "refine prompt" to produce a refined answer, and so
    on: refinement runs over N-1 nodes, where N is the total number of nodes.
    """

    COMPACT = "compact"
    """Compact-and-refine.

    Text chunks are first merged into larger consolidated chunks that make
    fuller use of the available context window; the answer is then refined
    across them. Faster than ``refine`` because fewer LLM calls are made.
    """

    SIMPLE_SUMMARIZE = "simple_summarize"
    """Merge all text chunks into one and make a single LLM call.

    Fails if the merged chunk exceeds the context window size.
    """

    TREE_SUMMARIZE = "tree_summarize"
    """Build a tree index over the candidate nodes.

    A summary prompt seeded with the query is used. The tree is built
    bottom-up and the root node is returned as the response.
    """

    GENERATION = "generation"
    """Ignore the context and generate a reply with the LLM alone."""

    NO_TEXT = "no_text"
    """Return the retrieved context nodes without synthesizing a final response."""

    ACCUMULATE = "accumulate"
    """Synthesize one response per text chunk, then return them concatenated."""

    COMPACT_ACCUMULATE = "compact_accumulate"
    """Compact-and-accumulate.

    Text chunks are first merged into larger consolidated chunks that make
    fuller use of the available context window; an answer is accumulated for
    each chunk and the concatenation is returned. Faster than ``accumulate``
    because fewer LLM calls are made.
    """

REFINE class-attribute instance-attribute #

REFINE = 'refine'

Refine是一种生成响应的迭代方式。我们首先使用第一个节点中的上下文和查询来生成初始答案。然后,我们将这个答案、查询和第二个节点的上下文作为输入传递到“细化提示”中,以生成一个经过细化的答案。我们通过N-1个节点进行细化,其中N是总节点数。

COMPACT class-attribute instance-attribute #

COMPACT = 'compact'

紧凑细化(compact)模式首先将文本块合并成更大的整合块,以更充分地利用可用的上下文窗口,然后在这些整合块之间逐步细化答案。这种模式比细化(refine)模式更快,因为对LLM的调用次数更少。

SIMPLE_SUMMARIZE class-attribute instance-attribute #

SIMPLE_SUMMARIZE = 'simple_summarize'

将所有文本块合并为一个,并进行LLM调用。 如果合并的文本块超过上下文窗口的大小,这将失败。

TREE_SUMMARIZE class-attribute instance-attribute #

TREE_SUMMARIZE = 'tree_summarize'

在候选节点集上构建一棵树索引,其中包含一个以查询为种子的摘要提示。 树是自底向上构建的,在最后将根节点作为响应返回。

GENERATION class-attribute instance-attribute #

GENERATION = 'generation'

忽略上下文,只使用LLM生成一个回复。

NO_TEXT class-attribute instance-attribute #

NO_TEXT = 'no_text'

返回检索到的上下文节点,而不合成最终响应。

ACCUMULATE class-attribute instance-attribute #

ACCUMULATE = 'accumulate'

为每个文本块分别合成一个响应,然后返回拼接后的结果。

COMPACT_ACCUMULATE class-attribute instance-attribute #

COMPACT_ACCUMULATE = 'compact_accumulate'

紧凑累积模式首先将文本块合并成更大的整合块,更充分利用可用的上下文窗口,然后累积每个块的答案,最后返回连接的结果。 这种模式比累积模式更快,因为我们对LLM的调用更少。