Source code for langchain.chains.mapreduce
"""Map-reduce链。
将文档拆分,将较小的部分发送到LLM与一个提示,然后用另一个提示组合结果。
"""
from __future__ import annotations
from typing import Any, Dict, List, Mapping, Optional
from langchain_core.callbacks import CallbackManagerForChainRun, Callbacks
from langchain_core.documents import Document
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Extra
from langchain_text_splitters import TextSplitter
from langchain.chains import ReduceDocumentsChain
from langchain.chains.base import Chain
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
[docs]class MapReduceChain(Chain):
"""Map-reduce链。"""
combine_documents_chain: BaseCombineDocumentsChain
"""用于合并文档的链。"""
text_splitter: TextSplitter
"""文本分割器的使用。"""
input_key: str = "input_text" #: :meta private:
output_key: str = "output_text" #: :meta private:
[docs] @classmethod
def from_params(
cls,
llm: BaseLanguageModel,
prompt: BasePromptTemplate,
text_splitter: TextSplitter,
callbacks: Callbacks = None,
combine_chain_kwargs: Optional[Mapping[str, Any]] = None,
reduce_chain_kwargs: Optional[Mapping[str, Any]] = None,
**kwargs: Any,
) -> MapReduceChain:
"""构建一个使用链式操作进行map和reduce的map-reduce链。"""
llm_chain = LLMChain(llm=llm, prompt=prompt, callbacks=callbacks)
stuff_chain = StuffDocumentsChain(
llm_chain=llm_chain,
callbacks=callbacks,
**(reduce_chain_kwargs if reduce_chain_kwargs else {}),
)
reduce_documents_chain = ReduceDocumentsChain(
combine_documents_chain=stuff_chain
)
combine_documents_chain = MapReduceDocumentsChain(
llm_chain=llm_chain,
reduce_documents_chain=reduce_documents_chain,
callbacks=callbacks,
**(combine_chain_kwargs if combine_chain_kwargs else {}),
)
return cls(
combine_documents_chain=combine_documents_chain,
text_splitter=text_splitter,
callbacks=callbacks,
**kwargs,
)
class Config:
"""这个pydantic对象的配置。"""
extra = Extra.forbid
arbitrary_types_allowed = True
@property
def input_keys(self) -> List[str]:
"""期望输入键。
:元数据 私有:
"""
return [self.input_key]
@property
def output_keys(self) -> List[str]:
"""返回输出键。
:元数据 私有:
"""
return [self.output_key]
def _call(
self,
inputs: Dict[str, str],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, str]:
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
# Split the larger text into smaller chunks.
doc_text = inputs.pop(self.input_key)
texts = self.text_splitter.split_text(doc_text)
docs = [Document(page_content=text) for text in texts]
_inputs: Dict[str, Any] = {
**inputs,
self.combine_documents_chain.input_key: docs,
}
outputs = self.combine_documents_chain.run(
_inputs, callbacks=_run_manager.get_child()
)
return {self.output_key: outputs}