# Source code for langchain_core.output_parsers.base

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generic,
    List,
    Optional,
    Type,
    TypeVar,
    Union,
)

from typing_extensions import get_args

from langchain_core.language_models import LanguageModelOutput
from langchain_core.messages import AnyMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.runnables import Runnable, RunnableConfig, RunnableSerializable
from langchain_core.runnables.config import run_in_executor

if TYPE_CHECKING:
    from langchain_core.prompt_values import PromptValue

# Type variable for the structured value a parser returns; it is the return
# type of ``parse_result`` / ``parse`` on the parser classes below.
T = TypeVar("T")
# Alias for any Runnable that takes a LanguageModelOutput and yields a ``T``.
OutputParserLike = Runnable[LanguageModelOutput, T]


class BaseLLMOutputParser(Generic[T], ABC):
    """Abstract base class for parsing the outputs of a model."""

    @abstractmethod
    def parse_result(self, result: List[Generation], *, partial: bool = False) -> T:
        """Parse a list of candidate model Generations into a specific format.

        Args:
            result: A list of Generations to be parsed. The Generations are
                assumed to be different candidate outputs for a single model
                input.
            partial: Keyword-only flag whose interpretation is left to the
                implementing subclass. Defaults to False.

        Returns:
            Structured output.
        """

    async def aparse_result(
        self, result: List[Generation], *, partial: bool = False
    ) -> T:
        """Async counterpart of ``parse_result``.

        Delegates to ``parse_result`` through ``run_in_executor``.

        Args:
            result: A list of Generations to be parsed. The Generations are
                assumed to be different candidate outputs for a single model
                input.
            partial: Forwarded unchanged to ``parse_result``. Defaults to
                False.

        Returns:
            Structured output.
        """
        # ``partial`` must be forwarded: without it the async path would always
        # call ``parse_result`` with its default, diverging from the sync path
        # (and from ``BaseOutputParser.aparse_result``, which forwards it).
        return await run_in_executor(None, self.parse_result, result, partial=partial)


class BaseGenerationOutputParser(
    BaseLLMOutputParser, RunnableSerializable[LanguageModelOutput, T]
):
    """Base class to parse the output of an LLM call."""

    @property
    def InputType(self) -> Any:
        """Input type accepted by this parser: a string or any message."""
        return Union[str, AnyMessage]

    @property
    def OutputType(self) -> Type[T]:
        """Output type of this parser, reported as the bare type variable."""
        # even though mypy complains this isn't valid,
        # it is good enough for pydantic to build the schema from
        return T  # type: ignore[misc]

    def invoke(
        self, input: Union[str, BaseMessage], config: Optional[RunnableConfig] = None
    ) -> T:
        """Parse a single model output synchronously.

        Args:
            input: The model output, either a message or a plain string.
            config: Optional runnable config passed to ``_call_with_config``.

        Returns:
            Whatever ``parse_result`` returns for the wrapped input.
        """
        if isinstance(input, BaseMessage):
            # Messages are wrapped in a ChatGeneration so the message object
            # itself reaches ``parse_result``.
            return self._call_with_config(
                lambda inner_input: self.parse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        else:
            # Anything that is not a BaseMessage is treated as the text of a
            # plain Generation.
            return self._call_with_config(
                lambda inner_input: self.parse_result([Generation(text=inner_input)]),
                input,
                config,
                run_type="parser",
            )

    async def ainvoke(
        self,
        input: Union[str, BaseMessage],
        config: Optional[RunnableConfig] = None,
        **kwargs: Optional[Any],
    ) -> T:
        """Parse a single model output asynchronously.

        Args:
            input: The model output, either a message or a plain string.
            config: Optional runnable config passed to ``_acall_with_config``.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            Whatever ``aparse_result`` returns for the wrapped input.
        """
        if isinstance(input, BaseMessage):
            # The lambda returns the coroutine from ``aparse_result``; it is
            # handed to ``_acall_with_config`` rather than awaited here.
            return await self._acall_with_config(
                lambda inner_input: self.aparse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        else:
            return await self._acall_with_config(
                lambda inner_input: self.aparse_result([Generation(text=inner_input)]),
                input,
                config,
                run_type="parser",
            )


class BaseOutputParser(
    BaseLLMOutputParser, RunnableSerializable[LanguageModelOutput, T]
):
    """Base class to parse the output of an LLM call.

    Output parsers help structure language model responses.

    Example:
        .. code-block:: python

            class BooleanOutputParser(BaseOutputParser[bool]):
                true_val: str = "YES"
                false_val: str = "NO"

                def parse(self, text: str) -> bool:
                    cleaned_text = text.strip().upper()
                    if cleaned_text not in (self.true_val.upper(), self.false_val.upper()):
                        raise OutputParserException(
                            f"BooleanOutputParser expected output value to either be "
                            f"{self.true_val} or {self.false_val} (case-insensitive). "
                            f"Received {cleaned_text}."
                        )
                    return cleaned_text == self.true_val.upper()

                @property
                def _type(self) -> str:
                    return "boolean_output_parser"
    """  # noqa: E501

    @property
    def InputType(self) -> Any:
        """Input type accepted by this parser: a string or any message."""
        return Union[str, AnyMessage]

    @property
    def OutputType(self) -> Type[T]:
        """Output type inferred from the class's generic base parameters.

        Returns:
            The single type argument of the first entry in
            ``__orig_bases__`` that has exactly one type argument.

        Raises:
            TypeError: If no base carries exactly one type argument.
        """
        for cls in self.__class__.__orig_bases__:  # type: ignore[attr-defined]
            type_args = get_args(cls)
            # Bases with zero or several type arguments are skipped.
            if len(type_args) == 1:
                return type_args[0]

        raise TypeError(
            f"Runnable {self.__class__.__name__} doesn't have an inferable OutputType. "
            "Override the OutputType property to specify the output type."
        )

    def invoke(
        self, input: Union[str, BaseMessage], config: Optional[RunnableConfig] = None
    ) -> T:
        """Parse a single model output synchronously.

        Args:
            input: The model output, either a message or a plain string.
            config: Optional runnable config passed to ``_call_with_config``.

        Returns:
            Whatever ``parse_result`` returns for the wrapped input.
        """
        if isinstance(input, BaseMessage):
            # Messages are wrapped in a ChatGeneration before parsing.
            return self._call_with_config(
                lambda inner_input: self.parse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        else:
            # Anything that is not a BaseMessage is treated as the text of a
            # plain Generation.
            return self._call_with_config(
                lambda inner_input: self.parse_result([Generation(text=inner_input)]),
                input,
                config,
                run_type="parser",
            )

    async def ainvoke(
        self,
        input: Union[str, BaseMessage],
        config: Optional[RunnableConfig] = None,
        **kwargs: Optional[Any],
    ) -> T:
        """Parse a single model output asynchronously.

        Args:
            input: The model output, either a message or a plain string.
            config: Optional runnable config passed to ``_acall_with_config``.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            Whatever ``aparse_result`` returns for the wrapped input.
        """
        if isinstance(input, BaseMessage):
            # The lambda returns the coroutine from ``aparse_result``; it is
            # handed to ``_acall_with_config`` rather than awaited here.
            return await self._acall_with_config(
                lambda inner_input: self.aparse_result(
                    [ChatGeneration(message=inner_input)]
                ),
                input,
                config,
                run_type="parser",
            )
        else:
            return await self._acall_with_config(
                lambda inner_input: self.aparse_result([Generation(text=inner_input)]),
                input,
                config,
                run_type="parser",
            )

    def parse_result(self, result: List[Generation], *, partial: bool = False) -> T:
        """Parse a list of candidate model Generations into a specific format.

        The return value is parsed from only the first Generation in the
        result, which is assumed to be the highest-likelihood Generation.

        Args:
            result: A list of Generations to be parsed. The Generations are
                assumed to be different candidate outputs for a single model
                input.
            partial: Not used by this implementation. Defaults to False.

        Returns:
            Structured output.

        Raises:
            IndexError: If ``result`` is empty.
        """
        return self.parse(result[0].text)

    @abstractmethod
    def parse(self, text: str) -> T:
        """Parse a single string model output into some structure.

        Args:
            text: String output of a language model.

        Returns:
            Structured output.
        """

    async def aparse_result(
        self, result: List[Generation], *, partial: bool = False
    ) -> T:
        """Async counterpart of ``parse_result``.

        Delegates to ``parse_result`` through ``run_in_executor``, so only
        the first Generation in ``result`` is parsed.

        Args:
            result: A list of Generations to be parsed. The Generations are
                assumed to be different candidate outputs for a single model
                input.
            partial: Forwarded unchanged to ``parse_result``. Defaults to
                False.

        Returns:
            Structured output.
        """
        return await run_in_executor(None, self.parse_result, result, partial=partial)

    async def aparse(self, text: str) -> T:
        """Async counterpart of ``parse``.

        Delegates to ``parse`` through ``run_in_executor``.

        Args:
            text: String output of a language model.

        Returns:
            Structured output.
        """
        return await run_in_executor(None, self.parse, text)

    # TODO: rename 'completion' -> 'text'.
    def parse_with_prompt(self, completion: str, prompt: PromptValue) -> Any:
        """Parse the output of an LLM call with the input prompt for context.

        The prompt is largely provided in the event the OutputParser wants
        to retry or fix the output in some way, and needs information from
        the prompt to do so. This base implementation ignores ``prompt`` and
        simply calls ``parse``.

        Args:
            completion: String output of a language model.
            prompt: Input PromptValue.

        Returns:
            Structured output.
        """
        return self.parse(completion)

    def get_format_instructions(self) -> str:
        """Instructions on how the LLM output should be formatted.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    @property
    def _type(self) -> str:
        """Return the output parser type for serialization.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            f"_type property is not implemented in class {self.__class__.__name__}."
            " This is required for serialization."
        )

    def dict(self, **kwargs: Any) -> Dict:
        """Return dictionary representation of output parser.

        Args:
            **kwargs: Forwarded to the parent class's ``dict``.

        Returns:
            The parent's dictionary, plus a ``"_type"`` entry when the
            ``_type`` property is implemented.
        """
        output_parser_dict = super().dict(**kwargs)
        try:
            output_parser_dict["_type"] = self._type
        except NotImplementedError:
            # Deliberate best-effort: a parser without ``_type`` is still
            # dumped, just without the "_type" key.
            pass
        return output_parser_dict