# Source code for langchain_community.chains.ernie_functions.base

"""使用Ernie函数调用API创建链的方法。"""
import inspect
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Optional,
    Sequence,
    Tuple,
    Type,
    Union,
    cast,
)

from langchain.chains import LLMChain
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import (
    BaseGenerationOutputParser,
    BaseLLMOutputParser,
    BaseOutputParser,
)
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable

from langchain_community.output_parsers.ernie_functions import (
    JsonOutputFunctionsParser,
    PydanticAttrOutputFunctionsParser,
    PydanticOutputFunctionsParser,
)
from langchain_community.utils.ernie_functions import convert_pydantic_to_ernie_function

# Maps the ``__name__`` of a primitive Python type annotation to the JSON type
# name emitted for that argument in a function's "properties" schema.
# NOTE(review): "int" maps to "number" rather than JSON Schema's "integer" —
# confirm this is intentional before changing it.
PYTHON_TO_JSON_TYPES = {
    "str": "string",
    "int": "number",
    "float": "number",
    "bool": "boolean",
}


def _get_python_function_name(function: Callable) -> str:
    """获取Python函数的名称。"""
    return function.__name__


def _parse_python_function_docstring(function: Callable) -> Tuple[str, dict]:
    """从函数的文档字符串中解析函数和参数描述。

假设函数文档字符串遵循Google Python风格指南。
"""
    docstring = inspect.getdoc(function)
    if docstring:
        docstring_blocks = docstring.split("\n\n")
        descriptors = []
        args_block = None
        past_descriptors = False
        for block in docstring_blocks:
            if block.startswith("Args:"):
                args_block = block
                break
            elif block.startswith("Returns:") or block.startswith("Example:"):
                # Don't break in case Args come after
                past_descriptors = True
            elif not past_descriptors:
                descriptors.append(block)
            else:
                continue
        description = " ".join(descriptors)
    else:
        description = ""
        args_block = None
    arg_descriptions = {}
    if args_block:
        arg = None
        for line in args_block.split("\n")[1:]:
            if ":" in line:
                arg, desc = line.split(":")
                arg_descriptions[arg.strip()] = desc.strip()
            elif arg:
                arg_descriptions[arg.strip()] += " " + line.strip()
    return description, arg_descriptions


def _get_python_function_arguments(function: Callable, arg_descriptions: dict) -> dict:
    """Build the JSON-schema ``properties`` describing a function's arguments.

    Only annotated arguments are considered. An argument annotated with a
    ``BaseModel`` subclass gets that model's ``schema()``; one whose
    annotation name is a key of ``PYTHON_TO_JSON_TYPES`` gets the mapped
    ``{"type": ...}`` entry. Any other annotation contributes no type
    information.

    Args:
        function: Callable whose annotations are inspected.
        arg_descriptions: Mapping of argument name to description; a matching
            entry is stored under the ``"description"`` key of that argument.

    Returns:
        Mapping of argument name to its property schema.
    """
    properties: dict = {}
    annotations = inspect.getfullargspec(function).annotations
    for arg, arg_type in annotations.items():
        if arg == "return":
            continue
        if isinstance(arg_type, type) and issubclass(arg_type, BaseModel):
            # Mypy error:
            # "type" has no attribute "schema"
            properties[arg] = arg_type.schema()  # type: ignore[attr-defined]
        else:
            # Not every annotation has ``__name__`` (string annotations, some
            # ``typing`` constructs); treat those like any unsupported type
            # instead of raising AttributeError.
            type_name = getattr(arg_type, "__name__", None)
            if type_name in PYTHON_TO_JSON_TYPES:
                properties[arg] = {"type": PYTHON_TO_JSON_TYPES[type_name]}
        if arg in arg_descriptions:
            properties.setdefault(arg, {})["description"] = arg_descriptions[arg]
    return properties


def _get_python_function_required_args(function: Callable) -> List[str]:
    """获取Python函数所需的参数。"""
    spec = inspect.getfullargspec(function)
    required = spec.args[: -len(spec.defaults)] if spec.defaults else spec.args
    required += [k for k in spec.kwonlyargs if k not in (spec.kwonlydefaults or {})]

    is_class = type(function) is type
    if is_class and required[0] == "self":
        required = required[1:]
    return required


def convert_python_function_to_ernie_function(
    function: Callable,
) -> Dict[str, Any]:
    """Convert a Python function into an Ernie function-calling dictionary.

    The function is expected to have type hints and a docstring with a
    description. Google-style argument descriptions in the docstring, when
    present, are included as well.

    Args:
        function: The Python callable to describe.

    Returns:
        A dictionary with ``"name"``, ``"description"`` and ``"parameters"``
        keys; ``"parameters"`` is an object schema holding ``"properties"``
        and ``"required"``.
    """
    description, arg_descriptions = _parse_python_function_docstring(function)
    parameters = {
        "type": "object",
        "properties": _get_python_function_arguments(function, arg_descriptions),
        "required": _get_python_function_required_args(function),
    }
    return {
        "name": _get_python_function_name(function),
        "description": description,
        "parameters": parameters,
    }


def convert_to_ernie_function(
    function: Union[Dict[str, Any], Type[BaseModel], Callable],
) -> Dict[str, Any]:
    """Convert a raw function/class to an Ernie function.

    Args:
        function: Either a dictionary, a pydantic.BaseModel class, or a Python
            function. A dictionary is assumed to already be a valid Ernie
            function and is returned unchanged.

    Returns:
        A dictionary version of the passed-in function, compatible with the
        Ernie function-calling API.

    Raises:
        ValueError: If ``function`` is not a dict, a BaseModel subclass, or a
            callable.
    """
    if isinstance(function, dict):
        return function
    # BaseModel subclasses are callable too, so they must be checked before
    # the generic ``callable`` branch.
    if isinstance(function, type) and issubclass(function, BaseModel):
        return cast(Dict, convert_pydantic_to_ernie_function(function))
    if callable(function):
        return convert_python_function_to_ernie_function(function)
    raise ValueError(
        f"Unsupported function type {type(function)}. Functions must be passed in"
        f" as Dict, pydantic.BaseModel, or Callable."
    )


def get_ernie_output_parser(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    """Get the appropriate function output parser given the user functions.

    Only the first element of ``functions`` is inspected to decide which
    parser to build.

    Args:
        functions: A non-empty sequence where each element is a dictionary, a
            pydantic.BaseModel class, or a Python function. A dictionary is
            assumed to already be a valid Ernie function.

    Returns:
        A PydanticOutputFunctionsParser if the first function is a pydantic
        class, otherwise a JsonOutputFunctionsParser. When there is a single
        non-pydantic function, the JSON parser is created with
        ``args_only=True``.
    """
    function_names = [convert_to_ernie_function(f)["name"] for f in functions]
    first = functions[0]
    output_parser: Union[BaseOutputParser, BaseGenerationOutputParser]
    if isinstance(first, type) and issubclass(first, BaseModel):
        pydantic_schema: Union[Dict, Type[BaseModel]]
        if len(functions) > 1:
            # With several models the parser receives a name -> class mapping.
            pydantic_schema = dict(zip(function_names, functions))
        else:
            pydantic_schema = first
        output_parser = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
    else:
        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
    return output_parser


def create_ernie_fn_runnable(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    llm: Runnable,
    prompt: BasePromptTemplate,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **kwargs: Any,
) -> Runnable:
    """Create a runnable sequence that uses Ernie functions.

    Args:
        functions: A sequence of either dictionaries, pydantic.BaseModel
            classes, or Python functions. Dictionaries are assumed to already
            be valid Ernie functions. If only a single function is passed in,
            ``function_call`` is set to that function's name so the model is
            asked to use it. pydantic.BaseModels and Python functions should
            have docstrings describing what the function does. For best
            results, pydantic.BaseModels should have descriptions of the
            parameters and Python functions should have Google Python style
            args descriptions in the docstring. Additionally, Python functions
            should only use primitive types (str, int, float, bool) or
            pydantic.BaseModels for arguments.
        llm: Language model to use, assumed to support the Ernie
            function-calling API.
        prompt: BasePromptTemplate to pass to the model.
        output_parser: Parser for the model output. By default it is inferred
            from the function types via ``get_ernie_output_parser``. If
            pydantic.BaseModels are passed in, the parser will try to parse
            outputs using those. Otherwise model outputs are simply parsed as
            JSON. If multiple functions are passed in and they are not
            pydantic.BaseModels, the output includes both the name of the
            function that was returned and the arguments to pass to it.
        **kwargs: Additional keyword arguments bound to ``llm`` together with
            ``functions`` (and ``function_call`` when applicable).

    Returns:
        A runnable sequence that passes the given functions to the model when
        run.

    Raises:
        ValueError: If ``functions`` is empty.

    Example:
        .. code-block:: python

                from typing import Optional

                from langchain.chains.ernie_functions import create_ernie_fn_runnable
                from langchain_community.chat_models import ErnieBotChat
                from langchain_core.prompts import ChatPromptTemplate
                from langchain.pydantic_v1 import BaseModel, Field


                class RecordPerson(BaseModel):
                    \"\"\"Record some identifying information about a person.\"\"\"

                    name: str = Field(..., description="The person's name")
                    age: int = Field(..., description="The person's age")
                    fav_food: Optional[str] = Field(None, description="The person's favorite food")


                class RecordDog(BaseModel):
                    \"\"\"Record some identifying information about a dog.\"\"\"

                    name: str = Field(..., description="The dog's name")
                    color: str = Field(..., description="The dog's color")
                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")


                llm = ErnieBotChat(model_name="ERNIE-Bot-4")
                prompt = ChatPromptTemplate.from_messages(
                    [
                        ("user", "Make calls to the relevant function to record the entities in the following input: {input}"),
                        ("assistant", "OK!"),
                        ("user", "Tip: Make sure to answer in the correct format"),
                    ]
                )
                chain = create_ernie_fn_runnable([RecordPerson, RecordDog], llm, prompt)
                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
                # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
    """  # noqa: E501
    if not functions:
        raise ValueError("Need to pass in at least one function. Received zero.")
    ernie_functions = [convert_to_ernie_function(f) for f in functions]
    llm_kwargs: Dict[str, Any] = {"functions": ernie_functions, **kwargs}
    if len(ernie_functions) == 1:
        # A single function: name it explicitly in ``function_call``.
        llm_kwargs["function_call"] = {"name": ernie_functions[0]["name"]}
    output_parser = output_parser or get_ernie_output_parser(functions)
    return prompt | llm.bind(**llm_kwargs) | output_parser


def create_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: BasePromptTemplate,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **kwargs: Any,
) -> Runnable:
    """Create a runnable that uses an Ernie function to get a structured output.

    Args:
        output_schema: Either a dictionary or a pydantic.BaseModel class. A
            dictionary is assumed to already be a valid JsonSchema. For best
            results, pydantic.BaseModels should have docstrings describing
            what the schema represents and descriptions for the parameters.
        llm: Language model to use, assumed to support the Ernie
            function-calling API.
        prompt: BasePromptTemplate to pass to the model.
        output_parser: Parser for the model output. By default it is inferred
            from the schema type: for a pydantic.BaseModel the output is
            parsed with that model, otherwise it is simply parsed as JSON.
        **kwargs: Forwarded to ``create_ernie_fn_runnable``.

    Returns:
        A runnable sequence that passes the given function to the model when
        run.

    Example:
        .. code-block:: python

            from typing import Optional

            from langchain.chains.ernie_functions import create_structured_output_runnable
            from langchain_community.chat_models import ErnieBotChat
            from langchain_core.prompts import ChatPromptTemplate
            from langchain.pydantic_v1 import BaseModel, Field

            class Dog(BaseModel):
                \"\"\"Identifying information about a dog.\"\"\"

                name: str = Field(..., description="The dog's name")
                color: str = Field(..., description="The dog's color")
                fav_food: Optional[str] = Field(None, description="The dog's favorite food")

            llm = ErnieBotChat(model_name="ERNIE-Bot-4")
            prompt = ChatPromptTemplate.from_messages(
                [
                    ("user", "Use the given format to extract information from the following input: {input}"),
                    ("assistant", "OK!"),
                    ("user", "Tip: Make sure to answer in the correct format"),
                ]
            )
            chain = create_structured_output_runnable(Dog, llm, prompt)
            chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
            # -> Dog(name="Harry", color="brown", fav_food="chicken")
    """  # noqa: E501
    if isinstance(output_schema, dict):
        # A raw JSON schema is used directly as the function's parameters.
        function: Any = {
            "name": "output_formatter",
            "description": (
                "Output formatter. Should always be used to format your response to the"
                " user."
            ),
            "parameters": output_schema,
        }
    else:
        # Wrap the model in a single-field model so the parser can pull the
        # parsed value back out of the ``output`` attribute.
        # NOTE(review): pydantic exposes a model's docstring as its schema
        # description, so the docstring below presumably reaches the model as
        # the function description; it is therefore kept verbatim — confirm.

        class _OutputFormatter(BaseModel):
            """输出格式化程序。应始终用于格式化您向用户的响应。"""  # noqa: E501

            output: output_schema  # type: ignore

        function = _OutputFormatter
        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
            pydantic_schema=_OutputFormatter, attr_name="output"
        )
    return create_ernie_fn_runnable(
        [function],
        llm,
        prompt,
        output_parser=output_parser,
        **kwargs,
    )


""" --- Legacy --- """


def create_ernie_fn_chain(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    llm: BaseLanguageModel,
    prompt: BasePromptTemplate,
    *,
    output_key: str = "function",
    output_parser: Optional[BaseLLMOutputParser] = None,
    **kwargs: Any,
) -> LLMChain:  # type: ignore[valid-type]
    """[Legacy] Create an LLM chain that uses Ernie functions.

    Args:
        functions: A sequence of either dictionaries, pydantic.BaseModel
            classes, or Python functions. Dictionaries are assumed to already
            be valid Ernie functions. If only a single function is passed in,
            ``function_call`` is set to that function's name so the model is
            asked to use it. pydantic.BaseModels and Python functions should
            have docstrings describing what the function does. For best
            results, pydantic.BaseModels should have descriptions of the
            parameters and Python functions should have Google Python style
            args descriptions in the docstring. Additionally, Python functions
            should only use primitive types (str, int, float, bool) or
            pydantic.BaseModels for arguments.
        llm: Language model to use, assumed to support the Ernie
            function-calling API.
        prompt: BasePromptTemplate to pass to the model.
        output_key: The key passed to LLMChain as ``output_key``, under which
            the chain returns its output.
        output_parser: BaseLLMOutputParser to use for parsing model outputs.
            By default it is inferred from the function types via
            ``get_ernie_output_parser``. If pydantic.BaseModels are passed in,
            the parser will try to parse outputs using those. Otherwise model
            outputs are simply parsed as JSON. If multiple functions are
            passed in and they are not pydantic.BaseModels, the chain output
            includes both the name of the function that was returned and the
            arguments to pass to it.
        **kwargs: Additional keyword arguments forwarded to the LLMChain
            constructor.

    Returns:
        An LLMChain that passes the given functions to the model when run.

    Raises:
        ValueError: If ``functions`` is empty.

    Example:
        .. code-block:: python

                from typing import Optional

                from langchain.chains.ernie_functions import create_ernie_fn_chain
                from langchain_community.chat_models import ErnieBotChat
                from langchain_core.prompts import ChatPromptTemplate

                from langchain.pydantic_v1 import BaseModel, Field


                class RecordPerson(BaseModel):
                    \"\"\"Record some identifying information about a person.\"\"\"

                    name: str = Field(..., description="The person's name")
                    age: int = Field(..., description="The person's age")
                    fav_food: Optional[str] = Field(None, description="The person's favorite food")


                class RecordDog(BaseModel):
                    \"\"\"Record some identifying information about a dog.\"\"\"

                    name: str = Field(..., description="The dog's name")
                    color: str = Field(..., description="The dog's color")
                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")


                llm = ErnieBotChat(model_name="ERNIE-Bot-4")
                prompt = ChatPromptTemplate.from_messages(
                    [
                        ("user", "Make calls to the relevant function to record the entities in the following input: {input}"),
                        ("assistant", "OK!"),
                        ("user", "Tip: Make sure to answer in the correct format"),
                    ]
                )
                chain = create_ernie_fn_chain([RecordPerson, RecordDog], llm, prompt)
                chain.run("Harry was a chubby brown beagle who loved chicken")
                # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
    """  # noqa: E501
    if not functions:
        raise ValueError("Need to pass in at least one function. Received zero.")
    ernie_functions = [convert_to_ernie_function(f) for f in functions]
    output_parser = output_parser or get_ernie_output_parser(functions)
    llm_kwargs: Dict[str, Any] = {"functions": ernie_functions}
    if len(ernie_functions) == 1:
        # A single function: name it explicitly in ``function_call``.
        llm_kwargs["function_call"] = {"name": ernie_functions[0]["name"]}
    return LLMChain(  # type: ignore[misc]
        llm=llm,
        prompt=prompt,
        output_parser=output_parser,
        llm_kwargs=llm_kwargs,
        output_key=output_key,
        **kwargs,
    )


def create_structured_output_chain(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: BaseLanguageModel,
    prompt: BasePromptTemplate,
    *,
    output_key: str = "function",
    output_parser: Optional[BaseLLMOutputParser] = None,
    **kwargs: Any,
) -> LLMChain:  # type: ignore[valid-type]
    """[Legacy] Create an LLMChain that uses an Ernie function to get a structured output.

    Args:
        output_schema: Either a dictionary or a pydantic.BaseModel class. A
            dictionary is assumed to already be a valid JsonSchema. For best
            results, pydantic.BaseModels should have docstrings describing
            what the schema represents and descriptions for the parameters.
        llm: Language model to use, assumed to support the Ernie
            function-calling API.
        prompt: BasePromptTemplate to pass to the model.
        output_key: The key under which the chain returns its output; passed
            through to ``create_ernie_fn_chain``.
        output_parser: BaseLLMOutputParser to use for parsing model outputs.
            By default it is inferred from the schema type: for a
            pydantic.BaseModel the output is parsed with that model, otherwise
            it is simply parsed as JSON.
        **kwargs: Forwarded to ``create_ernie_fn_chain``.

    Returns:
        An LLMChain that passes the given function to the model.

    Example:
        .. code-block:: python

                from typing import Optional

                from langchain.chains.ernie_functions import create_structured_output_chain
                from langchain_community.chat_models import ErnieBotChat
                from langchain_core.prompts import ChatPromptTemplate

                from langchain.pydantic_v1 import BaseModel, Field

                class Dog(BaseModel):
                    \"\"\"Identifying information about a dog.\"\"\"

                    name: str = Field(..., description="The dog's name")
                    color: str = Field(..., description="The dog's color")
                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")

                llm = ErnieBotChat(model_name="ERNIE-Bot-4")
                prompt = ChatPromptTemplate.from_messages(
                    [
                        ("user", "Use the given format to extract information from the following input: {input}"),
                        ("assistant", "OK!"),
                        ("user", "Tip: Make sure to answer in the correct format"),
                    ]
                )
                chain = create_structured_output_chain(Dog, llm, prompt)
                chain.run("Harry was a chubby brown beagle who loved chicken")
                # -> Dog(name="Harry", color="brown", fav_food="chicken")
    """  # noqa: E501
    if isinstance(output_schema, dict):
        # A raw JSON schema is used directly as the function's parameters.
        function: Any = {
            "name": "output_formatter",
            "description": (
                "Output formatter. Should always be used to format your response to the"
                " user."
            ),
            "parameters": output_schema,
        }
    else:
        # Wrap the model in a single-field model so the parser can pull the
        # parsed value back out of the ``output`` attribute.
        # NOTE(review): pydantic exposes a model's docstring as its schema
        # description, so the docstring below presumably reaches the model as
        # the function description; it is therefore kept verbatim — confirm.

        class _OutputFormatter(BaseModel):
            """输出格式化程序。应始终用于格式化您向用户的响应。"""  # noqa: E501

            output: output_schema  # type: ignore

        function = _OutputFormatter
        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
            pydantic_schema=_OutputFormatter, attr_name="output"
        )
    return create_ernie_fn_chain(
        [function],
        llm,
        prompt,
        output_key=output_key,
        output_parser=output_parser,
        **kwargs,
    )