Source code for langchain.chains.structured_output.base

import json
from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union

from langchain_core._api import deprecated
from langchain_core.output_parsers import (
    BaseGenerationOutputParser,
    BaseOutputParser,
    JsonOutputParser,
    PydanticOutputParser,
)
from langchain_core.output_parsers.openai_functions import (
    JsonOutputFunctionsParser,
    PydanticAttrOutputFunctionsParser,
    PydanticOutputFunctionsParser,
)
from langchain_core.output_parsers.openai_tools import (
    JsonOutputKeyToolsParser,
    PydanticToolsParser,
)
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable
from langchain_core.utils.function_calling import (
    convert_to_openai_function,
    convert_to_openai_tool,
)


[docs]@deprecated( since="0.1.14", message=( "LangChain has introduced a method called `with_structured_output` that " "is available on ChatModels capable of tool calling. " "You can read more about the method here: " "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ " "Please follow our extraction use case documentation for more guidelines " "on how to do information extraction with LLMs. " "https://python.langchain.com/docs/use_cases/extraction/. " "If you notice other issues, please provide " "feedback here: " "https://github.com/langchain-ai/langchain/discussions/18154" ), removal="0.3.0", alternative=( """ from langchain_core.pydantic_v1 import BaseModel, Field from langchain_anthropic import ChatAnthropic class Joke(BaseModel): setup: str = Field(description="The setup of the joke") punchline: str = Field(description="The punchline to the joke") # Or any other chat model that supports tools. # Please reference to to the documentation of structured_output # to see an up to date list of which models support # with_structured_output. model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0) structured_llm = model.with_structured_output(Joke) structured_llm.invoke("Tell me a joke about cats. Make sure to call the Joke function.") """ ), ) def create_openai_fn_runnable( functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], llm: Runnable, prompt: Optional[BasePromptTemplate] = None, *, enforce_single_function_usage: bool = True, output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, **llm_kwargs: Any, ) -> Runnable: """创建一个可运行的序列,使用OpenAI函数。 参数: functions: 一个序列,可以是字典、pydantic.BaseModels类或Python函数。如果传入字典,则假定它们已经是有效的OpenAI函数。如果只传入一个函数,则将强制模型使用该函数。pydantic.BaseModels和Python函数应该有描述函数功能的文档字符串。为了获得最佳结果,pydantic.BaseModels应该有参数描述,Python函数应该在文档字符串中使用Google Python风格的参数描述。此外,Python函数应该只使用原始类型(str、int、float、bool)或pydantic.BaseModels作为参数。 llm: 要使用的语言模型,假定支持OpenAI函数调用API。 prompt: 传递给模型的BasePromptTemplate。 enforce_single_function_usage: 仅在传入单个函数时使用。如果为True,则将强制模型使用给定的函数。如果为False,则模型将有选择使用给定函数或不使用的选项。 output_parser: 用于解析模型输出的BaseLLMOutputParser。默认情况下将从函数类型中推断。如果传入pydantic.BaseModels,则OutputParser将尝试使用这些来解析输出。否则,模型输出将简单地解析为JSON。如果传入多个函数且它们不是pydantic.BaseModels,则链式输出将包括返回的函数名称和传递给函数的参数。 **llm_kwargs: 要传递给语言模型的其他命名参数。 返回: 一个可运行的序列,当运行时将传递给模型给定的函数。 示例: .. code-block:: python from typing import Optional from langchain.chains.structured_output import create_openai_fn_runnable from langchain_openai import ChatOpenAI from langchain_core.pydantic_v1 import BaseModel, Field class RecordPerson(BaseModel): '''记录有关一个人的一些身份信息。''' name: str = Field(..., description="人的姓名") age: int = Field(..., description="人的年龄") fav_food: Optional[str] = Field(None, description="人喜欢的食物") class RecordDog(BaseModel): '''记录有关一只狗的一些身份信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-4", temperature=0) structured_llm = create_openai_fn_runnable([RecordPerson, RecordDog], llm) structured_llm.invoke("Harry was a chubby brown beagle who loved chicken) # -> RecordDog(name="Harry", color="brown", fav_food="chicken") """ # noqa: E501 if not functions: raise ValueError("Need to pass in at least one function. 
Received zero.") openai_functions = [convert_to_openai_function(f) for f in functions] llm_kwargs_: Dict[str, Any] = {"functions": openai_functions, **llm_kwargs} if len(openai_functions) == 1 and enforce_single_function_usage: llm_kwargs_["function_call"] = {"name": openai_functions[0]["name"]} output_parser = output_parser or get_openai_output_parser(functions) if prompt: return prompt | llm.bind(**llm_kwargs_) | output_parser else: return llm.bind(**llm_kwargs_) | output_parser
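

# As the docstring above notes, plain Python functions with Google-style arg
# descriptions are valid inputs alongside pydantic models. A minimal sketch
# follows; `record_dog` and this helper are illustrative names, not part of
# the module's API.
def _example_plain_function_input(llm: Runnable) -> Runnable:
    def record_dog(name: str, color: str, fav_food: str) -> None:
        """Record some identifying information about a dog.

        Args:
            name: The dog's name.
            color: The dog's color.
            fav_food: The dog's favorite food.
        """

    # With a single non-pydantic function, get_openai_output_parser falls back
    # to JsonOutputFunctionsParser(args_only=True), so invoking the returned
    # runnable yields a plain dict of arguments, e.g.
    # {"name": "Harry", "color": "brown", "fav_food": "chicken"}.
    return create_openai_fn_runnable([record_dog], llm)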
[docs]@deprecated( since="0.1.17", message=( "LangChain has introduced a method called `with_structured_output` that " "is available on ChatModels capable of tool calling. " "You can read more about the method here: " "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ " "Please follow our extraction use case documentation for more guidelines " "on how to do information extraction with LLMs. " "https://python.langchain.com/docs/use_cases/extraction/. " "If you notice other issues, please provide " "feedback here: " "https://github.com/langchain-ai/langchain/discussions/18154" ), removal="0.3.0", alternative=( """ from langchain_core.pydantic_v1 import BaseModel, Field from langchain_anthropic import ChatAnthropic class Joke(BaseModel): setup: str = Field(description="The setup of the joke") punchline: str = Field(description="The punchline to the joke") # Or any other chat model that supports tools. # Please reference to to the documentation of structured_output # to see an up to date list of which models support # with_structured_output. model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0) structured_llm = model.with_structured_output(Joke) structured_llm.invoke("Tell me a joke about cats. Make sure to call the Joke function.") """ ), ) def create_structured_output_runnable( output_schema: Union[Dict[str, Any], Type[BaseModel]], llm: Runnable, prompt: Optional[BasePromptTemplate] = None, *, output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, enforce_function_usage: bool = True, return_single: bool = True, mode: Literal[ "openai-functions", "openai-tools", "openai-json" ] = "openai-functions", **kwargs: Any, ) -> Runnable: """创建一个可运行的程序,用于提取结构化输出。 参数: output_schema:可以是字典或pydantic.BaseModel类。如果传入的是字典, 则假定它已经是一个有效的JsonSchema。 为了获得最佳结果,pydantic.BaseModels应该有描述模式代表什么以及参数描述的文档字符串。 llm:要使用的语言模型。假定支持OpenAI函数调用API 如果模式是'openai-function'。假定支持OpenAI response_format 参数如果模式是'openai-json'。 prompt:传递给模型的BasePromptTemplate。如果模式是'openai-json'且 prompt具有输入变量'output_schema',则给定的output_schema 将被转换为JsonSchema并插入到prompt中。 output_parser:用于解析模型输出的输出解析器。默认情况下 将从函数类型中推断出来。如果传入pydantic.BaseModel, 那么OutputParser将尝试使用pydantic类解析输出。否则模型输出将被解析为JSON。 mode:从模型中提取结构化输出的方式。如果是'openai-functions' 则使用已弃用的'functions','function_call'模式进行OpenAI函数调用。 如果是'openai-tools',则使用最新的'tools','tool_choice'模式进行OpenAI函数调用。 这比'openai-functions'更推荐。如果是'openai-json',则使用OpenAI模型 并将response_format设置为JSON。 enforce_function_usage:仅适用于模式为'openai-tools'或 'openai-functions'时。如果为True,则模型将被强制使用给定的 输出模式。如果为False,则模型可以选择是否使用输出 模式。 return_single:仅适用于模式为'openai-tools'时。是否返回一个结构化输出列表 还是单个输出。如果为True且模型不返回任何 结构化输出,则链输出为None。如果为False且模型不返回任何结构化输出 则链输出为一个空列表。 **kwargs:额外的命名参数。 返回: 一个可运行的序列,将返回与给定output_schema匹配的结构化输出。 使用Pydantic模式的OpenAI工具示例(mode='openai-tools'): .. 
code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.pydantic_v1 import BaseModel, Field class RecordDog(BaseModel): '''记录有关狗的一些标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) prompt = ChatPromptTemplate.from_messages( [ ("system", "您是一个提取算法。请提取每个可能的实例"), ('human', '{input}') ] ) structured_llm = create_structured_output_runnable( RecordDog, llm, mode="openai-tools", enforce_function_usage=True, return_single=True ) structured_llm.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"}) # -> RecordDog(name="Harry", color="brown", fav_food="chicken") 使用字典模式的OpenAI工具示例(mode="openai-tools"): .. code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI dog_schema = { "type": "function", "function": { "name": "record_dog", "description": "记录有关狗的一些标识信息。", "parameters": { "type": "object", "properties": { "name": { "description": "狗的名字", "type": "string" }, "color": { "description": "狗的颜色", "type": "string" }, "fav_food": { "description": "狗喜欢的食物", "type": "string" } }, "required": ["name", "color"] } } } llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable( dog_schema, llm, mode="openai-tools", enforce_function_usage=True, return_single=True ) structured_llm.invoke("Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉") # -> {'name': 'Harry', 'color': 'brown', 'fav_food': 'chicken'} OpenAI函数示例(mode="openai-functions"): .. code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.pydantic_v1 import BaseModel, Field class Dog(BaseModel): '''有关狗的标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-functions") structured_llm.invoke("Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉") # -> Dog(name="Harry", color="brown", fav_food="chicken") 带有提示的OpenAI函数示例: .. code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field class Dog(BaseModel): '''有关狗的标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-functions") system = '''从用户输入中提取提到的任何狗的信息。''' prompt = ChatPromptTemplate.from_messages( [("system", system), ("human", "{input}"),] ) chain = prompt | structured_llm chain.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"}) # -> Dog(name="Harry", color="brown", fav_food="chicken") OpenAI JSON响应格式示例(mode="openai-json"): .. 
code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field class Dog(BaseModel): '''有关狗的标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-json") system = '''您是一个用于提取结构化JSON格式信息的世界级助手。 从用户输入中提取与以下JSON模式匹配的有效JSON blob: {output_schema}''' prompt = ChatPromptTemplate.from_messages( [("system", system), ("human", "{input}"),] ) chain = prompt | structured_llm chain.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"}) """ # noqa: E501 # for backwards compatibility force_function_usage = kwargs.get( "enforce_single_function_usage", enforce_function_usage ) if mode == "openai-tools": # Protect against typos in kwargs keys_in_kwargs = set(kwargs.keys()) # Backwards compatibility keys unrecognized_keys = keys_in_kwargs - {"enforce_single_function_usage"} if unrecognized_keys: raise TypeError( f"Got an unexpected keyword argument(s): {unrecognized_keys}." ) return _create_openai_tools_runnable( output_schema, llm, prompt=prompt, output_parser=output_parser, enforce_tool_usage=force_function_usage, first_tool_only=return_single, ) elif mode == "openai-functions": return _create_openai_functions_structured_output_runnable( output_schema, llm, prompt=prompt, output_parser=output_parser, enforce_single_function_usage=force_function_usage, **kwargs, # llm-specific kwargs ) elif mode == "openai-json": if force_function_usage: raise ValueError( "enforce_single_function_usage is not supported for mode='openai-json'." ) return _create_openai_json_runnable( output_schema, llm, prompt=prompt, output_parser=output_parser, **kwargs ) else: raise ValueError( f"Invalid mode {mode}. Expected one of 'openai-tools', 'openai-functions', " f"'openai-json'." )
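

# A minimal sketch of the backwards-compatibility handling above: in
# 'openai-tools' mode the legacy kwarg name `enforce_single_function_usage`
# is still honored, while any other unrecognized kwarg raises a TypeError
# before the model is invoked. (`_example_legacy_kwarg_handling` and `Dog`
# are illustrative names, not part of the module's API.)
def _example_legacy_kwarg_handling(llm: Runnable) -> None:
    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    # The old kwarg name is picked up via kwargs.get(...) and mapped onto
    # enforce_function_usage.
    create_structured_output_runnable(
        Dog, llm, mode="openai-tools", enforce_single_function_usage=True
    )
    try:
        create_structured_output_runnable(Dog, llm, mode="openai-tools", typo_kwarg=1)
    except TypeError:
        pass  # unrecognized keys are rejected early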


def _create_openai_tools_runnable(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    llm: Runnable,
    *,
    prompt: Optional[BasePromptTemplate],
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]],
    enforce_tool_usage: bool,
    first_tool_only: bool,
) -> Runnable:
    oai_tool = convert_to_openai_tool(tool)
    llm_kwargs: Dict[str, Any] = {"tools": [oai_tool]}
    if enforce_tool_usage:
        llm_kwargs["tool_choice"] = {
            "type": "function",
            "function": {"name": oai_tool["function"]["name"]},
        }
    output_parser = output_parser or _get_openai_tool_output_parser(
        tool, first_tool_only=first_tool_only
    )
    if prompt:
        return prompt | llm.bind(**llm_kwargs) | output_parser
    else:
        return llm.bind(**llm_kwargs) | output_parser


def _get_openai_tool_output_parser(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    *,
    first_tool_only: bool = False,
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    if isinstance(tool, type) and issubclass(tool, BaseModel):
        output_parser: Union[
            BaseOutputParser, BaseGenerationOutputParser
        ] = PydanticToolsParser(tools=[tool], first_tool_only=first_tool_only)
    else:
        key_name = convert_to_openai_tool(tool)["function"]["name"]
        output_parser = JsonOutputKeyToolsParser(
            first_tool_only=first_tool_only, key_name=key_name
        )
    return output_parser
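

# A minimal sketch of the parser selection above: pydantic classes get a
# PydanticToolsParser (returning model instances), while dict/function tools
# get a JsonOutputKeyToolsParser keyed by the tool's name (returning raw
# argument dicts). (`_example_tool_output_parser_selection` and the names
# inside it are illustrative, not part of the module's API.)
def _example_tool_output_parser_selection() -> None:
    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    pydantic_parser = _get_openai_tool_output_parser(Dog, first_tool_only=True)
    assert isinstance(pydantic_parser, PydanticToolsParser)

    dict_tool = {
        "type": "function",
        "function": {
            "name": "record_dog",
            "description": "Record some identifying information about a dog.",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    json_parser = _get_openai_tool_output_parser(dict_tool, first_tool_only=True)
    assert isinstance(json_parser, JsonOutputKeyToolsParser)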


def get_openai_output_parser(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    """Get the appropriate function output parser given the user functions.

    Args:
        functions: Sequence where each element is either a dictionary, a
            pydantic.BaseModel class, or a Python function. If a dictionary is
            passed in, it is assumed to already be a valid OpenAI function.

    Returns:
        A PydanticOutputFunctionsParser if the functions are Pydantic classes,
        otherwise a JsonOutputFunctionsParser. If there is only one function and
        it is not a Pydantic class, then the output parser will automatically
        extract the function arguments and not the function name.
    """
    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
        if len(functions) > 1:
            pydantic_schema: Union[Dict, Type[BaseModel]] = {
                convert_to_openai_function(fn)["name"]: fn for fn in functions
            }
        else:
            pydantic_schema = functions[0]
        output_parser: Union[
            BaseOutputParser, BaseGenerationOutputParser
        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
    else:
        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
    return output_parser
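

# A minimal sketch of the inference above: with multiple pydantic classes the
# parser maps each generated OpenAI function name back to its class, so it can
# reconstruct whichever model the LLM chose; a single non-pydantic function
# falls back to args-only JSON parsing. (`_example_output_parser_inference`
# and the classes inside it are illustrative, not part of the module's API.)
def _example_output_parser_inference() -> None:
    class Person(BaseModel):
        """Identifying information about a person."""

        name: str

    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    multi = get_openai_output_parser([Person, Dog])
    assert isinstance(multi, PydanticOutputFunctionsParser)

    single_dict = {
        "name": "record_dog",
        "description": "Record some identifying information about a dog.",
        "parameters": {"type": "object", "properties": {}},
    }
    args_only = get_openai_output_parser([single_dict])
    assert isinstance(args_only, JsonOutputFunctionsParser)
    assert args_only.args_only  # chain output is just the arguments dict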


def _create_openai_json_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
) -> Runnable:
    """Create a runnable that uses the OpenAI JSON response format."""
    if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
        output_parser = output_parser or PydanticOutputParser(
            pydantic_object=output_schema,
        )
        schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
    else:
        output_parser = output_parser or JsonOutputParser()
        schema_as_dict = output_schema

    llm = llm.bind(response_format={"type": "json_object"})
    if prompt:
        if "output_schema" in prompt.input_variables:
            prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))

        return prompt | llm | output_parser
    else:
        return llm | output_parser


def _create_openai_functions_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **llm_kwargs: Any,
) -> Runnable:
    if isinstance(output_schema, dict):
        function: Any = {
            "name": "output_formatter",
            "description": (
                "Output formatter. Should always be used to format your response to"
                " the user."
            ),
            "parameters": output_schema,
        }
    else:

        class _OutputFormatter(BaseModel):
            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501

            output: output_schema  # type: ignore

        function = _OutputFormatter
        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
            pydantic_schema=_OutputFormatter, attr_name="output"
        )
    return create_openai_fn_runnable(
        [function],
        llm,
        prompt=prompt,
        output_parser=output_parser,
        **llm_kwargs,
    )
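

# A minimal sketch of the schema injection performed by
# _create_openai_json_runnable: since the JSON response format does not
# enforce a schema server-side, the schema is rendered as text and substituted
# into any `{output_schema}` prompt variable. (`_example_schema_injection` and
# `Dog` are illustrative names, not part of the module's API.)
def _example_schema_injection() -> None:
    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    # Same rendering used by prompt.partial(output_schema=...) above.
    schema_as_dict = convert_to_openai_function(Dog)["parameters"]
    rendered = json.dumps(schema_as_dict, indent=2)
    assert '"name"' in rendered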