Source code for langchain.chains.structured_output.base

import json
from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union

from langchain_core._api import deprecated
from langchain_core.output_parsers import (
    BaseGenerationOutputParser,
    BaseOutputParser,
    JsonOutputParser,
    PydanticOutputParser,
)
from langchain_core.output_parsers.openai_functions import (
    JsonOutputFunctionsParser,
    PydanticAttrOutputFunctionsParser,
    PydanticOutputFunctionsParser,
)
from langchain_core.output_parsers.openai_tools import (
    JsonOutputKeyToolsParser,
    PydanticToolsParser,
)
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable
from langchain_core.utils.function_calling import (
    convert_to_openai_function,
    convert_to_openai_tool,
)


[docs]@deprecated( since="0.1.14", message=( "LangChain has introduced a method called `with_structured_output` that " "is available on ChatModels capable of tool calling. " "You can read more about the method here: " "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ " "Please follow our extraction use case documentation for more guidelines " "on how to do information extraction with LLMs. " "https://python.langchain.com/docs/use_cases/extraction/. " "If you notice other issues, please provide " "feedback here: " "https://github.com/langchain-ai/langchain/discussions/18154" ), removal="0.3.0", alternative=( """ from langchain_core.pydantic_v1 import BaseModel, Field from langchain_anthropic import ChatAnthropic class Joke(BaseModel): setup: str = Field(description="The setup of the joke") punchline: str = Field(description="The punchline to the joke") # Or any other chat model that supports tools. # Please reference to to the documentation of structured_output # to see an up to date list of which models support # with_structured_output. model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0) structured_llm = model.with_structured_output(Joke) structured_llm.invoke("Tell me a joke about cats. Make sure to call the Joke function.") """ ), ) def create_openai_fn_runnable( functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], llm: Runnable, prompt: Optional[BasePromptTemplate] = None, *, enforce_single_function_usage: bool = True, output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, **llm_kwargs: Any, ) -> Runnable: """创建一个可运行的序列,使用OpenAI函数。 参数: functions: 一个序列,可以是字典、pydantic.BaseModels类或Python函数。如果传入字典,则假定它们已经是有效的OpenAI函数。如果只传入一个函数,则将强制模型使用该函数。pydantic.BaseModels和Python函数应该有描述函数功能的文档字符串。为了获得最佳结果,pydantic.BaseModels应该有参数描述,Python函数应该在文档字符串中使用Google Python风格的参数描述。此外,Python函数应该只使用原始类型(str、int、float、bool)或pydantic.BaseModels作为参数。 llm: 要使用的语言模型,假定支持OpenAI函数调用API。 prompt: 传递给模型的BasePromptTemplate。 enforce_single_function_usage: 仅在传入单个函数时使用。如果为True,则将强制模型使用给定的函数。如果为False,则模型将有选择使用给定函数或不使用的选项。 output_parser: 用于解析模型输出的BaseLLMOutputParser。默认情况下将从函数类型中推断。如果传入pydantic.BaseModels,则OutputParser将尝试使用这些来解析输出。否则,模型输出将简单地解析为JSON。如果传入多个函数且它们不是pydantic.BaseModels,则链式输出将包括返回的函数名称和传递给函数的参数。 **llm_kwargs: 要传递给语言模型的其他命名参数。 返回: 一个可运行的序列,当运行时将传递给模型给定的函数。 示例: .. code-block:: python from typing import Optional from langchain.chains.structured_output import create_openai_fn_runnable from langchain_openai import ChatOpenAI from langchain_core.pydantic_v1 import BaseModel, Field class RecordPerson(BaseModel): '''记录有关一个人的一些身份信息。''' name: str = Field(..., description="人的姓名") age: int = Field(..., description="人的年龄") fav_food: Optional[str] = Field(None, description="人喜欢的食物") class RecordDog(BaseModel): '''记录有关一只狗的一些身份信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-4", temperature=0) structured_llm = create_openai_fn_runnable([RecordPerson, RecordDog], llm) structured_llm.invoke("Harry was a chubby brown beagle who loved chicken) # -> RecordDog(name="Harry", color="brown", fav_food="chicken") """ # noqa: E501 if not functions: raise ValueError("Need to pass in at least one function. 
Received zero.") openai_functions = [convert_to_openai_function(f) for f in functions] llm_kwargs_: Dict[str, Any] = {"functions": openai_functions, **llm_kwargs} if len(openai_functions) == 1 and enforce_single_function_usage: llm_kwargs_["function_call"] = {"name": openai_functions[0]["name"]} output_parser = output_parser or get_openai_output_parser(functions) if prompt: return prompt | llm.bind(**llm_kwargs_) | output_parser else: return llm.bind(**llm_kwargs_) | output_parser
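

# As the docstring above notes, plain Python functions with Google-style arg
# descriptions are valid inputs alongside pydantic models. A minimal sketch
# follows; `record_dog` and this helper are illustrative names, not part of
# the module's API.
def _example_plain_function_input(llm: Runnable) -> Runnable:
    def record_dog(name: str, color: str, fav_food: str) -> None:
        """Record some identifying information about a dog.

        Args:
            name: The dog's name.
            color: The dog's color.
            fav_food: The dog's favorite food.
        """

    # With a single non-pydantic function, get_openai_output_parser falls back
    # to JsonOutputFunctionsParser(args_only=True), so invoking the returned
    # runnable yields a plain dict of arguments, e.g.
    # {"name": "Harry", "color": "brown", "fav_food": "chicken"}.
    return create_openai_fn_runnable([record_dog], llm)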
[docs]@deprecated( since="0.1.17", message=( "LangChain has introduced a method called `with_structured_output` that " "is available on ChatModels capable of tool calling. " "You can read more about the method here: " "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ " "Please follow our extraction use case documentation for more guidelines " "on how to do information extraction with LLMs. " "https://python.langchain.com/docs/use_cases/extraction/. " "If you notice other issues, please provide " "feedback here: " "https://github.com/langchain-ai/langchain/discussions/18154" ), removal="0.3.0", alternative=( """ from langchain_core.pydantic_v1 import BaseModel, Field from langchain_anthropic import ChatAnthropic class Joke(BaseModel): setup: str = Field(description="The setup of the joke") punchline: str = Field(description="The punchline to the joke") # Or any other chat model that supports tools. # Please reference to to the documentation of structured_output # to see an up to date list of which models support # with_structured_output. model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0) structured_llm = model.with_structured_output(Joke) structured_llm.invoke("Tell me a joke about cats. Make sure to call the Joke function.") """ ), ) def create_structured_output_runnable( output_schema: Union[Dict[str, Any], Type[BaseModel]], llm: Runnable, prompt: Optional[BasePromptTemplate] = None, *, output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, enforce_function_usage: bool = True, return_single: bool = True, mode: Literal[ "openai-functions", "openai-tools", "openai-json" ] = "openai-functions", **kwargs: Any, ) -> Runnable: """创建一个可运行的程序,用于提取结构化输出。 参数: output_schema:可以是字典或pydantic.BaseModel类。如果传入的是字典, 则假定它已经是一个有效的JsonSchema。 为了获得最佳结果,pydantic.BaseModels应该有描述模式代表什么以及参数描述的文档字符串。 llm:要使用的语言模型。假定支持OpenAI函数调用API 如果模式是'openai-function'。假定支持OpenAI response_format 参数如果模式是'openai-json'。 prompt:传递给模型的BasePromptTemplate。如果模式是'openai-json'且 prompt具有输入变量'output_schema',则给定的output_schema 将被转换为JsonSchema并插入到prompt中。 output_parser:用于解析模型输出的输出解析器。默认情况下 将从函数类型中推断出来。如果传入pydantic.BaseModel, 那么OutputParser将尝试使用pydantic类解析输出。否则模型输出将被解析为JSON。 mode:从模型中提取结构化输出的方式。如果是'openai-functions' 则使用已弃用的'functions','function_call'模式进行OpenAI函数调用。 如果是'openai-tools',则使用最新的'tools','tool_choice'模式进行OpenAI函数调用。 这比'openai-functions'更推荐。如果是'openai-json',则使用OpenAI模型 并将response_format设置为JSON。 enforce_function_usage:仅适用于模式为'openai-tools'或 'openai-functions'时。如果为True,则模型将被强制使用给定的 输出模式。如果为False,则模型可以选择是否使用输出 模式。 return_single:仅适用于模式为'openai-tools'时。是否返回一个结构化输出列表 还是单个输出。如果为True且模型不返回任何 结构化输出,则链输出为None。如果为False且模型不返回任何结构化输出 则链输出为一个空列表。 **kwargs:额外的命名参数。 返回: 一个可运行的序列,将返回与给定output_schema匹配的结构化输出。 使用Pydantic模式的OpenAI工具示例(mode='openai-tools'): .. 
code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.pydantic_v1 import BaseModel, Field class RecordDog(BaseModel): '''记录有关狗的一些标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) prompt = ChatPromptTemplate.from_messages( [ ("system", "您是一个提取算法。请提取每个可能的实例"), ('human', '{input}') ] ) structured_llm = create_structured_output_runnable( RecordDog, llm, mode="openai-tools", enforce_function_usage=True, return_single=True ) structured_llm.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"}) # -> RecordDog(name="Harry", color="brown", fav_food="chicken") 使用字典模式的OpenAI工具示例(mode="openai-tools"): .. code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI dog_schema = { "type": "function", "function": { "name": "record_dog", "description": "记录有关狗的一些标识信息。", "parameters": { "type": "object", "properties": { "name": { "description": "狗的名字", "type": "string" }, "color": { "description": "狗的颜色", "type": "string" }, "fav_food": { "description": "狗喜欢的食物", "type": "string" } }, "required": ["name", "color"] } } } llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable( dog_schema, llm, mode="openai-tools", enforce_function_usage=True, return_single=True ) structured_llm.invoke("Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉") # -> {'name': 'Harry', 'color': 'brown', 'fav_food': 'chicken'} OpenAI函数示例(mode="openai-functions"): .. code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.pydantic_v1 import BaseModel, Field class Dog(BaseModel): '''有关狗的标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-functions") structured_llm.invoke("Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉") # -> Dog(name="Harry", color="brown", fav_food="chicken") 带有提示的OpenAI函数示例: .. code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field class Dog(BaseModel): '''有关狗的标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-functions") system = '''从用户输入中提取提到的任何狗的信息。''' prompt = ChatPromptTemplate.from_messages( [("system", system), ("human", "{input}"),] ) chain = prompt | structured_llm chain.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"}) # -> Dog(name="Harry", color="brown", fav_food="chicken") OpenAI JSON响应格式示例(mode="openai-json"): .. 
code-block:: python from typing import Optional from langchain.chains import create_structured_output_runnable from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field class Dog(BaseModel): '''有关狗的标识信息。''' name: str = Field(..., description="狗的名字") color: str = Field(..., description="狗的颜色") fav_food: Optional[str] = Field(None, description="狗喜欢的食物") llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-json") system = '''您是一个用于提取结构化JSON格式信息的世界级助手。 从用户输入中提取与以下JSON模式匹配的有效JSON blob: {output_schema}''' prompt = ChatPromptTemplate.from_messages( [("system", system), ("human", "{input}"),] ) chain = prompt | structured_llm chain.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"}) """ # noqa: E501 # for backwards compatibility force_function_usage = kwargs.get( "enforce_single_function_usage", enforce_function_usage ) if mode == "openai-tools": # Protect against typos in kwargs keys_in_kwargs = set(kwargs.keys()) # Backwards compatibility keys unrecognized_keys = keys_in_kwargs - {"enforce_single_function_usage"} if unrecognized_keys: raise TypeError( f"Got an unexpected keyword argument(s): {unrecognized_keys}." ) return _create_openai_tools_runnable( output_schema, llm, prompt=prompt, output_parser=output_parser, enforce_tool_usage=force_function_usage, first_tool_only=return_single, ) elif mode == "openai-functions": return _create_openai_functions_structured_output_runnable( output_schema, llm, prompt=prompt, output_parser=output_parser, enforce_single_function_usage=force_function_usage, **kwargs, # llm-specific kwargs ) elif mode == "openai-json": if force_function_usage: raise ValueError( "enforce_single_function_usage is not supported for mode='openai-json'." ) return _create_openai_json_runnable( output_schema, llm, prompt=prompt, output_parser=output_parser, **kwargs ) else: raise ValueError( f"Invalid mode {mode}. Expected one of 'openai-tools', 'openai-functions', " f"'openai-json'." )
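

# A minimal sketch of the backwards-compatibility handling above: in
# 'openai-tools' mode the legacy kwarg name `enforce_single_function_usage`
# is still honored, while any other unrecognized kwarg raises a TypeError
# before the model is invoked. (`_example_legacy_kwarg_handling` and `Dog`
# are illustrative names, not part of the module's API.)
def _example_legacy_kwarg_handling(llm: Runnable) -> None:
    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    # The old kwarg name is picked up via kwargs.get(...) and mapped onto
    # enforce_function_usage.
    create_structured_output_runnable(
        Dog, llm, mode="openai-tools", enforce_single_function_usage=True
    )
    try:
        create_structured_output_runnable(Dog, llm, mode="openai-tools", typo_kwarg=1)
    except TypeError:
        pass  # unrecognized keys are rejected early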


def _create_openai_tools_runnable(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    llm: Runnable,
    *,
    prompt: Optional[BasePromptTemplate],
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]],
    enforce_tool_usage: bool,
    first_tool_only: bool,
) -> Runnable:
    oai_tool = convert_to_openai_tool(tool)
    llm_kwargs: Dict[str, Any] = {"tools": [oai_tool]}
    if enforce_tool_usage:
        llm_kwargs["tool_choice"] = {
            "type": "function",
            "function": {"name": oai_tool["function"]["name"]},
        }
    output_parser = output_parser or _get_openai_tool_output_parser(
        tool, first_tool_only=first_tool_only
    )
    if prompt:
        return prompt | llm.bind(**llm_kwargs) | output_parser
    else:
        return llm.bind(**llm_kwargs) | output_parser


def _get_openai_tool_output_parser(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    *,
    first_tool_only: bool = False,
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    if isinstance(tool, type) and issubclass(tool, BaseModel):
        output_parser: Union[
            BaseOutputParser, BaseGenerationOutputParser
        ] = PydanticToolsParser(tools=[tool], first_tool_only=first_tool_only)
    else:
        key_name = convert_to_openai_tool(tool)["function"]["name"]
        output_parser = JsonOutputKeyToolsParser(
            first_tool_only=first_tool_only, key_name=key_name
        )
    return output_parser
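

# A minimal sketch of the parser selection above: pydantic classes get a
# PydanticToolsParser (returning model instances), while dict/function tools
# get a JsonOutputKeyToolsParser keyed by the tool's name (returning raw
# argument dicts). (`_example_tool_output_parser_selection` and the names
# inside it are illustrative, not part of the module's API.)
def _example_tool_output_parser_selection() -> None:
    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    pydantic_parser = _get_openai_tool_output_parser(Dog, first_tool_only=True)
    assert isinstance(pydantic_parser, PydanticToolsParser)

    dict_tool = {
        "type": "function",
        "function": {
            "name": "record_dog",
            "description": "Record some identifying information about a dog.",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    json_parser = _get_openai_tool_output_parser(dict_tool, first_tool_only=True)
    assert isinstance(json_parser, JsonOutputKeyToolsParser)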


def get_openai_output_parser(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    """Get the appropriate function output parser given the user functions.

    Args:
        functions: Sequence where each element is either a dictionary, a
            pydantic.BaseModel class, or a Python function. If a dictionary is
            passed in, it is assumed to already be a valid OpenAI function.

    Returns:
        A PydanticOutputFunctionsParser if the functions are Pydantic classes,
        otherwise a JsonOutputFunctionsParser. If there is only one function and
        it is not a Pydantic class, then the output parser will automatically
        extract the function arguments and not the function name.
    """
    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
        if len(functions) > 1:
            pydantic_schema: Union[Dict, Type[BaseModel]] = {
                convert_to_openai_function(fn)["name"]: fn for fn in functions
            }
        else:
            pydantic_schema = functions[0]
        output_parser: Union[
            BaseOutputParser, BaseGenerationOutputParser
        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
    else:
        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
    return output_parser
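

# A minimal sketch of the inference above: with multiple pydantic classes the
# parser maps each generated OpenAI function name back to its class, so it can
# reconstruct whichever model the LLM chose; a single non-pydantic function
# falls back to args-only JSON parsing. (`_example_output_parser_inference`
# and the classes inside it are illustrative, not part of the module's API.)
def _example_output_parser_inference() -> None:
    class Person(BaseModel):
        """Identifying information about a person."""

        name: str

    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    multi = get_openai_output_parser([Person, Dog])
    assert isinstance(multi, PydanticOutputFunctionsParser)

    single_dict = {
        "name": "record_dog",
        "description": "Record some identifying information about a dog.",
        "parameters": {"type": "object", "properties": {}},
    }
    args_only = get_openai_output_parser([single_dict])
    assert isinstance(args_only, JsonOutputFunctionsParser)
    assert args_only.args_only  # chain output is just the arguments dict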


def _create_openai_json_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
) -> Runnable:
    """Create a runnable that uses the OpenAI JSON response format."""
    if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
        output_parser = output_parser or PydanticOutputParser(
            pydantic_object=output_schema,
        )
        schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
    else:
        output_parser = output_parser or JsonOutputParser()
        schema_as_dict = output_schema

    llm = llm.bind(response_format={"type": "json_object"})
    if prompt:
        if "output_schema" in prompt.input_variables:
            prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))

        return prompt | llm | output_parser
    else:
        return llm | output_parser


def _create_openai_functions_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **llm_kwargs: Any,
) -> Runnable:
    if isinstance(output_schema, dict):
        function: Any = {
            "name": "output_formatter",
            "description": (
                "Output formatter. Should always be used to format your response to"
                " the user."
            ),
            "parameters": output_schema,
        }
    else:

        class _OutputFormatter(BaseModel):
            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501

            output: output_schema  # type: ignore

        function = _OutputFormatter
        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
            pydantic_schema=_OutputFormatter, attr_name="output"
        )
    return create_openai_fn_runnable(
        [function],
        llm,
        prompt=prompt,
        output_parser=output_parser,
        **llm_kwargs,
    )
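

# A minimal sketch of the schema injection performed by
# _create_openai_json_runnable: since the JSON response format does not
# enforce a schema server-side, the schema is rendered as text and substituted
# into any `{output_schema}` prompt variable. (`_example_schema_injection` and
# `Dog` are illustrative names, not part of the module's API.)
def _example_schema_injection() -> None:
    class Dog(BaseModel):
        """Identifying information about a dog."""

        name: str

    # Same rendering used by prompt.partial(output_schema=...) above.
    schema_as_dict = convert_to_openai_function(Dog)["parameters"]
    rendered = json.dumps(schema_as_dict, indent=2)
    assert '"name"' in rendered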