import json
from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union
from langchain_core._api import deprecated
from langchain_core.output_parsers import (
    BaseGenerationOutputParser,
    BaseOutputParser,
    JsonOutputParser,
    PydanticOutputParser,
)
from langchain_core.output_parsers.openai_functions import (
    JsonOutputFunctionsParser,
    PydanticAttrOutputFunctionsParser,
    PydanticOutputFunctionsParser,
)
from langchain_core.output_parsers.openai_tools import (
    JsonOutputKeyToolsParser,
    PydanticToolsParser,
)
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable
from langchain_core.utils.function_calling import (
    convert_to_openai_function,
    convert_to_openai_tool,
)


@deprecated(
    since="0.1.14",
    message=(
        "LangChain has introduced a method called `with_structured_output` that "
        "is available on ChatModels capable of tool calling. "
        "You can read more about the method here: "
        "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ "
        "Please follow our extraction use case documentation for more guidelines "
        "on how to do information extraction with LLMs. "
        "https://python.langchain.com/docs/use_cases/extraction/. "
        "If you notice other issues, please provide "
        "feedback here: "
        "https://github.com/langchain-ai/langchain/discussions/18154"
    ),
    removal="0.3.0",
    alternative=(
        """
            from langchain_core.pydantic_v1 import BaseModel, Field
            from langchain_anthropic import ChatAnthropic

            class Joke(BaseModel):
                setup: str = Field(description="The setup of the joke")
                punchline: str = Field(description="The punchline to the joke")

            # Or any other chat model that supports tools.
            # Please refer to the structured_output documentation to see an
            # up-to-date list of which models support with_structured_output.
            model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0)
            structured_llm = model.with_structured_output(Joke)
            structured_llm.invoke(
                "Tell me a joke about cats. Make sure to call the Joke function."
            )
        """
    ),
)
def create_openai_fn_runnable(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    enforce_single_function_usage: bool = True,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **llm_kwargs: Any,
) -> Runnable:
"""创建一个可运行的序列,使用OpenAI函数。
参数:
functions: 一个序列,可以是字典、pydantic.BaseModels类或Python函数。如果传入字典,则假定它们已经是有效的OpenAI函数。如果只传入一个函数,则将强制模型使用该函数。pydantic.BaseModels和Python函数应该有描述函数功能的文档字符串。为了获得最佳结果,pydantic.BaseModels应该有参数描述,Python函数应该在文档字符串中使用Google Python风格的参数描述。此外,Python函数应该只使用原始类型(str、int、float、bool)或pydantic.BaseModels作为参数。
llm: 要使用的语言模型,假定支持OpenAI函数调用API。
prompt: 传递给模型的BasePromptTemplate。
enforce_single_function_usage: 仅在传入单个函数时使用。如果为True,则将强制模型使用给定的函数。如果为False,则模型将有选择使用给定函数或不使用的选项。
output_parser: 用于解析模型输出的BaseLLMOutputParser。默认情况下将从函数类型中推断。如果传入pydantic.BaseModels,则OutputParser将尝试使用这些来解析输出。否则,模型输出将简单地解析为JSON。如果传入多个函数且它们不是pydantic.BaseModels,则链式输出将包括返回的函数名称和传递给函数的参数。
**llm_kwargs: 要传递给语言模型的其他命名参数。
返回:
一个可运行的序列,当运行时将传递给模型给定的函数。
示例:
.. code-block:: python
from typing import Optional
from langchain.chains.structured_output import create_openai_fn_runnable
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
class RecordPerson(BaseModel):
'''记录有关一个人的一些身份信息。'''
name: str = Field(..., description="人的姓名")
age: int = Field(..., description="人的年龄")
fav_food: Optional[str] = Field(None, description="人喜欢的食物")
class RecordDog(BaseModel):
'''记录有关一只狗的一些身份信息。'''
name: str = Field(..., description="狗的名字")
color: str = Field(..., description="狗的颜色")
fav_food: Optional[str] = Field(None, description="狗喜欢的食物")
llm = ChatOpenAI(model="gpt-4", temperature=0)
structured_llm = create_openai_fn_runnable([RecordPerson, RecordDog], llm)
structured_llm.invoke("Harry was a chubby brown beagle who loved chicken)
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
""" # noqa: E501
    if not functions:
        raise ValueError("Need to pass in at least one function. Received zero.")
    openai_functions = [convert_to_openai_function(f) for f in functions]
    llm_kwargs_: Dict[str, Any] = {"functions": openai_functions, **llm_kwargs}
    if len(openai_functions) == 1 and enforce_single_function_usage:
        llm_kwargs_["function_call"] = {"name": openai_functions[0]["name"]}
    output_parser = output_parser or get_openai_output_parser(functions)
    if prompt:
        return prompt | llm.bind(**llm_kwargs_) | output_parser
    else:
        return llm.bind(**llm_kwargs_) | output_parser


@deprecated(
    since="0.1.17",
    message=(
        "LangChain has introduced a method called `with_structured_output` that "
        "is available on ChatModels capable of tool calling. "
        "You can read more about the method here: "
        "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ "
        "Please follow our extraction use case documentation for more guidelines "
        "on how to do information extraction with LLMs. "
        "https://python.langchain.com/docs/use_cases/extraction/. "
        "If you notice other issues, please provide "
        "feedback here: "
        "https://github.com/langchain-ai/langchain/discussions/18154"
    ),
    removal="0.3.0",
    alternative=(
        """
            from langchain_core.pydantic_v1 import BaseModel, Field
            from langchain_anthropic import ChatAnthropic

            class Joke(BaseModel):
                setup: str = Field(description="The setup of the joke")
                punchline: str = Field(description="The punchline to the joke")

            # Or any other chat model that supports tools.
            # Please refer to the structured_output documentation to see an
            # up-to-date list of which models support with_structured_output.
            model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0)
            structured_llm = model.with_structured_output(Joke)
            structured_llm.invoke(
                "Tell me a joke about cats. Make sure to call the Joke function."
            )
        """
    ),
)
def create_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    enforce_function_usage: bool = True,
    return_single: bool = True,
    mode: Literal[
        "openai-functions", "openai-tools", "openai-json"
    ] = "openai-functions",
    **kwargs: Any,
) -> Runnable:
"""创建一个可运行的程序,用于提取结构化输出。
参数:
output_schema:可以是字典或pydantic.BaseModel类。如果传入的是字典,
则假定它已经是一个有效的JsonSchema。
为了获得最佳结果,pydantic.BaseModels应该有描述模式代表什么以及参数描述的文档字符串。
llm:要使用的语言模型。假定支持OpenAI函数调用API
如果模式是'openai-function'。假定支持OpenAI response_format
参数如果模式是'openai-json'。
prompt:传递给模型的BasePromptTemplate。如果模式是'openai-json'且
prompt具有输入变量'output_schema',则给定的output_schema
将被转换为JsonSchema并插入到prompt中。
output_parser:用于解析模型输出的输出解析器。默认情况下
将从函数类型中推断出来。如果传入pydantic.BaseModel,
那么OutputParser将尝试使用pydantic类解析输出。否则模型输出将被解析为JSON。
mode:从模型中提取结构化输出的方式。如果是'openai-functions'
则使用已弃用的'functions','function_call'模式进行OpenAI函数调用。
如果是'openai-tools',则使用最新的'tools','tool_choice'模式进行OpenAI函数调用。
这比'openai-functions'更推荐。如果是'openai-json',则使用OpenAI模型
并将response_format设置为JSON。
enforce_function_usage:仅适用于模式为'openai-tools'或
'openai-functions'时。如果为True,则模型将被强制使用给定的
输出模式。如果为False,则模型可以选择是否使用输出
模式。
return_single:仅适用于模式为'openai-tools'时。是否返回一个结构化输出列表
还是单个输出。如果为True且模型不返回任何
结构化输出,则链输出为None。如果为False且模型不返回任何结构化输出
则链输出为一个空列表。
**kwargs:额外的命名参数。
返回:
一个可运行的序列,将返回与给定output_schema匹配的结构化输出。
使用Pydantic模式的OpenAI工具示例(mode='openai-tools'):
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
class RecordDog(BaseModel):
'''记录有关狗的一些标识信息。'''
name: str = Field(..., description="狗的名字")
color: str = Field(..., description="狗的颜色")
fav_food: Optional[str] = Field(None, description="狗喜欢的食物")
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
prompt = ChatPromptTemplate.from_messages(
[
("system", "您是一个提取算法。请提取每个可能的实例"),
('human', '{input}')
]
)
structured_llm = create_structured_output_runnable(
RecordDog,
llm,
mode="openai-tools",
enforce_function_usage=True,
return_single=True
)
structured_llm.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"})
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
使用字典模式的OpenAI工具示例(mode="openai-tools"):
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
dog_schema = {
"type": "function",
"function": {
"name": "record_dog",
"description": "记录有关狗的一些标识信息。",
"parameters": {
"type": "object",
"properties": {
"name": {
"description": "狗的名字",
"type": "string"
},
"color": {
"description": "狗的颜色",
"type": "string"
},
"fav_food": {
"description": "狗喜欢的食物",
"type": "string"
}
},
"required": ["name", "color"]
}
}
}
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(
dog_schema,
llm,
mode="openai-tools",
enforce_function_usage=True,
return_single=True
)
structured_llm.invoke("Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉")
# -> {'name': 'Harry', 'color': 'brown', 'fav_food': 'chicken'}
OpenAI函数示例(mode="openai-functions"):
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
class Dog(BaseModel):
'''有关狗的标识信息。'''
name: str = Field(..., description="狗的名字")
color: str = Field(..., description="狗的颜色")
fav_food: Optional[str] = Field(None, description="狗喜欢的食物")
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-functions")
structured_llm.invoke("Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉")
# -> Dog(name="Harry", color="brown", fav_food="chicken")
带有提示的OpenAI函数示例:
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
class Dog(BaseModel):
'''有关狗的标识信息。'''
name: str = Field(..., description="狗的名字")
color: str = Field(..., description="狗的颜色")
fav_food: Optional[str] = Field(None, description="狗喜欢的食物")
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-functions")
system = '''从用户输入中提取提到的任何狗的信息。'''
prompt = ChatPromptTemplate.from_messages(
[("system", system), ("human", "{input}"),]
)
chain = prompt | structured_llm
chain.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"})
# -> Dog(name="Harry", color="brown", fav_food="chicken")
OpenAI JSON响应格式示例(mode="openai-json"):
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
class Dog(BaseModel):
'''有关狗的标识信息。'''
name: str = Field(..., description="狗的名字")
color: str = Field(..., description="狗的颜色")
fav_food: Optional[str] = Field(None, description="狗喜欢的食物")
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(Dog, llm, mode="openai-json")
system = '''您是一个用于提取结构化JSON格式信息的世界级助手。
从用户输入中提取与以下JSON模式匹配的有效JSON blob:
{output_schema}'''
prompt = ChatPromptTemplate.from_messages(
[("system", system), ("human", "{input}"),]
)
chain = prompt | structured_llm
chain.invoke({"input": "Harry是一只胖胖的棕色比格犬,喜欢吃鸡肉"})
""" # noqa: E501
    # for backwards compatibility
    force_function_usage = kwargs.get(
        "enforce_single_function_usage", enforce_function_usage
    )

    if mode == "openai-tools":
        # Protect against typos in kwargs
        keys_in_kwargs = set(kwargs.keys())
        # Backwards compatibility keys
        unrecognized_keys = keys_in_kwargs - {"enforce_single_function_usage"}
        if unrecognized_keys:
            raise TypeError(
                f"Got an unexpected keyword argument(s): {unrecognized_keys}."
            )

        return _create_openai_tools_runnable(
            output_schema,
            llm,
            prompt=prompt,
            output_parser=output_parser,
            enforce_tool_usage=force_function_usage,
            first_tool_only=return_single,
        )
    elif mode == "openai-functions":
        return _create_openai_functions_structured_output_runnable(
            output_schema,
            llm,
            prompt=prompt,
            output_parser=output_parser,
            enforce_single_function_usage=force_function_usage,
            **kwargs,  # llm-specific kwargs
        )
    elif mode == "openai-json":
        if force_function_usage:
            raise ValueError(
                "enforce_single_function_usage is not supported for mode='openai-json'."
            )
        return _create_openai_json_runnable(
            output_schema, llm, prompt=prompt, output_parser=output_parser, **kwargs
        )
    else:
        raise ValueError(
            f"Invalid mode {mode}. Expected one of 'openai-tools', 'openai-functions', "
            f"'openai-json'."
        )


def _create_openai_tools_runnable(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    llm: Runnable,
    *,
    prompt: Optional[BasePromptTemplate],
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]],
    enforce_tool_usage: bool,
    first_tool_only: bool,
) -> Runnable:
    oai_tool = convert_to_openai_tool(tool)
    llm_kwargs: Dict[str, Any] = {"tools": [oai_tool]}
    if enforce_tool_usage:
        llm_kwargs["tool_choice"] = {
            "type": "function",
            "function": {"name": oai_tool["function"]["name"]},
        }
    output_parser = output_parser or _get_openai_tool_output_parser(
        tool, first_tool_only=first_tool_only
    )
    if prompt:
        return prompt | llm.bind(**llm_kwargs) | output_parser
    else:
        return llm.bind(**llm_kwargs) | output_parser


def _get_openai_tool_output_parser(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    *,
    first_tool_only: bool = False,
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    if isinstance(tool, type) and issubclass(tool, BaseModel):
        output_parser: Union[
            BaseOutputParser, BaseGenerationOutputParser
        ] = PydanticToolsParser(tools=[tool], first_tool_only=first_tool_only)
    else:
        key_name = convert_to_openai_tool(tool)["function"]["name"]
        output_parser = JsonOutputKeyToolsParser(
            first_tool_only=first_tool_only, key_name=key_name
        )
    return output_parser


def get_openai_output_parser(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
"""给定用户函数,获取适当的函数输出解析器。
参数:
functions:一个序列,其中每个元素是一个字典、一个pydantic.BaseModel类或一个Python函数。如果传入一个字典,则假定它已经是一个有效的OpenAI函数。
返回:
如果函数是Pydantic类,则返回一个PydanticOutputFunctionsParser,否则返回一个JsonOutputFunctionsParser。如果只有一个函数且它不是Pydantic类,则输出解析器将自动提取函数参数而不是函数名称。
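
    Example:
        A minimal illustrative sketch (``RecordDog`` is a hypothetical model, and the
        import path assumes this module lives at
        ``langchain.chains.structured_output.base``):

        .. code-block:: python

            from langchain.chains.structured_output.base import get_openai_output_parser
            from langchain_core.pydantic_v1 import BaseModel, Field

            class RecordDog(BaseModel):
                '''Record some identifying information about a dog.'''

                name: str = Field(..., description="The dog's name")
                color: str = Field(..., description="The dog's color")

            parser = get_openai_output_parser([RecordDog])
            # A Pydantic class -> PydanticOutputFunctionsParser
            # Dicts or plain functions -> JsonOutputFunctionsParser
            # (args-only when a single non-Pydantic function is passed)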
"""
    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
        if len(functions) > 1:
            pydantic_schema: Union[Dict, Type[BaseModel]] = {
                convert_to_openai_function(fn)["name"]: fn for fn in functions
            }
        else:
            pydantic_schema = functions[0]
        output_parser: Union[
            BaseOutputParser, BaseGenerationOutputParser
        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
    else:
        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
    return output_parser


def _create_openai_json_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
) -> Runnable:
""""""
    if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
        output_parser = output_parser or PydanticOutputParser(
            pydantic_object=output_schema,
        )
        schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
    else:
        output_parser = output_parser or JsonOutputParser()
        schema_as_dict = output_schema

    llm = llm.bind(response_format={"type": "json_object"})
    if prompt:
        if "output_schema" in prompt.input_variables:
            prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))

        return prompt | llm | output_parser
    else:
        return llm | output_parser


def _create_openai_functions_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: Optional[BasePromptTemplate] = None,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **llm_kwargs: Any,
) -> Runnable:
    if isinstance(output_schema, dict):
        function: Any = {
            "name": "output_formatter",
            "description": (
                "Output formatter. Should always be used to format your response to the"
                " user."
            ),
            "parameters": output_schema,
        }
    else:

        class _OutputFormatter(BaseModel):
            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501

            output: output_schema  # type: ignore

        function = _OutputFormatter
        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
            pydantic_schema=_OutputFormatter, attr_name="output"
        )
    return create_openai_fn_runnable(
        [function],
        llm,
        prompt=prompt,
        output_parser=output_parser,
        **llm_kwargs,
    )