# Source code for langchain.chains.openai_functions.extraction

from typing import Any, List, Optional

from langchain_core._api import deprecated
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers.openai_functions import (
    JsonKeyOutputFunctionsParser,
    PydanticAttrOutputFunctionsParser,
)
from langchain_core.prompts import BasePromptTemplate, ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.openai_functions.utils import (
    _convert_schema,
    _resolve_schema_references,
    get_llm_kwargs,
)


def _get_extraction_function(entity_schema: dict) -> dict:
    """Build the OpenAI function definition used for entity extraction.

    Wraps *entity_schema* as the item schema of a required ``info`` array so
    the model returns a list of extracted entities under the ``info`` key.
    """
    parameters = {
        "type": "object",
        "properties": {
            "info": {
                "type": "array",
                "items": _convert_schema(entity_schema),
            },
        },
        "required": ["info"],
    }
    return {
        "name": "information_extraction",
        "description": "Extracts the relevant information from the passage.",
        "parameters": parameters,
    }


# Default prompt: instructs the model to extract entities by calling the
# 'information_extraction' function, filling in the {input} passage.
_EXTRACTION_TEMPLATE = """Extract and save the relevant entities mentioned \
in the following passage together with their properties.

Only extract the properties mentioned in the 'information_extraction' function.

If a property is not present and is not required in the function parameters, do not include it in the output.

Passage:
{input}
"""  # noqa: E501


@deprecated(
    since="0.1.14",
    message=(
        "LangChain has introduced a method called `with_structured_output` that "
        "is available on ChatModels capable of tool calling. "
        "You can read more about the method here: "
        "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ "
        "Please follow our extraction use case documentation for more guidelines "
        "on how to do information extraction with LLMs. "
        "https://python.langchain.com/docs/use_cases/extraction/. "
        "If you notice other issues, please provide "
        "feedback here: "
        "https://github.com/langchain-ai/langchain/discussions/18154"
    ),
    removal="0.3.0",
    alternative=(
        """
            from langchain_core.pydantic_v1 import BaseModel, Field
            from langchain_anthropic import ChatAnthropic

            class Joke(BaseModel):
                setup: str = Field(description="The setup of the joke")
                punchline: str = Field(description="The punchline to the joke")

            # Or any other chat model that supports tools.
            # Please reference to to the documentation of structured_output
            # to see an up to date list of which models support
            # with_structured_output.
            model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0)
            structured_llm = model.with_structured_output(Joke)
            structured_llm.invoke("Tell me a joke about cats.
                Make sure to call the Joke function.")
        """
    ),
)
def create_extraction_chain(
    schema: dict,
    llm: BaseLanguageModel,
    prompt: Optional[BasePromptTemplate] = None,
    tags: Optional[List[str]] = None,
    verbose: bool = False,
) -> Chain:
    """Create a chain that extracts information from a passage.

    Args:
        schema: The schema of the entities to extract.
        llm: The language model to use.
        prompt: The prompt to use for extraction.
        tags: Optional list of tags to attach to the chain's runs.
        verbose: Whether to run in verbose mode. In verbose mode, some
            intermediate logs will be printed to the console. Defaults to the
            global `verbose` value, accessible via
            `langchain.globals.get_verbose()`.

    Returns:
        Chain that can be used to extract information from a passage.
    """
    # Build the OpenAI function spec from the dict schema and parse the
    # model's function-call arguments, keeping only the "info" array.
    function = _get_extraction_function(schema)
    extraction_prompt = prompt or ChatPromptTemplate.from_template(_EXTRACTION_TEMPLATE)
    output_parser = JsonKeyOutputFunctionsParser(key_name="info")
    # llm_kwargs forces the model to call the extraction function.
    llm_kwargs = get_llm_kwargs(function)
    chain = LLMChain(
        llm=llm,
        prompt=extraction_prompt,
        llm_kwargs=llm_kwargs,
        output_parser=output_parser,
        tags=tags,
        verbose=verbose,
    )
    return chain
@deprecated(
    since="0.1.14",
    message=(
        "LangChain has introduced a method called `with_structured_output` that "
        "is available on ChatModels capable of tool calling. "
        "You can read more about the method here: "
        "https://python.langchain.com/docs/modules/model_io/chat/structured_output/ "
        "Please follow our extraction use case documentation for more guidelines "
        "on how to do information extraction with LLMs. "
        "https://python.langchain.com/docs/use_cases/extraction/. "
        "If you notice other issues, please provide "
        "feedback here: "
        "https://github.com/langchain-ai/langchain/discussions/18154"
    ),
    removal="0.3.0",
    alternative=(
        """
            from langchain_core.pydantic_v1 import BaseModel, Field
            from langchain_anthropic import ChatAnthropic

            class Joke(BaseModel):
                setup: str = Field(description="The setup of the joke")
                punchline: str = Field(description="The punchline to the joke")

            # Or any other chat model that supports tools.
            # Please reference to to the documentation of structured_output
            # to see an up to date list of which models support
            # with_structured_output.
            model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0)
            structured_llm = model.with_structured_output(Joke)
            structured_llm.invoke("Tell me a joke about cats.
                Make sure to call the Joke function.")
        """
    ),
)
def create_extraction_chain_pydantic(
    pydantic_schema: Any,
    llm: BaseLanguageModel,
    prompt: Optional[BasePromptTemplate] = None,
    verbose: bool = False,
) -> Chain:
    """Create a chain that extracts information from a passage using a pydantic schema.

    Args:
        pydantic_schema: The pydantic schema of the entities to extract.
        llm: The language model to use.
        prompt: The prompt to use for extraction.
        verbose: Whether to run in verbose mode. In verbose mode, some
            intermediate logs will be printed to the console. Defaults to the
            global `verbose` value, accessible via
            `langchain.globals.get_verbose()`.

    Returns:
        Chain that can be used to extract information from a passage.
    """

    # Wrapper model so the parser can validate the "info" list of entities.
    class PydanticSchema(BaseModel):
        info: List[pydantic_schema]  # type: ignore

    # Convert the pydantic model to a JSON schema and inline any $ref
    # definitions so the OpenAI function spec is self-contained.
    openai_schema = pydantic_schema.schema()
    openai_schema = _resolve_schema_references(
        openai_schema, openai_schema.get("definitions", {})
    )

    function = _get_extraction_function(openai_schema)
    extraction_prompt = prompt or ChatPromptTemplate.from_template(_EXTRACTION_TEMPLATE)
    output_parser = PydanticAttrOutputFunctionsParser(
        pydantic_schema=PydanticSchema, attr_name="info"
    )
    # llm_kwargs forces the model to call the extraction function.
    llm_kwargs = get_llm_kwargs(function)
    chain = LLMChain(
        llm=llm,
        prompt=extraction_prompt,
        llm_kwargs=llm_kwargs,
        output_parser=output_parser,
        verbose=verbose,
    )
    return chain