Source code for langchain_core.output_parsers.json

from __future__ import annotations

import json
from json import JSONDecodeError
from typing import Any, List, Optional, Type, TypeVar, Union

import jsonpatch  # type: ignore[import]
import pydantic  # pydantic: ignore

from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers.format_instructions import JSON_FORMAT_INSTRUCTIONS
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
from langchain_core.outputs import Generation
from langchain_core.utils.json import (
    parse_and_check_json_markdown,
    parse_json_markdown,
    parse_partial_json,
)
from langchain_core.utils.pydantic import PYDANTIC_MAJOR_VERSION

# Define ``PydanticBaseModel`` as the set of pydantic base classes this module
# accepts, depending on the major version of pydantic that is installed.
if PYDANTIC_MAJOR_VERSION < 2:
    # Before pydantic 2, use the top-level ``pydantic.BaseModel`` directly.
    PydanticBaseModel = pydantic.BaseModel

else:
    # From pydantic 2 onwards, also pull in the ``BaseModel`` exposed by the
    # ``pydantic.v1`` namespace so that either flavour of model is accepted.
    from pydantic.v1 import BaseModel  # pydantic: ignore

    # Union type needs to be last assignment to PydanticBaseModel to make mypy happy.
    PydanticBaseModel = Union[BaseModel, pydantic.BaseModel]  # type: ignore

# Type variable for "some pydantic model class", bounded by whichever base
# class (or union of base classes) was selected above.
TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)


class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
    """Parse the output of an LLM call into a JSON object.

    When used in streaming mode, it yields partial JSON objects containing
    all of the keys that have been returned so far.

    In streaming mode, if `diff` is set to `True`, it instead yields JSONPatch
    operations describing the difference between the previous object and the
    current one.
    """

    # Optional pydantic model class; when set, its JSON schema is embedded in
    # the format instructions returned by ``get_format_instructions``.
    pydantic_object: Optional[Type[TBaseModel]] = None  # type: ignore

    def _diff(self, prev: Optional[Any], next: Any) -> Any:
        """Return the list of JSONPatch operations that turn ``prev`` into ``next``."""
        patch = jsonpatch.make_patch(prev, next)
        return patch.patch

    def _get_schema(self, pydantic_object: Type[TBaseModel]) -> dict[str, Any]:
        """Return the JSON schema of ``pydantic_object`` as a dict.

        Under pydantic 2, classes deriving from ``pydantic.BaseModel`` are asked
        for their schema through ``model_json_schema()``; every other case
        (pydantic 1, or a ``pydantic.v1`` model under pydantic 2) goes through
        ``schema()``.
        """
        uses_v2_api = PYDANTIC_MAJOR_VERSION == 2 and issubclass(
            pydantic_object, pydantic.BaseModel
        )
        if uses_v2_api:
            return pydantic_object.model_json_schema()
        return pydantic_object.schema()

    def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
        """Parse the text of the first generation in ``result`` as JSON.

        Args:
            result: Generations to parse; only ``result[0].text`` is read.
            partial: When true, a decoding failure yields ``None`` instead of
                raising.

        Returns:
            Whatever ``parse_json_markdown`` returns for the stripped text, or
            ``None`` when ``partial`` is true and decoding fails.

        Raises:
            OutputParserException: If ``partial`` is false and the text cannot
                be decoded as JSON.
        """
        text = result[0].text.strip()
        try:
            return parse_json_markdown(text)
        except JSONDecodeError as e:
            if partial:
                # Incomplete output is expected while streaming; signal
                # "nothing parseable yet" rather than failing.
                return None
            msg = f"Invalid json output: {text}"
            raise OutputParserException(msg, llm_output=text) from e

    def parse(self, text: str) -> Any:
        """Parse ``text`` by wrapping it in a single ``Generation``."""
        generation = Generation(text=text)
        return self.parse_result([generation])

    def get_format_instructions(self) -> str:
        """Return the instructions describing the expected JSON output.

        Without a ``pydantic_object`` a generic sentence is returned; otherwise
        the model's schema (minus its top-level ``title`` and ``type`` entries)
        is serialised and substituted into ``JSON_FORMAT_INSTRUCTIONS``.
        """
        if self.pydantic_object is None:
            return "Return a JSON object."

        # Build a new dict instead of deleting keys in place so the schema
        # object returned by pydantic is never modified.
        full_schema = self._get_schema(self.pydantic_object)
        reduced_schema = {
            key: value
            for key, value in full_schema.items()
            if key not in ("title", "type")
        }
        # json.dumps guarantees well-formed JSON with double quotes.
        schema_str = json.dumps(reduced_schema)
        return JSON_FORMAT_INSTRUCTIONS.format(schema=schema_str)

    @property
    def _type(self) -> str:
        """Identifier string for this parser type."""
        return "simple_json_output_parser"
# Backwards-compatibility aliases: the parser remains reachable under the name
# ``SimpleJsonOutputParser``, and the JSON helper functions imported at the top
# of this module are re-bound here as module-level names (presumably so that
# existing imports of them from this module keep working).
parse_and_check_json_markdown = parse_and_check_json_markdown
parse_partial_json = parse_partial_json
SimpleJsonOutputParser = JsonOutputParser