Source code for langchain_core.output_parsers.pydantic

import json
from typing import Generic, List, Type, TypeVar, Union

import pydantic  # pydantic: ignore

from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.outputs import Generation
from langchain_core.utils.pydantic import PYDANTIC_MAJOR_VERSION

if PYDANTIC_MAJOR_VERSION < 2:
    PydanticBaseModel = pydantic.BaseModel

else:
    from pydantic.v1 import BaseModel  # pydantic: ignore

    # Union type needs to be last assignment to PydanticBaseModel to make mypy happy.
    PydanticBaseModel = Union[BaseModel, pydantic.BaseModel]  # type: ignore

TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)


[docs]class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]): """Parse an output using a pydantic model.""" pydantic_object: Type[TBaseModel] # type: ignore """The pydantic model to parse.""" def _parse_obj(self, obj: dict) -> TBaseModel: if PYDANTIC_MAJOR_VERSION == 2: try: if issubclass(self.pydantic_object, pydantic.BaseModel): return self.pydantic_object.model_validate(obj) elif issubclass(self.pydantic_object, pydantic.v1.BaseModel): return self.pydantic_object.parse_obj(obj) else: raise OutputParserException( f"Unsupported model version for PydanticOutputParser: \ {self.pydantic_object.__class__}" ) except (pydantic.ValidationError, pydantic.v1.ValidationError) as e: raise self._parser_exception(e, obj) else: # pydantic v1 try: return self.pydantic_object.parse_obj(obj) except pydantic.ValidationError as e: raise self._parser_exception(e, obj) def _parser_exception( self, e: Exception, json_object: dict ) -> OutputParserException: json_string = json.dumps(json_object) name = self.pydantic_object.__name__ msg = f"Failed to parse {name} from completion {json_string}. Got: {e}" return OutputParserException(msg, llm_output=json_string)
[docs] def parse_result( self, result: List[Generation], *, partial: bool = False ) -> TBaseModel: json_object = super().parse_result(result) return self._parse_obj(json_object)
[docs] def parse(self, text: str) -> TBaseModel: return super().parse(text)
[docs] def get_format_instructions(self) -> str: # Copy schema to avoid altering original Pydantic schema. schema = {k: v for k, v in self.pydantic_object.schema().items()} # Remove extraneous fields. reduced_schema = schema if "title" in reduced_schema: del reduced_schema["title"] if "type" in reduced_schema: del reduced_schema["type"] # Ensure json in context is well-formed with double quotes. schema_str = json.dumps(reduced_schema) return _PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
@property def _type(self) -> str: return "pydantic" @property def OutputType(self) -> Type[TBaseModel]: """Return the pydantic model.""" return self.pydantic_object
_PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below. As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted. Here is the output schema: ``` {schema} ```""" # noqa: E501