# Source code for langchain.output_parsers.regex_dict
from __future__ import annotations
import re
from typing import Dict, Optional
from langchain_core.output_parsers import BaseOutputParser
class RegexDictParser(BaseOutputParser):
    """Parse the output of an LLM call into a dictionary using a regex.

    For every entry in ``output_key_to_format`` the parser substitutes the
    regex-escaped label into ``regex_pattern``, searches the text with the
    resulting expression and stores the single captured value under the
    entry's key.
    """

    regex_pattern: str = r"{}:\s?([^.'\n']*)\.?"  # : :meta private:
    """Regex template used to parse the output; ``{}`` receives the escaped label."""
    output_key_to_format: Dict[str, str]
    """Mapping from each result key to the label searched for in the text."""
    no_update_value: Optional[str] = None
    """Captured value that causes its key to be left out of the result."""

    @property
    def _type(self) -> str:
        """Return the type key."""
        return "regex_dict_parser"

    def parse(self, text: str) -> Dict[str, str]:
        """Parse the output of an LLM call.

        Args:
            text: Raw text to search.

        Returns:
            A dictionary holding the captured value for each key of
            ``output_key_to_format``, omitting keys whose captured value
            equals ``no_update_value``.

        Raises:
            ValueError: If the expression built for a key matches nowhere,
                or more than once, in ``text``.
        """
        result: Dict[str, str] = {}
        for output_key, expected_format in self.output_key_to_format.items():
            # Escape the label so regex metacharacters in it match literally.
            specific_regex = self.regex_pattern.format(re.escape(expected_format))
            matches = re.findall(specific_regex, text)
            # Messages are assembled from adjacent literals rather than a
            # backslash continuation, which would splice this file's
            # indentation into the text of the error.
            if not matches:
                raise ValueError(
                    f"No match found for output key: {output_key} with "
                    f"expected format {expected_format} on text {text}"
                )
            if len(matches) > 1:
                raise ValueError(
                    f"Multiple matches found for output key: {output_key} with "
                    f"expected format {expected_format} on text {text}"
                )
            value = matches[0]
            if self.no_update_value is not None and value == self.no_update_value:
                # Sentinel value: leave this key out of the result.
                continue
            result[output_key] = value
        return result