Source code for langchain_community.utilities.bibtex

"""调用bibtexparser的工具。"""
import logging
from typing import Any, Dict, List, Mapping

from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Supplementary BibTeX field names. ``get_metadata`` copies these from an
# entry into its result only when it is called with ``load_extra=True``.
OPTIONAL_FIELDS = [
    # Descriptive fields, in alphabetical order.
    "annotate", "booktitle", "editor", "howpublished",
    "journal", "keywords", "note", "organization",
    "publisher", "school", "series", "type",
    # Identifier fields.
    "doi", "issn", "isbn",
]


class BibtexparserWrapper(BaseModel):
    """Wrapper around bibtexparser.

    To use, you should have the ``bibtexparser`` python package installed.
    https://bibtexparser.readthedocs.io/en/master/

    This wrapper uses bibtexparser to load a collection of references from a
    bibtex file and to extract summary metadata for each entry.
    """

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the ``bibtexparser`` package can be imported.

        Args:
            values: The field values passed in by pydantic.

        Returns:
            ``values`` unchanged.

        Raises:
            ImportError: If ``bibtexparser`` cannot be imported.
        """
        try:
            import bibtexparser  # noqa
        except ImportError as exc:
            # Chain explicitly so the original import failure stays visible
            # as the cause of the friendlier message below.
            raise ImportError(
                "Could not import bibtexparser python package. "
                "Please install it with `pip install bibtexparser`."
            ) from exc

        return values

    def load_bibtex_entries(
        self, path: str, encoding: str = "utf-8"
    ) -> List[Dict[str, Any]]:
        """Load bibtex entries from the bibtex file at the given path.

        Args:
            path: Path of the bibtex file to read.
            encoding: Text encoding used to decode the file. Defaults to
                UTF-8 so the result does not depend on the platform's locale
                encoding; pass another codec for files stored differently.

        Returns:
            The ``entries`` list of the database parsed by
            ``bibtexparser.load``.
        """
        # Imported lazily: the package is an optional dependency.
        import bibtexparser

        with open(path, encoding=encoding) as file:
            entries = bibtexparser.load(file).entries
        return entries

    def get_metadata(
        self, entry: Mapping[str, Any], load_extra: bool = False
    ) -> Dict[str, Any]:
        """Get metadata for the given entry.

        Args:
            entry: A single bibtex entry mapping.
            load_extra: When true, also copy every field listed in
                ``OPTIONAL_FIELDS`` from the entry.

        Returns:
            A dict that may contain ``id``, ``published_year``, ``title``,
            ``publication``, ``authors``, ``abstract`` and ``url`` (plus the
            optional fields when requested). Keys whose value is ``None``
            are omitted.
        """
        # Use the journal when present and truthy, otherwise the book title.
        publication = entry.get("journal") or entry.get("booktitle")

        # An explicit URL wins; otherwise derive a resolver link from the DOI.
        if "url" in entry:
            url = entry["url"]
        elif "doi" in entry:
            url = f'https://doi.org/{entry["doi"]}'
        else:
            url = None

        meta: Dict[str, Any] = {
            "id": entry.get("ID"),
            "published_year": entry.get("year"),
            "title": entry.get("title"),
            "publication": publication,
            "authors": entry.get("author"),
            "abstract": entry.get("abstract"),
            "url": url,
        }

        if load_extra:
            for field in OPTIONAL_FIELDS:
                meta[field] = entry.get(field)

        # Drop fields the entry did not provide.
        return {k: v for k, v in meta.items() if v is not None}