# Source code for langchain_community.graphs.rdf_graph

from __future__ import annotations

import os
from typing import (
    TYPE_CHECKING,
    Dict,
    List,
    Optional,
)

if TYPE_CHECKING:
    import rdflib

# Namespace IRIs for the vocabularies used by the schema-discovery queries,
# in the order they appear in ``prefixes``.
_NAMESPACE_IRIS = (
    ("owl", "http://www.w3.org/2002/07/owl#"),
    ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
    ("xsd", "http://www.w3.org/2001/XMLSchema#"),
)

# Ready-to-concatenate SPARQL ``PREFIX`` declarations, keyed by short name.
prefixes = {name: f"PREFIX {name}: <{iri}>\n" for name, iri in _NAMESPACE_IRIS}


def _schema_query(prefix_names: tuple, var: str, patterns: tuple) -> str:
    """Assemble one schema-discovery SELECT query.

    Every query in this module has the same shape: the requested PREFIX
    declarations, ``SELECT DISTINCT ?<var> ?com``, the given graph patterns,
    and a trailing OPTIONAL clause fetching the ``rdfs:comment`` of ``?<var>``.

    :param prefix_names: keys of ``prefixes`` to emit, in order
    :param var: name of the selected variable, without the leading ``?``
    :param patterns: graph patterns placed inside the WHERE block, in order
    :return: the complete SPARQL query text
    """
    declarations = "".join(prefixes[name] for name in prefix_names)
    where_lines = [*patterns, f"OPTIONAL {{ ?{var} rdfs:comment ?com }}"]
    # Each WHERE line is indented by four spaces and ends with " \n".
    where_body = "".join(f"    {line} \n" for line in where_lines)
    return f"{declarations}SELECT DISTINCT ?{var} ?com\nWHERE {{ \n{where_body}}}"


# Classes: every type that is directly asserted for some instance.
cls_query_rdf = _schema_query(("rdfs",), "cls", ("?instance a ?cls .",))

# Classes: asserted types plus all of their superclasses.
cls_query_rdfs = _schema_query(
    ("rdfs",), "cls", ("?instance a/rdfs:subClassOf* ?cls .",)
)

# Classes: as for RDFS, restricted to classes identified by an IRI.
cls_query_owl = _schema_query(
    ("rdfs",),
    "cls",
    ("?instance a/rdfs:subClassOf* ?cls .", "FILTER (isIRI(?cls)) ."),
)

# Relationships: every predicate used in any triple.
rel_query_rdf = _schema_query(("rdfs",), "rel", ("?subj ?rel ?obj .",))

# Relationships: everything typed as (a sub-property of) rdf:Property.
rel_query_rdfs = _schema_query(
    ("rdf", "rdfs"), "rel", ("?rel a/rdfs:subPropertyOf* rdf:Property .",)
)

# OWL object properties.
op_query_owl = _schema_query(
    ("rdfs", "owl"), "op", ("?op a/rdfs:subPropertyOf* owl:ObjectProperty .",)
)

# OWL datatype properties.
dp_query_owl = _schema_query(
    ("rdfs", "owl"), "dp", ("?dp a/rdfs:subPropertyOf* owl:DatatypeProperty .",)
)


class RdfGraph:
    """RDFlib wrapper for graph operations.

    Modes:
    * local: Local file - can be queried and changed
    * online: Online file - can only be queried, changes can be stored locally
    * store: Triple store - can be queried and changed if update_endpoint
      is available

    Together with a source file, the serialization should be specified.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the
        calling code may attempt commands that would result in deletion,
        mutation of data if appropriately prompted or reading sensitive data
        if such data is present in the database.
        The best way to guard against such negative outcomes is to (as
        appropriate) limit the permissions granted to the credentials used
        with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        source_file: Optional[str] = None,
        serialization: Optional[str] = "ttl",
        query_endpoint: Optional[str] = None,
        update_endpoint: Optional[str] = None,
        standard: Optional[str] = "rdf",
        local_copy: Optional[str] = None,
        graph_kwargs: Optional[Dict] = None,
        store_kwargs: Optional[Dict] = None,
    ) -> None:
        """Set up the RDFlib graph.

        :param source_file: either a path for a local file or a URL
        :param serialization: serialization of the input
        :param query_endpoint: SPARQL endpoint for queries, read access
        :param update_endpoint: SPARQL endpoint for UPDATE queries, write access
        :param standard: RDF, RDFS, or OWL
        :param local_copy: new local copy for storing changes
        :param graph_kwargs: Additional rdflib.Graph specific kwargs
            that will be used to initialize it,
            if query_endpoint is provided.
        :param store_kwargs: Additional sparqlstore.SPARQLStore specific kwargs
            that will be used to initialize it,
            if query_endpoint is provided.
        :raises ImportError: if rdflib is not installed
        :raises ValueError: if ``standard`` is unsupported, or if the
            combination of ``source_file`` and endpoints is ambiguous
        :raises AssertionError: if the loaded graph contains no triples
        """
        self.source_file = source_file
        self.serialization = serialization
        self.query_endpoint = query_endpoint
        self.update_endpoint = update_endpoint
        self.standard = standard
        self.local_copy = local_copy

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError as e:
            raise ImportError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            ) from e
        if self.standard not in (supported_standards := ("rdf", "rdfs", "owl")):
            raise ValueError(
                f"Invalid standard. Supported standards are: {supported_standards}."
            )

        # Exactly one data source is allowed: a file, or a triple store.
        if (
            not source_file
            and not query_endpoint
            or source_file
            and (query_endpoint or update_endpoint)
        ):
            raise ValueError(
                "Could not unambiguously initialize the graph wrapper. "
                "Specify either a file (local or online) via the source_file "
                "or a triple store via the endpoints."
            )

        if source_file:
            # Only real URLs count as online; a local file whose name merely
            # starts with "http" must stay in local mode.
            if source_file.startswith(("http://", "https://")):
                self.mode = "online"
            else:
                self.mode = "local"
                # Local files are updated in place unless told otherwise.
                if self.local_copy is None:
                    self.local_copy = self.source_file
            self.graph = rdflib.Graph()
            self.graph.parse(source_file, format=self.serialization)

        if query_endpoint:
            store_kwargs = store_kwargs or {}
            self.mode = "store"
            if not update_endpoint:
                self._store = sparqlstore.SPARQLStore(**store_kwargs)
                self._store.open(query_endpoint)
            else:
                self._store = sparqlstore.SPARQLUpdateStore(**store_kwargs)
                self._store.open((query_endpoint, update_endpoint))
            graph_kwargs = graph_kwargs or {}
            self.graph = rdflib.Graph(self._store, **graph_kwargs)

        # Verify that the graph was loaded
        if not len(self.graph):
            raise AssertionError("The graph is empty.")

        # Set schema
        self.schema = ""
        self.load_schema()

    @property
    def get_schema(self) -> str:
        """Return the schema of the graph database."""
        return self.schema

    def query(
        self,
        query: str,
    ) -> List[rdflib.query.ResultRow]:
        """Query the graph.

        :param query: SPARQL query text
        :return: the result rows; results that are not ``ResultRow``
            instances are dropped
        :raises ValueError: if rdflib raises ``ParserError`` for the query
        """
        from rdflib.exceptions import ParserError
        from rdflib.query import ResultRow

        try:
            res = self.graph.query(query)
        except ParserError as e:
            raise ValueError("Generated SPARQL statement is invalid\n" f"{e}") from e
        return [r for r in res if isinstance(r, ResultRow)]

    def update(
        self,
        query: str,
    ) -> None:
        """Update the graph and persist the change.

        :param query: SPARQL UPDATE text
        :raises ValueError: if rdflib raises ``ParserError`` for the query,
            or if a file-backed graph has no ``local_copy`` to be saved to
        """
        from rdflib.exceptions import ParserError

        try:
            self.graph.update(query)
        except ParserError as e:
            raise ValueError("Generated SPARQL statement is invalid\n" f"{e}") from e
        self._persist_local_copy()

    def _persist_local_copy(self) -> None:
        """Write the graph to ``local_copy`` after an update, when applicable."""
        if self.local_copy:
            self.graph.serialize(
                destination=self.local_copy,
                format=self._infer_format(
                    self.local_copy, self.serialization or "turtle"
                ),
            )
        elif self.mode != "store":
            # A file-backed graph only lives in memory, so an update that
            # cannot be written anywhere would be lost.
            raise ValueError("No target file specified for saving the updated file.")
        # In store mode the update has already been sent to the SPARQL update
        # endpoint; a local copy is optional there.

    @staticmethod
    def _infer_format(path: str, default: str) -> str:
        """Return the file extension of ``path``, or ``default`` if it has none.

        Only the final path component is inspected, so dots in directory
        names are not mistaken for an extension.
        """
        _, dot, extension = os.path.basename(path).rpartition(".")
        return extension if dot and extension else default

    @staticmethod
    def _get_local_name(iri: str) -> str:
        """Return the part of ``iri`` after the last '#', else the last '/'.

        :raises ValueError: if ``iri`` contains neither '#' nor '/'
        """
        for separator in ("#", "/"):
            if separator in iri:
                return iri.rsplit(separator, 1)[-1]
        raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.")

    def _res_to_str(self, res: rdflib.query.ResultRow, var: str) -> str:
        """Render a result row as ``<IRI> (local name, comment)``."""
        iri = res[var]
        return f"<{iri!s}> ({self._get_local_name(iri)}, {res['com']!s})"

    def load_schema(self) -> None:
        """Load the graph schema information into ``self.schema``.

        The schema is a textual listing of the classes and relationships
        found by the query set matching ``self.standard``.

        :raises ValueError: if ``self.standard`` is not rdf, rdfs or owl
        """

        def _listing(rows: List[rdflib.query.ResultRow], var: str) -> str:
            return ", ".join(self._res_to_str(r, var) for r in rows)

        def _rdf_s_schema(
            classes: List[rdflib.query.ResultRow],
            relationships: List[rdflib.query.ResultRow],
        ) -> str:
            return (
                f"In the following, each IRI is followed by the local name and "
                f"optionally its description in parentheses. \n"
                f"The RDF graph supports the following node types:\n"
                f'{_listing(classes, "cls")}\n'
                f"The RDF graph supports the following relationships:\n"
                f'{_listing(relationships, "rel")}\n'
            )

        if self.standard == "rdf":
            clss = self.query(cls_query_rdf)
            rels = self.query(rel_query_rdf)
            self.schema = _rdf_s_schema(clss, rels)
        elif self.standard == "rdfs":
            clss = self.query(cls_query_rdfs)
            rels = self.query(rel_query_rdfs)
            self.schema = _rdf_s_schema(clss, rels)
        elif self.standard == "owl":
            clss = self.query(cls_query_owl)
            ops = self.query(op_query_owl)
            dps = self.query(dp_query_owl)
            self.schema = (
                f"In the following, each IRI is followed by the local name and "
                f"optionally its description in parentheses. \n"
                f"The OWL graph supports the following node types:\n"
                f'{_listing(clss, "cls")}\n'
                f"The OWL graph supports the following object properties, "
                f"i.e., relationships between objects:\n"
                f'{_listing(ops, "op")}\n'
                f"The OWL graph supports the following data properties, "
                f"i.e., relationships between objects and literals:\n"
                f'{_listing(dps, "dp")}\n'
            )
        else:
            raise ValueError(f"Mode '{self.standard}' is currently not supported.")