# Source code for langchain_community.graphs.rdf_graph

from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Dict,
    List,
    Optional,
)

if TYPE_CHECKING:
    import rdflib

# SPARQL ``PREFIX`` declarations keyed by prefix label. Every value is a full
# declaration line, trailing newline included, ready to be prepended to a query.
prefixes = {
    label: f"PREFIX {label}: <{iri}>\n"
    for label, iri in (
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema#"),
    )
}


def _select_with_comment(prefix_labels: tuple, var: str, *patterns: str) -> str:
    """Assemble a ``SELECT DISTINCT ?<var> ?com`` query.

    :param prefix_labels: keys of ``prefixes`` to emit, in order, before SELECT
    :param var: name (without ``?``) of the selected variable
    :param patterns: WHERE-clause lines placed before the trailing
        ``OPTIONAL { ?<var> rdfs:comment ?com }`` line
    :return: the complete query text
    """
    header = "".join(prefixes[label] for label in prefix_labels)
    clauses = [*patterns, f"OPTIONAL {{ ?{var} rdfs:comment ?com }}"]
    # Each WHERE line is indented by four spaces and ends with " \n".
    where_body = "".join(f"    {clause} \n" for clause in clauses)
    return f"{header}SELECT DISTINCT ?{var} ?com\nWHERE {{ \n{where_body}}}"


# Classes: anything used as the object of ``a`` (rdf:type).
cls_query_rdf = _select_with_comment(("rdfs",), "cls", "?instance a ?cls .")

# Classes, additionally following rdfs:subClassOf upwards.
cls_query_rdfs = _select_with_comment(
    ("rdfs",), "cls", "?instance a/rdfs:subClassOf* ?cls ."
)

# Same as the RDFS variant, but restricted to classes that are IRIs.
cls_query_owl = _select_with_comment(
    ("rdfs",),
    "cls",
    "?instance a/rdfs:subClassOf* ?cls .",
    "FILTER (isIRI(?cls)) .",
)

# Relationships: every predicate that occurs in some triple.
rel_query_rdf = _select_with_comment(("rdfs",), "rel", "?subj ?rel ?obj .")

# Relationships: resources typed (via sub-properties) as rdf:Property.
rel_query_rdfs = _select_with_comment(
    ("rdf", "rdfs"), "rel", "?rel a/rdfs:subPropertyOf* rdf:Property ."
)

# Object properties: resources typed (via sub-properties) as owl:ObjectProperty.
op_query_owl = _select_with_comment(
    ("rdfs", "owl"), "op", "?op a/rdfs:subPropertyOf* owl:ObjectProperty ."
)

# Data properties: resources typed (via sub-properties) as owl:DatatypeProperty.
dp_query_owl = _select_with_comment(
    ("rdfs", "owl"), "dp", "?dp a/rdfs:subPropertyOf* owl:DatatypeProperty ."
)


class RdfGraph:
    """RDFlib wrapper for graph operations.

    Modes:
    * local: Local file - can be queried and changed
    * online: Online file - can only be queried, changes can be stored locally
    * store: Triple store - can be queried and changed if update_endpoint available
    Together with a source file, the serialization should be specified.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        source_file: Optional[str] = None,
        serialization: Optional[str] = "ttl",
        query_endpoint: Optional[str] = None,
        update_endpoint: Optional[str] = None,
        standard: Optional[str] = "rdf",
        local_copy: Optional[str] = None,
        graph_kwargs: Optional[Dict] = None,
        store_kwargs: Optional[Dict] = None,
    ) -> None:
        """Set up the RDFlib graph.

        :param source_file: either a path for a local file or a URL
        :param serialization: serialization of the input
        :param query_endpoint: SPARQL endpoint for queries, read access
        :param update_endpoint: SPARQL endpoint for UPDATE queries, write access
        :param standard: RDF, RDFS, or OWL
        :param local_copy: new local copy for storing changes
        :param graph_kwargs: additional rdflib.Graph specific kwargs used to
            initialize it, if query_endpoint is provided
        :param store_kwargs: additional sparqlstore.SPARQLStore specific kwargs
            used to initialize it, if query_endpoint is provided
        :raises ImportError: if rdflib cannot be imported
        :raises ValueError: if ``standard`` is unsupported, or if the
            combination of ``source_file`` and endpoints is ambiguous
        :raises AssertionError: if the loaded graph contains no triples
        """
        self.source_file = source_file
        self.serialization = serialization
        self.query_endpoint = query_endpoint
        self.update_endpoint = update_endpoint
        self.standard = standard
        self.local_copy = local_copy

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError as e:
            raise ImportError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            ) from e
        if self.standard not in (supported_standards := ("rdf", "rdfs", "owl")):
            raise ValueError(
                f"Invalid standard. Supported standards are: {supported_standards}."
            )

        # Exactly one kind of source is allowed: a file, or a triple store
        # reachable through (at least) a query endpoint.
        nothing_to_load = not source_file and not query_endpoint
        conflicting_sources = bool(source_file) and bool(
            query_endpoint or update_endpoint
        )
        if nothing_to_load or conflicting_sources:
            raise ValueError(
                "Could not unambiguously initialize the graph wrapper. "
                "Specify either a file (local or online) via the source_file "
                "or a triple store via the endpoints."
            )

        if source_file:
            # Match the URL scheme, not just the "http" prefix, so that a local
            # file such as "http_dump.ttl" is not mistaken for an online one.
            if source_file.startswith(("http://", "https://")):
                self.mode = "online"
            else:
                self.mode = "local"
                # Without an explicit copy, changes are written back in place.
                if self.local_copy is None:
                    self.local_copy = self.source_file
            self.graph = rdflib.Graph()
            self.graph.parse(source_file, format=self.serialization)

        if query_endpoint:
            store_kwargs = store_kwargs or {}
            self.mode = "store"
            if not update_endpoint:
                self._store = sparqlstore.SPARQLStore(**store_kwargs)
                self._store.open(query_endpoint)
            else:
                self._store = sparqlstore.SPARQLUpdateStore(**store_kwargs)
                self._store.open((query_endpoint, update_endpoint))
            graph_kwargs = graph_kwargs or {}
            self.graph = rdflib.Graph(self._store, **graph_kwargs)

        # Verify that the graph was loaded
        if not len(self.graph):
            raise AssertionError("The graph is empty.")

        # Set schema
        self.schema = ""
        self.load_schema()

    @property
    def get_schema(self) -> str:
        """Return the schema description built by :meth:`load_schema`."""
        return self.schema

    def query(
        self,
        query: str,
    ) -> "List[rdflib.query.ResultRow]":
        """Query the graph.

        :param query: SPARQL query text
        :return: the result entries that are ``ResultRow`` instances
        :raises ValueError: if rdflib reports a ``ParserError`` for the query
        """
        from rdflib.exceptions import ParserError
        from rdflib.query import ResultRow

        try:
            res = self.graph.query(query)
        except ParserError as e:
            raise ValueError("Generated SPARQL statement is invalid\n" f"{e}") from e
        return [r for r in res if isinstance(r, ResultRow)]

    def update(
        self,
        query: str,
    ) -> None:
        """Update the graph and, if a local copy is configured, save it.

        :param query: SPARQL UPDATE text
        :raises ValueError: if a file-backed graph has no ``local_copy`` to
            save to (checked before anything is modified), or if rdflib
            reports a ``ParserError`` for the statement
        """
        # Fail before touching the graph: previously the update was applied and
        # only then rejected. A triple store persists the change itself, so it
        # does not need a local target file.
        if not self.local_copy and self.mode != "store":
            raise ValueError("No target file specified for saving the updated file.")

        from rdflib.exceptions import ParserError

        try:
            self.graph.update(query)
        except ParserError as e:
            raise ValueError("Generated SPARQL statement is invalid\n" f"{e}") from e
        if self.local_copy:
            # The file extension of the target doubles as the rdflib format name.
            self.graph.serialize(
                destination=self.local_copy, format=self.local_copy.split(".")[-1]
            )

    @staticmethod
    def _get_local_name(iri: str) -> str:
        """Return the part of ``iri`` after its last ``#`` (or, if there is no
        ``#``, after its last ``/``).

        :raises ValueError: if ``iri`` contains neither ``#`` nor ``/``
        """
        if "#" in iri:
            local_name = iri.split("#")[-1]
        elif "/" in iri:
            local_name = iri.split("/")[-1]
        else:
            raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.")
        return local_name

    def _res_to_str(self, res: "rdflib.query.ResultRow", var: str) -> str:
        """Format one result row as ``<IRI> (local name, comment)``.

        :param res: row providing ``var`` and ``"com"`` by key
        :param var: name of the variable holding the IRI
        """
        # Convert once so the helper always receives a plain string.
        iri = str(res[var])
        return "<" + iri + "> (" + self._get_local_name(iri) + ", " + str(res["com"]) + ")"

    def load_schema(self) -> None:
        """Load the graph schema information into ``self.schema``.

        Runs the class/relationship queries matching ``self.standard`` and
        renders the results as a textual description.

        :raises ValueError: if ``self.standard`` is not rdf, rdfs or owl
        """
        intro = (
            "In the following, each IRI is followed by the local name and "
            "optionally its description in parentheses. \n"
        )

        def _listing(rows: "List[rdflib.query.ResultRow]", var: str) -> str:
            # Comma-separated rendering of every row for the given variable.
            return ", ".join(self._res_to_str(row, var) for row in rows)

        if self.standard in ("rdf", "rdfs"):
            if self.standard == "rdf":
                cls_query, rel_query = cls_query_rdf, rel_query_rdf
            else:
                cls_query, rel_query = cls_query_rdfs, rel_query_rdfs
            clss = self.query(cls_query)
            rels = self.query(rel_query)
            self.schema = (
                intro
                + "The RDF graph supports the following node types:\n"
                + f"{_listing(clss, 'cls')}\n"
                + "The RDF graph supports the following relationships:\n"
                + f"{_listing(rels, 'rel')}\n"
            )
        elif self.standard == "owl":
            clss = self.query(cls_query_owl)
            ops = self.query(op_query_owl)
            dps = self.query(dp_query_owl)
            self.schema = (
                intro
                + "The OWL graph supports the following node types:\n"
                + f"{_listing(clss, 'cls')}\n"
                + "The OWL graph supports the following object properties, "
                + "i.e., relationships between objects:\n"
                + f"{_listing(ops, 'op')}\n"
                + "The OWL graph supports the following data properties, "
                + "i.e., relationships between objects and literals:\n"
                + f"{_listing(dps, 'dp')}\n"
            )
        else:
            raise ValueError(f"Mode '{self.standard}' is currently not supported.")