from __future__ import annotations
from typing import (
TYPE_CHECKING,
Dict,
List,
Optional,
)
if TYPE_CHECKING:
import rdflib
prefixes = {
"owl": """PREFIX owl: <http://www.w3.org/2002/07/owl#>\n""",
"rdf": """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n""",
"rdfs": """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n""",
"xsd": """PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n""",
}
cls_query_rdf = prefixes["rdfs"] + (
"""SELECT DISTINCT ?cls ?com\n"""
"""WHERE { \n"""
""" ?instance a ?cls . \n"""
""" OPTIONAL { ?cls rdfs:comment ?com } \n"""
"""}"""
)
cls_query_rdfs = prefixes["rdfs"] + (
"""SELECT DISTINCT ?cls ?com\n"""
"""WHERE { \n"""
""" ?instance a/rdfs:subClassOf* ?cls . \n"""
""" OPTIONAL { ?cls rdfs:comment ?com } \n"""
"""}"""
)
cls_query_owl = prefixes["rdfs"] + (
"""SELECT DISTINCT ?cls ?com\n"""
"""WHERE { \n"""
""" ?instance a/rdfs:subClassOf* ?cls . \n"""
""" FILTER (isIRI(?cls)) . \n"""
""" OPTIONAL { ?cls rdfs:comment ?com } \n"""
"""}"""
)
rel_query_rdf = prefixes["rdfs"] + (
"""SELECT DISTINCT ?rel ?com\n"""
"""WHERE { \n"""
""" ?subj ?rel ?obj . \n"""
""" OPTIONAL { ?rel rdfs:comment ?com } \n"""
"""}"""
)
rel_query_rdfs = (
prefixes["rdf"]
+ prefixes["rdfs"]
+ (
"""SELECT DISTINCT ?rel ?com\n"""
"""WHERE { \n"""
""" ?rel a/rdfs:subPropertyOf* rdf:Property . \n"""
""" OPTIONAL { ?rel rdfs:comment ?com } \n"""
"""}"""
)
)
op_query_owl = (
prefixes["rdfs"]
+ prefixes["owl"]
+ (
"""SELECT DISTINCT ?op ?com\n"""
"""WHERE { \n"""
""" ?op a/rdfs:subPropertyOf* owl:ObjectProperty . \n"""
""" OPTIONAL { ?op rdfs:comment ?com } \n"""
"""}"""
)
)
dp_query_owl = (
prefixes["rdfs"]
+ prefixes["owl"]
+ (
"""SELECT DISTINCT ?dp ?com\n"""
"""WHERE { \n"""
""" ?dp a/rdfs:subPropertyOf* owl:DatatypeProperty . \n"""
""" OPTIONAL { ?dp rdfs:comment ?com } \n"""
"""}"""
)
)
[docs]class RdfGraph:
"""RDFlib图操作的包装器。
模式:
* local: 本地文件 - 可以查询和更改
* online: 在线文件 - 只能查询,更改可以在本地存储
* store: 三元组存储 - 如果update_endpoint可用,则可以查询和更改
除了指定源文件外,还应指定序列化方式。
*安全提示*:确保数据库连接使用的凭据仅限于包括必要权限。
如果未这样做,可能会导致数据损坏或丢失,因为调用代码可能尝试执行命令,这些命令会导致删除、变异数据(如果适当提示)或读取敏感数据(如果数据库中存在这样的数据)。
防范这些负面结果的最佳方法是(根据情况)限制授予此工具使用的凭据的权限。
有关更多信息,请参见 https://python.langchain.com/docs/security。"""
[docs] def __init__(
self,
source_file: Optional[str] = None,
serialization: Optional[str] = "ttl",
query_endpoint: Optional[str] = None,
update_endpoint: Optional[str] = None,
standard: Optional[str] = "rdf",
local_copy: Optional[str] = None,
graph_kwargs: Optional[Dict] = None,
store_kwargs: Optional[Dict] = None,
) -> None:
"""设置RDFlib图
:param source_file: 本地文件路径或URL
:param serialization: 输入的序列化格式
:param query_endpoint: 用于查询的SPARQL端点,具有读取权限
:param update_endpoint: 用于UPDATE查询的SPARQL端点,具有写入权限
:param standard: RDF、RDFS或OWL
:param local_copy: 用于存储更改的新本地副本
:param graph_kwargs: 用于初始化rdflib.Graph的其他特定kwargs,如果提供了query_endpoint。
:param store_kwargs: 用于初始化sparqlstore.SPARQLStore的其他特定kwargs,如果提供了query_endpoint。
"""
self.source_file = source_file
self.serialization = serialization
self.query_endpoint = query_endpoint
self.update_endpoint = update_endpoint
self.standard = standard
self.local_copy = local_copy
try:
import rdflib
from rdflib.plugins.stores import sparqlstore
except ImportError:
raise ImportError(
"Could not import rdflib python package. "
"Please install it with `pip install rdflib`."
)
if self.standard not in (supported_standards := ("rdf", "rdfs", "owl")):
raise ValueError(
f"Invalid standard. Supported standards are: {supported_standards}."
)
if (
not source_file
and not query_endpoint
or source_file
and (query_endpoint or update_endpoint)
):
raise ValueError(
"Could not unambiguously initialize the graph wrapper. "
"Specify either a file (local or online) via the source_file "
"or a triple store via the endpoints."
)
if source_file:
if source_file.startswith("http"):
self.mode = "online"
else:
self.mode = "local"
if self.local_copy is None:
self.local_copy = self.source_file
self.graph = rdflib.Graph()
self.graph.parse(source_file, format=self.serialization)
if query_endpoint:
store_kwargs = store_kwargs or {}
self.mode = "store"
if not update_endpoint:
self._store = sparqlstore.SPARQLStore(**store_kwargs)
self._store.open(query_endpoint)
else:
self._store = sparqlstore.SPARQLUpdateStore(**store_kwargs)
self._store.open((query_endpoint, update_endpoint))
graph_kwargs = graph_kwargs or {}
self.graph = rdflib.Graph(self._store, **graph_kwargs)
# Verify that the graph was loaded
if not len(self.graph):
raise AssertionError("The graph is empty.")
# Set schema
self.schema = ""
self.load_schema()
@property
def get_schema(self) -> str:
"""
返回图数据库的模式。
"""
return self.schema
[docs] def query(
self,
query: str,
) -> List[rdflib.query.ResultRow]:
"""
查询图。
"""
from rdflib.exceptions import ParserError
from rdflib.query import ResultRow
try:
res = self.graph.query(query)
except ParserError as e:
raise ValueError("Generated SPARQL statement is invalid\n" f"{e}")
return [r for r in res if isinstance(r, ResultRow)]
[docs] def update(
self,
query: str,
) -> None:
"""
更新图表。
"""
from rdflib.exceptions import ParserError
try:
self.graph.update(query)
except ParserError as e:
raise ValueError("Generated SPARQL statement is invalid\n" f"{e}")
if self.local_copy:
self.graph.serialize(
destination=self.local_copy, format=self.local_copy.split(".")[-1]
)
else:
raise ValueError("No target file specified for saving the updated file.")
@staticmethod
def _get_local_name(iri: str) -> str:
if "#" in iri:
local_name = iri.split("#")[-1]
elif "/" in iri:
local_name = iri.split("/")[-1]
else:
raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.")
return local_name
def _res_to_str(self, res: rdflib.query.ResultRow, var: str) -> str:
return (
"<"
+ str(res[var])
+ "> ("
+ self._get_local_name(res[var])
+ ", "
+ str(res["com"])
+ ")"
)
[docs] def load_schema(self) -> None:
"""
加载图模式信息。
"""
def _rdf_s_schema(
classes: List[rdflib.query.ResultRow],
relationships: List[rdflib.query.ResultRow],
) -> str:
return (
f"In the following, each IRI is followed by the local name and "
f"optionally its description in parentheses. \n"
f"The RDF graph supports the following node types:\n"
f'{", ".join([self._res_to_str(r, "cls") for r in classes])}\n'
f"The RDF graph supports the following relationships:\n"
f'{", ".join([self._res_to_str(r, "rel") for r in relationships])}\n'
)
if self.standard == "rdf":
clss = self.query(cls_query_rdf)
rels = self.query(rel_query_rdf)
self.schema = _rdf_s_schema(clss, rels)
elif self.standard == "rdfs":
clss = self.query(cls_query_rdfs)
rels = self.query(rel_query_rdfs)
self.schema = _rdf_s_schema(clss, rels)
elif self.standard == "owl":
clss = self.query(cls_query_owl)
ops = self.query(op_query_owl)
dps = self.query(dp_query_owl)
self.schema = (
f"In the following, each IRI is followed by the local name and "
f"optionally its description in parentheses. \n"
f"The OWL graph supports the following node types:\n"
f'{", ".join([self._res_to_str(r, "cls") for r in clss])}\n'
f"The OWL graph supports the following object properties, "
f"i.e., relationships between objects:\n"
f'{", ".join([self._res_to_str(r, "op") for r in ops])}\n'
f"The OWL graph supports the following data properties, "
f"i.e., relationships between objects and literals:\n"
f'{", ".join([self._res_to_str(r, "dp") for r in dps])}\n'
)
else:
raise ValueError(f"Mode '{self.standard}' is currently not supported.")