Source code for langchain_community.graphs.ontotext_graphdb_graph
from __future__ import annotations
import os
from typing import (
TYPE_CHECKING,
List,
Optional,
Union,
)
if TYPE_CHECKING:
import rdflib
class OntotextGraphDBGraph:
    """Wrapper for graph operations on Ontotext GraphDB.

    See https://graphdb.ontotext.com/ for the database itself.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly scoped to only include the necessary permissions.
        Failure to do so may result in data corruption or loss, since the
        calling code may attempt commands that would delete or mutate data
        (if appropriately prompted) or read sensitive data (if such data is
        present in the database).
        The best way to guard against such negative outcomes is to (as
        appropriate) limit the permissions granted to the credentials used
        with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        query_endpoint: str,
        query_ontology: Optional[str] = None,
        local_file: Optional[str] = None,
        local_file_format: Optional[str] = None,
    ) -> None:
        """Set up the GraphDB wrapper.

        Exactly one of `query_ontology` or `local_file` must be passed.

        :param query_endpoint: SPARQL endpoint for queries, read access.
            If GraphDB is secured, set the environment variables
            'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD'.
        :param query_ontology: a `CONSTRUCT` query that is executed on the
            SPARQL endpoint and returns the KG schema statements.
            Example:
            'CONSTRUCT {?s ?p ?o} FROM <https://example.com/ontology/> WHERE {?s ?p ?o}'
            Currently, DESCRIBE queries like
            'PREFIX onto: <https://example.com/ontology/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            DESCRIBE ?term WHERE {
                ?term rdfs:isDefinedBy onto:
            }'
            are not supported, because DESCRIBE returns the Symmetric Concise
            Bounded Description (SCBD), i.e. also the incoming class links.
            For large graphs with millions of instances this is inefficient.
            See https://github.com/eclipse-rdf4j/rdf4j/issues/4857
        :param local_file: a local RDF ontology file.
            Supported RDF formats:
            Turtle, RDF/XML, JSON-LD, N-Triples, Notation-3, Trig, Trix,
            N-Quads.
            If the RDF format cannot be determined from the file extension,
            pass it explicitly in the `local_file_format` param.
        :param local_file_format: used if the RDF format cannot be determined
            from the local file extension.
            One of "json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads",
            "trix".
        :raises ValueError: if both or neither of `query_ontology` and
            `local_file` are given, or if one of the helpers called here
            rejects the credentials, endpoint, file or query.
        :raises ImportError: if the `rdflib` package is not installed.
        """
        # Validate the mutually exclusive arguments before doing any I/O.
        if query_ontology and local_file:
            raise ValueError("Both file and query provided. Only one is allowed.")
        if not query_ontology and not local_file:
            raise ValueError("Neither file nor query provided. One is required.")

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError as e:
            raise ImportError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            ) from e

        auth = self._get_auth()
        store = sparqlstore.SPARQLStore(auth=auth)
        store.open(query_endpoint)

        self.graph = rdflib.Graph(store, identifier=None, bind_namespaces="none")
        self._check_connectivity()

        if local_file:
            ontology_schema_graph = self._load_ontology_schema_from_file(
                local_file,
                local_file_format,
            )
        else:
            # query_ontology is known to be truthy here (checked above).
            self._validate_user_query(query_ontology)  # type: ignore[arg-type]
            ontology_schema_graph = self._load_ontology_schema_with_query(
                query_ontology  # type: ignore[arg-type]
            )
        # The schema is kept as a Turtle string.
        self.schema = ontology_schema_graph.serialize(format="turtle")

    @staticmethod
    def _get_auth() -> Union[tuple, None]:
        """Return the basic authentication configuration.

        Reads 'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' from the environment.

        :return: a `(username, password)` tuple, or None when no username is
            set.
        :raises ValueError: if the username is set but the password is not.
        """
        username = os.environ.get("GRAPHDB_USERNAME")
        password = os.environ.get("GRAPHDB_PASSWORD")

        if not username:
            return None
        if not password:
            raise ValueError(
                "Environment variable 'GRAPHDB_USERNAME' is set, "
                "but 'GRAPHDB_PASSWORD' is not set."
            )
        return username, password

    def _check_connectivity(self) -> None:
        """Execute a simple `ASK` query to check connectivity.

        :raises ValueError: if the query against `self.graph` raises a
            ValueError; the original error is attached as the cause.
        """
        try:
            self.graph.query("ASK { ?s ?p ?o }")
        except ValueError as e:
            raise ValueError(
                "Could not query the provided endpoint. "
                "Please, check, if the value of the provided "
                "query_endpoint points to the right repository. "
                "If GraphDB is secured, please, "
                "make sure that the environment variables "
                "'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set."
            ) from e

    @staticmethod
    def _load_ontology_schema_from_file(local_file: str, local_file_format: Optional[str] = None):  # type: ignore[no-untyped-def]
        """Parse the ontology schema statements from the provided file.

        :param local_file: path of the RDF file to parse.
        :param local_file_format: RDF format passed to the parser; None lets
            the parser decide.
        :return: an `rdflib.ConjunctiveGraph` holding the parsed statements.
        :raises FileNotFoundError: if `local_file` does not exist.
        :raises PermissionError: if `local_file` is not readable.
        :raises ValueError: if parsing the file fails for any reason.
        """
        # Cheap path checks run first so bad input is rejected before the
        # third-party import is attempted.
        if not os.path.exists(local_file):
            raise FileNotFoundError(f"File {local_file} does not exist.")
        if not os.access(local_file, os.R_OK):
            raise PermissionError(f"Read permission for {local_file} is restricted")

        import rdflib

        graph = rdflib.ConjunctiveGraph()
        try:
            graph.parse(local_file, format=local_file_format)
        except Exception as e:
            # Put the cause into the message itself; passing it as a second
            # argument would make str(error) a tuple repr.
            raise ValueError(f"Invalid file format for {local_file} : {e}") from e
        return graph

    @staticmethod
    def _validate_user_query(query_ontology: str) -> None:
        """Validate that the query is a valid SPARQL CONSTRUCT query.

        :param query_ontology: the SPARQL query text to validate.
        :raises TypeError: if `query_ontology` is not a string.
        :raises ValueError: if the query cannot be parsed, or if it is not a
            CONSTRUCT query.
        """
        # Type check first: it needs neither pyparsing nor rdflib.
        if not isinstance(query_ontology, str):
            raise TypeError("Ontology query must be provided as string.")

        from pyparsing import ParseException
        from rdflib.plugins.sparql import prepareQuery

        try:
            parsed_query = prepareQuery(query_ontology)
        except ParseException as e:
            raise ValueError(
                f"Ontology query is not a valid SPARQL query. {e}"
            ) from e

        if parsed_query.algebra.name != "ConstructQuery":
            raise ValueError(
                "Invalid query type. Only CONSTRUCT queries are supported."
            )

    def _load_ontology_schema_with_query(self, query: str):  # type: ignore[no-untyped-def]
        """Execute the query for collecting the ontology schema statements.

        :param query: the query to run against `self.graph`.
        :return: the `graph` attribute of the query result.
        :raises ValueError: if rdflib raises a `ParserError` for the query.
        """
        from rdflib.exceptions import ParserError

        try:
            results = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}") from e

        return results.graph

    @property
    def get_schema(self) -> str:
        """Return the schema of the graph database in turtle format."""
        return self.schema

    def query(
        self,
        query: str,
    ) -> "List[rdflib.query.ResultRow]":
        """Query the graph.

        :param query: the query to run against `self.graph`.
        :return: the result items that are `ResultRow` instances; any other
            item yielded by the result is dropped.
        """
        from rdflib.query import ResultRow

        res = self.graph.query(query)
        return [r for r in res if isinstance(r, ResultRow)]