Source code for langchain_community.graphs.ontotext_graphdb_graph

from __future__ import annotations

import os
from typing import (
    TYPE_CHECKING,
    List,
    Optional,
    Union,
)

if TYPE_CHECKING:
    import rdflib


class OntotextGraphDBGraph:
    """Ontotext GraphDB https://graphdb.ontotext.com/ wrapper for graph operations.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly scoped to only include the necessary permissions.
        Failure to do so may result in data corruption or loss, since the
        calling code may attempt commands that would result in deletion or
        mutation of data (if appropriately prompted) or in reading sensitive
        data (if such data is present in the database).
        The best way to guard against such negative outcomes is to (as
        appropriate) limit the permissions granted to the credentials used
        with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        query_endpoint: str,
        query_ontology: Optional[str] = None,
        local_file: Optional[str] = None,
        local_file_format: Optional[str] = None,
    ) -> None:
        """Set up the GraphDB wrapper.

        :param query_endpoint: SPARQL endpoint for queries, read access.

            If GraphDB is secured, set the environment variables
            'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD'.

        :param query_ontology: a `CONSTRUCT` query that is executed
            on the SPARQL endpoint and returns the knowledge graph schema
            statements.
            Example:
            'CONSTRUCT {?s ?p ?o} FROM <https://example.com/ontology/>
            WHERE {?s ?p ?o}'

            Currently, DESCRIBE queries such as
            'PREFIX onto: <https://example.com/ontology/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            DESCRIBE ?term WHERE { ?term rdfs:isDefinedBy onto: }'
            are not supported, because DESCRIBE returns the Symmetric Concise
            Bounded Description (SCBD), i.e. also the incoming class links.
            For large graphs with millions of instances this is inefficient.
            See https://github.com/eclipse-rdf4j/rdf4j/issues/4857

        :param local_file: a local RDF ontology file.
            Supported RDF formats: Turtle, RDF/XML, JSON-LD, N-Triples,
            Notation-3, Trig, Trix, N-Quads.
            If the RDF format cannot be determined from the file extension,
            pass it explicitly in the `local_file_format` parameter.

        :param local_file_format: used if the RDF format cannot be determined
            from the local file extension.
            One of "json-ld", "xml", "n3", "turtle", "nt", "trig", "nquads",
            "trix".

        Exactly one of `query_ontology` or `local_file` should be passed.

        :raises ValueError: if both or neither of `query_ontology` and
            `local_file` are provided, or if a helper rejects the endpoint,
            the file or the query.
        :raises ImportError: if the `rdflib` package is not installed.
        """
        # Argument validation comes first so misuse is reported without
        # touching rdflib or the network.
        if query_ontology and local_file:
            raise ValueError("Both file and query provided. Only one is allowed.")

        if not query_ontology and not local_file:
            raise ValueError("Neither file nor query provided. One is required.")

        try:
            import rdflib
            from rdflib.plugins.stores import sparqlstore
        except ImportError as e:
            raise ImportError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            ) from e

        auth = self._get_auth()
        store = sparqlstore.SPARQLStore(auth=auth)
        store.open(query_endpoint)

        self.graph = rdflib.Graph(store, identifier=None, bind_namespaces="none")
        self._check_connectivity()

        if local_file:
            ontology_schema_graph = self._load_ontology_schema_from_file(
                local_file,
                local_file_format,
            )
        else:
            self._validate_user_query(query_ontology)  # type: ignore[arg-type]
            ontology_schema_graph = self._load_ontology_schema_with_query(
                query_ontology  # type: ignore[arg-type]
            )
        # The schema is kept as a Turtle-serialized string.
        self.schema = ontology_schema_graph.serialize(format="turtle")

    @staticmethod
    def _get_auth() -> Union[tuple, None]:
        """Return the basic authentication configuration.

        Reads the environment variables 'GRAPHDB_USERNAME' and
        'GRAPHDB_PASSWORD'.

        :return: a ``(username, password)`` tuple when a username is set,
            otherwise ``None``.
        :raises ValueError: if a username is set but the password is missing
            or empty.
        """
        username = os.environ.get("GRAPHDB_USERNAME", None)
        password = os.environ.get("GRAPHDB_PASSWORD", None)

        if username:
            if not password:
                raise ValueError(
                    "Environment variable 'GRAPHDB_USERNAME' is set, "
                    "but 'GRAPHDB_PASSWORD' is not set."
                )
            return username, password
        return None

    def _check_connectivity(self) -> None:
        """Execute a simple `ASK` query to check connectivity.

        :raises ValueError: if the query against ``self.graph`` raises
            ``ValueError``; the original error is attached as the cause.
        """
        try:
            self.graph.query("ASK { ?s ?p ?o }")
        except ValueError as e:
            raise ValueError(
                "Could not query the provided endpoint. "
                "Please, check, if the value of the provided "
                "query_endpoint points to the right repository. "
                "If GraphDB is secured, please, "
                "make sure that the environment variables "
                "'GRAPHDB_USERNAME' and 'GRAPHDB_PASSWORD' are set."
            ) from e

    @staticmethod
    def _load_ontology_schema_from_file(
        local_file: str, local_file_format: Optional[str] = None
    ) -> rdflib.Graph:
        """Parse the ontology schema statements from the provided file.

        :param local_file: path of the RDF file to parse.
        :param local_file_format: RDF format passed to the parser; ``None``
            leaves the format unspecified.
        :return: the graph holding the parsed statements.
        :raises FileNotFoundError: if `local_file` does not exist.
        :raises PermissionError: if `local_file` is not readable.
        :raises ValueError: if parsing the file fails for any reason.
        """
        # Cheap filesystem checks run before importing rdflib so that a bad
        # path is reported as such.
        if not os.path.exists(local_file):
            raise FileNotFoundError(f"File {local_file} does not exist.")
        if not os.access(local_file, os.R_OK):
            raise PermissionError(f"Read permission for {local_file} is restricted")

        import rdflib

        graph = rdflib.ConjunctiveGraph()
        try:
            graph.parse(local_file, format=local_file_format)
        except Exception as e:
            # The two-argument form is kept so `args`/`str()` stay unchanged
            # for existing callers; the cause is additionally chained.
            raise ValueError(f"Invalid file format for {local_file} : ", e) from e
        return graph

    @staticmethod
    def _validate_user_query(query_ontology: str) -> None:
        """Validate that the query is a valid SPARQL CONSTRUCT query.

        :param query_ontology: the SPARQL query text to validate.
        :raises TypeError: if `query_ontology` is not a string.
        :raises ValueError: if the query cannot be parsed as SPARQL or is
            not a CONSTRUCT query.
        """
        # Type check first: it needs neither pyparsing nor rdflib.
        if not isinstance(query_ontology, str):
            raise TypeError("Ontology query must be provided as string.")

        from pyparsing import ParseException
        from rdflib.plugins.sparql import prepareQuery

        try:
            parsed_query = prepareQuery(query_ontology)
        except ParseException as e:
            # Two-argument form kept for backward-compatible `args`/`str()`.
            raise ValueError("Ontology query is not a valid SPARQL query.", e) from e

        if parsed_query.algebra.name != "ConstructQuery":
            raise ValueError(
                "Invalid query type. Only CONSTRUCT queries are supported."
            )

    def _load_ontology_schema_with_query(self, query: str):  # type: ignore[no-untyped-def]
        """Execute the query for collecting the ontology schema statements.

        :param query: the SPARQL query to run against ``self.graph``.
        :return: the ``graph`` attribute of the query result.
        :raises ValueError: if rdflib raises ``ParserError`` for the query.
        """
        from rdflib.exceptions import ParserError

        try:
            results = self.graph.query(query)
        except ParserError as e:
            raise ValueError(f"Generated SPARQL statement is invalid\n{e}") from e

        return results.graph

    @property
    def get_schema(self) -> str:
        """Return the schema of the graph database in turtle format."""
        return self.schema

    def query(
        self,
        query: str,
    ) -> List[rdflib.query.ResultRow]:
        """Query the graph.

        :param query: the SPARQL query to run against ``self.graph``.
        :return: the result items that are ``ResultRow`` instances; any other
            items are dropped.
        """
        from rdflib.query import ResultRow

        res = self.graph.query(query)
        return [r for r in res if isinstance(r, ResultRow)]