langchain_community.chains.graph_qa.neptune_sparql ηš„ζΊδ»£η 

"""
Question answering over an RDF or OWL graph using SPARQL.
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from pydantic import Field

from langchain_community.chains.graph_qa.prompts import SPARQL_QA_PROMPT
from langchain_community.graphs import NeptuneRdfGraph

INTERMEDIATE_STEPS_KEY = "intermediate_steps"

SPARQL_GENERATION_TEMPLATE = """
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following 
query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {{
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}}
```
Instructions:
Use only the node types and properties provided in the schema.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes.

Examples:

Schema:
{schema}
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than 
for you to construct a SPARQL query.
Do not include any text except the SPARQL query generated.

The question is:
{prompt}"""

SPARQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE
)


[docs] def extract_sparql(query: str) -> str: """Extract SPARQL code from a text. Args: query: Text to extract SPARQL code from. Returns: SPARQL code extracted from the text. """ query = query.strip() querytoks = query.split("```") if len(querytoks) == 3: query = querytoks[1] if query.startswith("sparql"): query = query[6:] elif query.startswith("<sparql>") and query.endswith("</sparql>"): query = query[8:-9] return query
[docs] class NeptuneSparqlQAChain(Chain): """Chain for question-answering against a Neptune graph by generating SPARQL statements. *Security note*: Make sure that the database connection uses credentials that are narrowly-scoped to only include necessary permissions. Failure to do so may result in data corruption or loss, since the calling code may attempt commands that would result in deletion, mutation of data if appropriately prompted or reading sensitive data if such data is present in the database. The best way to guard against such negative outcomes is to (as appropriate) limit the permissions granted to the credentials used with this tool. See https://python.langchain.com/docs/security for more information. Example: .. code-block:: python chain = NeptuneSparqlQAChain.from_llm( llm=llm, graph=graph ) response = chain.invoke(query) """ graph: NeptuneRdfGraph = Field(exclude=True) sparql_generation_chain: LLMChain qa_chain: LLMChain input_key: str = "query" #: :meta private: output_key: str = "result" #: :meta private: top_k: int = 10 return_intermediate_steps: bool = False """Whether or not to return the intermediate steps along with the final answer.""" return_direct: bool = False """Whether or not to return the result of querying the graph directly.""" extra_instructions: Optional[str] = None """Extra instructions by the appended to the query generation prompt.""" allow_dangerous_requests: bool = False """Forced user opt-in to acknowledge that the chain can make dangerous requests. *Security note*: Make sure that the database connection uses credentials that are narrowly-scoped to only include necessary permissions. Failure to do so may result in data corruption or loss, since the calling code may attempt commands that would result in deletion, mutation of data if appropriately prompted or reading sensitive data if such data is present in the database. The best way to guard against such negative outcomes is to (as appropriate) limit the permissions granted to the credentials used with this tool. See https://python.langchain.com/docs/security for more information. """ def __init__(self, **kwargs: Any) -> None: """Initialize the chain.""" super().__init__(**kwargs) if self.allow_dangerous_requests is not True: raise ValueError( "In order to use this chain, you must acknowledge that it can make " "dangerous requests by setting `allow_dangerous_requests` to `True`." "You must narrowly scope the permissions of the database connection " "to only include necessary permissions. Failure to do so may result " "in data corruption or loss or reading sensitive data if such data is " "present in the database." "Only use this chain if you understand the risks and have taken the " "necessary precautions. " "See https://python.langchain.com/docs/security for more information." ) @property def input_keys(self) -> List[str]: return [self.input_key] @property def output_keys(self) -> List[str]: _output_keys = [self.output_key] return _output_keys
[docs] @classmethod def from_llm( cls, llm: BaseLanguageModel, *, qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT, sparql_prompt: BasePromptTemplate = SPARQL_GENERATION_PROMPT, examples: Optional[str] = None, **kwargs: Any, ) -> NeptuneSparqlQAChain: """Initialize from LLM.""" qa_chain = LLMChain(llm=llm, prompt=qa_prompt) template_to_use = SPARQL_GENERATION_TEMPLATE if examples: template_to_use = template_to_use.replace( "Examples:", "Examples: " + examples ) sparql_prompt = PromptTemplate( input_variables=["schema", "prompt"], template=template_to_use ) sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt) return cls( # type: ignore[call-arg] qa_chain=qa_chain, sparql_generation_chain=sparql_generation_chain, examples=examples, **kwargs, )
def _call( self, inputs: Dict[str, Any], run_manager: Optional[CallbackManagerForChainRun] = None, ) -> Dict[str, str]: """ Generate SPARQL query, use it to retrieve a response from the gdb and answer the question. """ _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() callbacks = _run_manager.get_child() prompt = inputs[self.input_key] intermediate_steps: List = [] generated_sparql = self.sparql_generation_chain.run( {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks ) # Extract SPARQL generated_sparql = extract_sparql(generated_sparql) _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose) _run_manager.on_text( generated_sparql, color="green", end="\n", verbose=self.verbose ) intermediate_steps.append({"query": generated_sparql}) context = self.graph.query(generated_sparql) if self.return_direct: final_result = context else: _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) _run_manager.on_text( str(context), color="green", end="\n", verbose=self.verbose ) intermediate_steps.append({"context": context}) result = self.qa_chain( {"prompt": prompt, "context": context}, callbacks=callbacks, ) final_result = result[self.qa_chain.output_key] chain_result: Dict[str, Any] = {self.output_key: final_result} if self.return_intermediate_steps: chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps return chain_result