JSON查询引擎
JSON查询引擎用于查询符合JSON模式的JSON文档。
该JSON模式随后会被放入提示词的上下文中,用于将自然语言查询转换为结构化的JSONPath查询;接着使用这个JSONPath查询检索数据,以回答给定的问题。
如果您在colab上打开这个笔记本,您可能需要安装LlamaIndex 🦙。
In [ ]:
Copied!
%pip install llama-index-llms-openai
%pip install llama-index-llms-openai
In [ ]:
Copied!
!pip install llama-index
!pip install llama-index
In [ ]:
Copied!
# 首先,安装jsonpath-ng包,该包默认用于解析和执行JSONPath查询。
!pip install jsonpath-ng
Requirement already satisfied: jsonpath-ng in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (1.5.3)
Requirement already satisfied: ply in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (from jsonpath-ng) (3.11)
Requirement already satisfied: six in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (from jsonpath-ng) (1.16.0)
Requirement already satisfied: decorator in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (from jsonpath-ng) (5.1.1)
WARNING: You are using pip version 21.2.4; however, version 23.2.1 is available.
You should consider upgrading via the '/Users/loganmarkewich/llama_index/llama-index/bin/python3 -m pip install --upgrade pip' command.
In [ ]:
Copied!
import logging
import sys

# Route INFO-and-above records from the root logger to stdout so that they
# appear in the notebook output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Attach one additional stdout handler to the root logger, as the original
# cell does. This must run only once: every extra call adds another handler
# and therefore another copy of each log line.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
In [ ]:
Copied!
import os
import openai

# Expose the OpenAI API key through the environment.
# "YOUR_KEY_HERE" is a placeholder and must be replaced with a real key
# before any cell that calls the LLM is run.
os.environ["OPENAI_API_KEY"] = "YOUR_KEY_HERE"
In [ ]:
Copied!
from IPython.display import Markdown, display
from IPython.display import Markdown, display
In [ ]:
Copied!
# Sample data to test against.
json_value = {
    "blogPosts": [
        {
            "id": 1,
            "title": "第一篇博客文章",
            "content": "这是我的第一篇博客文章",
        },
        {
            "id": 2,
            "title": "第二篇博客文章",
            "content": "这是我的第二篇博客文章",
        },
    ],
    "comments": [
        {
            "id": 1,
            "content": "好帖子!",
            "username": "jerry",
            "blogPostId": 1,
        },
        {
            "id": 2,
            "content": "有趣的想法",
            "username": "simon",
            "blogPostId": 2,
        },
        {
            "id": 3,
            "content": "喜欢阅读这篇文章!",
            "username": "simon",
            "blogPostId": 2,
        },
    ],
}

# JSON Schema object that the JSON value above conforms to.
json_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "description": "一个非常简单的博客文章应用程序的模式",
    "type": "object",
    "properties": {
        "blogPosts": {
            "description": "博客文章列表",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "博客文章的唯一标识符",
                        "type": "integer",
                    },
                    "title": {
                        "description": "博客文章的标题",
                        "type": "string",
                    },
                    "content": {
                        "description": "博客文章的内容",
                        "type": "string",
                    },
                },
                "required": ["id", "title", "content"],
            },
        },
        "comments": {
            "description": "博客文章评论列表",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "评论的唯一标识符",
                        "type": "integer",
                    },
                    "content": {
                        "description": "评论的内容",
                        "type": "string",
                    },
                    "username": {
                        "description": "评论者的用户名(小写)",
                        "type": "string",
                    },
                    "blogPostId": {
                        "description": "评论所属的博客文章的标识符",
                        "type": "integer",
                    },
                },
                "required": ["id", "content", "username", "blogPostId"],
            },
        },
    },
    "required": ["blogPosts", "comments"],
}
In [ ]:
Copied!
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.struct_store import JSONQueryEngine

# A single LLM instance is shared by both query engines.
llm = OpenAI(model="gpt-4")

# Engine built with the default settings. In this notebook its answer is
# shown under the "Natural language Response" heading.
nl_query_engine = JSONQueryEngine(
    json_value=json_value,
    json_schema=json_schema,
    llm=llm,
)

# Same data and schema, but with response synthesis switched off. In this
# notebook its answer is shown under the "Raw JSON Response" heading.
raw_query_engine = JSONQueryEngine(
    json_value=json_value,
    json_schema=json_schema,
    llm=llm,
    synthesize_response=False,
)
In [ ]:
Copied!
# Ask both engines the same question, once each. Every query() call goes
# through the LLM-backed engine, so the calls are not repeated.
nl_response = nl_query_engine.query(
    "What comments has Jerry been writing?",
)
raw_response = raw_query_engine.query(
    "What comments has Jerry been writing?",
)
In [ ]:
Copied!
# Render both responses as Markdown. The heading and bold tags are part of
# the f-strings and must stay inside the string literals.
display(
    Markdown(f"<h1>Natural language Response</h1><br><b>{nl_response}</b>")
)
display(Markdown(f"<h1>Raw JSON Response</h1><br><b>{raw_response}</b>"))
Natural language Response
Jerry has written the comment "Nice post!".
Raw JSON Response
["Nice post!"]
In [ ]:
Copied!
# Get the JSON path query string. Works for the raw response as well.
print(nl_response.metadata["json_path_response_str"])
$.comments[?(@.username=='jerry')].content