%pip install llama-index-readers-file pymupdf
%pip install llama-index-program-openai
%pip install llama-index-llms-openai
from llama_index.core import PromptTemplate
# Candidate routes for the toy example: each string describes the kind of
# question one choice is suited for.
choices = [
"Useful for questions related to apples",
"Useful for questions related to oranges",
]
def get_choice_str(choices):
    """Render ``choices`` as a 1-based numbered list.

    Each entry is formatted as ``"<number>. <choice>"`` and entries are
    separated by a blank line. An empty iterable yields an empty string.
    """
    numbered = (
        f"{number}. {text}" for number, text in enumerate(choices, start=1)
    )
    return "\n\n".join(numbered)
# Numbered rendering of the module-level choices; passed to the prompt as
# {context_list} further down.
choices_str = get_choice_str(choices)
# Base routing prompt. Template variables: {num_choices}, {context_list},
# {max_outputs} and {query_str}.
router_prompt0 = PromptTemplate(
"Some choices are given below. It is provided in a numbered list (1 to"
" {num_choices}), where each item in the list corresponds to a"
" summary.\n---------------------\n{context_list}\n---------------------\nUsing"
" only the choices above and not prior knowledge, return the top choices"
" (no more than {max_outputs}, but only select what is needed) that are"
" most relevant to the question: '{query_str}'\n"
)
让我们尝试在一组玩具问题上使用这个提示,看看输出会是什么样的。
from llama_index.llms.openai import OpenAI
# Shared OpenAI LLM handle used by the completion calls and query engines below.
llm = OpenAI(model="gpt-3.5-turbo")
def get_formatted_prompt(query_str):
    """Fill the base router prompt for ``query_str``.

    Uses the module-level ``choices`` / ``choices_str`` for the choice list
    and caps the number of selections at two.
    """
    return router_prompt0.format(
        query_str=query_str,
        context_list=choices_str,
        num_choices=len(choices),
        max_outputs=2,
    )
# Example 1: a question that concerns only apples.
query_str = "Can you tell me more about the amount of Vitamin C in apples"
fmt_prompt = get_formatted_prompt(query_str)
# Plain text completion: the response is free-form text, not structured output.
response = llm.complete(fmt_prompt)
print(str(response))
1. Useful for questions related to apples
# Example 2: a question that concerns only oranges.
query_str = "What are the health benefits of eating orange peels?"
fmt_prompt = get_formatted_prompt(query_str)
response = llm.complete(fmt_prompt)
print(str(response))
2. Useful for questions related to oranges
# Example 3: a question that touches both choices.
query_str = (
"Can you tell me more about the amount of Vitamin C in apples and oranges."
)
fmt_prompt = get_formatted_prompt(query_str)
response = llm.complete(fmt_prompt)
print(str(response))
1. Useful for questions related to apples 2. Useful for questions related to oranges
观察:虽然响应对应于正确的选择,但要将其解析为结构化输出(例如单个整数)可能有些棘手。我们需要对这些选择做一些字符串解析来提取出单个数字,并使解析逻辑足够健壮,能够应对各种失败情形。
2. 可以生成结构化输出的路由器提示¶
因此,下一步是尝试提示模型输出更结构化的表示(JSON)。
我们定义一个输出解析器类(`RouterOutputParser`)。这个输出解析器将负责格式化提示,并将结果解析为一个结构化对象(一个 `Answer`)。
然后,我们在LLM调用前后分别应用输出解析器的 `format` 和 `parse` 方法,以使用路由器提示生成结构化输出。
2.a 导入Answer类¶
我们从代码库中加载 `Answer` 类。这是一个非常简单的数据类,有两个字段:`choice` 和 `reason`。
from dataclasses import fields
from pydantic import BaseModel
import json
# Structured routing result: `choice` is the number of the selected item and
# `reason` is the accompanying justification. Documented with `#` comments
# so that the generated JSON schema is left unchanged.
class Answer(BaseModel):
    choice: int
    reason: str
# Show the JSON schema generated for Answer.
print(json.dumps(Answer.schema(), indent=2))
{ "title": "Answer", "type": "object", "properties": { "choice": { "title": "Choice", "type": "integer" }, "reason": { "title": "Reason", "type": "string" } }, "required": [ "choice", "reason" ] }
2.b 定义路由器输出解析器¶
from llama_index.core.types import BaseOutputParser
# Format instructions appended to the router prompt: they ask the model to
# reply with a JSON array of {"choice": int, "reason": str} objects. The text
# is sent to the LLM verbatim, so it is runtime content rather than a comment.
FORMAT_STR = """输出应格式化为符合以下JSON模式的JSON实例。
以下是输出模式:
{
"type": "array",
"items": {
"type": "object",
"properties": {
"choice": {
"type": "integer"
},
"reason": {
"type": "string"
}
},
"required": [
"choice",
"reason"
],
"additionalProperties": false
}
}
"""
如果我们想将 `FORMAT_STR` 作为提示模板的一部分放入 f-string 中,那么我们需要转义大括号,以防它们被视为模板变量。
def _escape_curly_braces(input_string: str) -> str:
# 将'{'替换为'{{',将'}'替换为'}}',以转义大括号
escaped_string = input_string.replace("{", "{{").replace("}", "}}")
return escaped_string
现在我们定义一个简单的解析函数,从LLM响应中提取出JSON字符串(通过搜索方括号)。
def _marshal_output_to_json(output: str) -> str:
output = output.strip()
left = output.find("[")
right = output.find("]")
output = output[left : right + 1]
return output
我们将这些内容放在我们的 `RouterOutputParser` 中。
from typing import List
class RouterOutputParser(BaseOutputParser):
    """Output parser that requests, and decodes, a JSON list of answers."""

    def parse(self, output: str) -> List[Answer]:
        """Parse the raw LLM response into a list of ``Answer`` objects.

        Args:
            output: Raw text returned by the LLM.

        Returns:
            One ``Answer`` per object in the JSON array found in ``output``.

        Raises:
            json.JSONDecodeError: If the extracted substring is not valid JSON.
        """
        json_output = _marshal_output_to_json(output)
        json_dicts = json.loads(json_output)
        # Pydantic models have no ``from_dict``; keyword construction builds
        # and validates the model from each decoded object.
        return [Answer(**json_dict) for json_dict in json_dicts]

    def format(self, prompt_template: str) -> str:
        """Append the brace-escaped JSON format instructions to the prompt."""
        return prompt_template + "\n\n" + _escape_curly_braces(FORMAT_STR)
2.c 试一试¶
我们创建一个名为 `route_query` 的函数,它将接受输出解析器、llm 和提示模板,并输出一个结构化的答案。
# Parser instance that is passed to route_query.
output_parser = RouterOutputParser()
from typing import List
def route_query(
    query_str: str, choices: List[str], output_parser: RouterOutputParser
):
    """Route ``query_str`` to the most relevant of ``choices``.

    Args:
        query_str: The user question to route.
        choices: Descriptions of the available choices, in order.
        output_parser: Parser used both to append format instructions to the
            prompt and to decode the LLM response.

    Returns:
        The parsed result of ``output_parser.parse`` on the LLM response.
    """
    # Render the caller's choices rather than relying on a module-level
    # string, so the numbered list always agrees with ``num_choices``.
    choices_str = get_choice_str(choices)

    fmt_base_prompt = router_prompt0.format(
        num_choices=len(choices),
        max_outputs=len(choices),
        context_list=choices_str,
        query_str=query_str,
    )
    fmt_json_prompt = output_parser.format(fmt_base_prompt)

    raw_output = llm.complete(fmt_json_prompt)
    parsed = output_parser.parse(str(raw_output))

    return parsed
3. 使用函数调用端点执行路由¶
在上一节中,我们展示了如何构建一个带有文本完成端点的路由器。这包括格式化提示以鼓励模型输出结构化的JSON,以及一个解析函数来加载JSON。
这个过程可能有点混乱。函数调用端点(例如OpenAI)通过允许模型原生输出结构化函数来抽象化这种复杂性。这消除了手动提示+解析输出的需要。
LlamaIndex提供了一个称为 `PydanticProgram` 的抽象,它与函数端点集成以生成一个结构化的Pydantic对象。我们与OpenAI和Guidance集成。
我们使用注释重新定义了 `Answer` 类,并创建了一个包含答案列表的 `Answers` 类。
from pydantic import Field
# NOTE: the docstrings of these models are emitted as the ``description``
# fields of the generated schema, so they are part of what is sent to the
# function-calling endpoint, not just documentation.
class Answer(BaseModel):
    """Represents a single choice with a reason."""

    choice: int
    reason: str


class Answers(BaseModel):
    """Represents a list of answers."""

    answers: List[Answer]


Answers.schema()
{'title': 'Answers', 'description': 'Represents a list of answers.', 'type': 'object', 'properties': {'answers': {'title': 'Answers', 'type': 'array', 'items': {'$ref': '#/definitions/Answer'}}}, 'required': ['answers'], 'definitions': {'Answer': {'title': 'Answer', 'description': 'Represents a single choice with a reason.', 'type': 'object', 'properties': {'choice': {'title': 'Choice', 'type': 'integer'}, 'reason': {'title': 'Reason', 'type': 'string'}}, 'required': ['choice', 'reason']}}}
from llama_index.program.openai import OpenAIPydanticProgram
# Pre-fill the two count variables from the module-level choices, leaving
# {context_list} and {query_str} to be supplied at call time.
router_prompt1 = router_prompt0.partial_format(
num_choices=len(choices),
max_outputs=len(choices),
)
# Program that produces an Answers object via the function-calling endpoint.
program = OpenAIPydanticProgram.from_defaults(
output_cls=Answers,
prompt=router_prompt1,
verbose=True,
)
query_str = "What are the health benefits of eating orange peels?"
# The remaining template variables are passed as keyword arguments.
output = program(context_list=choices_str, query_str=query_str)
Function call: Answers with args: { "answers": [ { "choice": 2, "reason": "Orange peels are related to oranges" } ] }
# Display the structured result returned by the program.
output
Answers(answers=[Answer(choice=2, reason='Orange peels are related to oranges')])
4. 将路由器模块作为RAG管道的一部分¶
在本节中,我们将把路由器模块用于RAG管道中。我们将使用它动态决定是执行问答还是摘要。我们可以通过我们的向量索引轻松获得一个问答查询引擎,而摘要则是通过我们的摘要索引执行的。每个查询引擎被描述为我们的路由器的一个“选择”,我们将整个内容组合成一个单一的查询引擎。
设置:加载数据¶
我们将Llama 2论文作为数据加载。
# -p keeps the cell re-runnable: no error when the directory already exists.
!mkdir -p data
# Download the Llama 2 paper into data/llama2.pdf.
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"
mkdir: data: File exists --2023-09-17 23:37:11-- https://arxiv.org/pdf/2307.09288.pdf Resolving arxiv.org (arxiv.org)... 128.84.21.199 Connecting to arxiv.org (arxiv.org)|128.84.21.199|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 13661300 (13M) [application/pdf] Saving to: ‘data/llama2.pdf’ data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 9.5s 2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]
from pathlib import Path
from llama_index.readers.file import PyMuPDFReader
# Read the downloaded PDF into documents for indexing.
loader = PyMuPDFReader()
documents = loader.load(file_path="./data/llama2.pdf")
设置:定义索引¶
在这份数据上定义一个向量索引和一个摘要索引。
from llama_index.core import VectorStoreIndex
from llama_index.core import SummaryIndex
from llama_index.core.node_parser import SentenceSplitter
# Both indexes are built from the same documents with the same splitter.
splitter = SentenceSplitter(chunk_size=1024)
vector_index = VectorStoreIndex.from_documents(
documents, transformations=[splitter]
)
summary_index = SummaryIndex.from_documents(
documents, transformations=[splitter]
)
# One query engine per index; these are the two routes offered to the router.
vector_query_engine = vector_index.as_query_engine(llm=llm)
summary_query_engine = summary_index.as_query_engine(llm=llm)
定义RouterQueryEngine¶
我们通过对 `CustomQueryEngine` 进行子类化来定义一个自定义路由器。
from llama_index.core.query_engine import CustomQueryEngine, BaseQueryEngine
from llama_index.core.response_synthesizers import TreeSummarize
class RouterQueryEngine(CustomQueryEngine):
    """Use our Pydantic program to perform routing."""

    # Candidate engines; position i corresponds to choice_descriptions[i].
    query_engines: List[BaseQueryEngine]
    # Human-readable description of each engine, shown to the router LLM.
    choice_descriptions: List[str]
    verbose: bool = False
    # Prompt handed to the Pydantic program; it is called with
    # ``context_list`` and ``query_str``.
    router_prompt: PromptTemplate
    llm: OpenAI
    # Combines the individual responses when more than one engine is selected.
    summarizer: TreeSummarize = Field(default_factory=TreeSummarize)

    def custom_query(self, query_str: str):
        """Route ``query_str`` to the selected engine(s) and return the answer.

        Args:
            query_str: The user question.

        Returns:
            The single engine's response when exactly one choice is selected;
            otherwise the summarizer's combination of all engine responses.

        Raises:
            IndexError: If the router selects a choice number outside
                ``1..len(query_engines)``.
        """
        program = OpenAIPydanticProgram.from_defaults(
            output_cls=Answers,
            # Use the prompt configured on this instance, not a module global.
            prompt=self.router_prompt,
            verbose=self.verbose,
            llm=self.llm,
        )

        choices_str = get_choice_str(self.choice_descriptions)
        output = program(context_list=choices_str, query_str=query_str)

        # Print the choices and reasons, then query the underlying engines.
        if self.verbose:
            print("选择的选项:")
            for answer in output.answers:
                print(f"选择:{answer.choice},原因:{answer.reason}")

        responses = []
        for answer in output.answers:
            # Choices are numbered from 1 in the prompt.
            choice_idx = answer.choice - 1
            # Reject out-of-range numbers explicitly; otherwise 0 or a
            # negative choice would silently wrap around to another engine.
            if not 0 <= choice_idx < len(self.query_engines):
                raise IndexError(
                    f"Router selected choice {answer.choice}, but only "
                    f"{len(self.query_engines)} query engines are configured"
                )
            query_engine = self.query_engines[choice_idx]
            responses.append(query_engine.query(query_str))

        # A single selected choice needs no combining step.
        if len(responses) == 1:
            return responses[0]

        # Several choices were selected: merge their answers with the summarizer.
        response_strs = [str(r) for r in responses]
        return self.summarizer.get_response(query_str, response_strs)
# Descriptions of the two engines, in the same order as `query_engines` below.
choices = [
(
"Useful for answering questions about specific sections of the Llama 2"
" paper"
),
"Useful for questions that ask for a summary of the whole paper",
]
# Router over the vector (Q&A) engine and the summary engine.
router_query_engine = RouterQueryEngine(
query_engines=[vector_query_engine, summary_query_engine],
choice_descriptions=choices,
verbose=True,
router_prompt=router_prompt1,
llm=OpenAI(model="gpt-4"),
)
尝试我们构建的路由查询引擎¶
让我们来试试我们自己构建的路由查询引擎!我们提出一个问题,将其路由到向量查询引擎,还有另一个问题将被路由到摘要引擎。
# A question about a specific part of the paper.
response = router_query_engine.query(
"How does the Llama 2 model compare to GPT-4 in the experimental results?"
)
Function call: Answers with args: { "answers": [ { "choice": 1, "reason": "This question is asking for specific information about the Llama 2 model and its comparison to GPT-4 in the experimental results. Therefore, the summary that is useful for answering questions about specific sections of the paper would be most relevant." } ] } Selected choice(s): Choice: 1, Reason: This question is asking for specific information about the Llama 2 model and its comparison to GPT-4 in the experimental results. Therefore, the summary that is useful for answering questions about specific sections of the paper would be most relevant.
# Show the answer returned by the routed engine.
print(str(response))
The Llama 2 model performs better than GPT-4 in the experimental results.
# A question asking for a summary of the whole paper.
response = router_query_engine.query("Can you give a summary of this paper?")
Function call: Answers with args: { "answers": [ { "choice": 2, "reason": "This choice is directly related to providing a summary of the whole paper, which is what the question asks for." } ] } Selected choice(s): Choice: 2, Reason: This choice is directly related to providing a summary of the whole paper, which is what the question asks for.
# Show the summary returned by the routed engine.
print(str(response))