可组合对象¶

在这个笔记本中，我们将展示如何将多个对象组合成一个顶层索引。

这种方法是通过设置IndexNode对象来实现的，其中包含一个指向以下内容的obj字段：

查询引擎
检索器
查询管道
另一个节点！

# Illustrative only: wrap a query engine in an IndexNode via its `obj` field.
# The variable is not called `object`, which would shadow the Python builtin.
index_node = IndexNode(index_id="my_object", obj=query_engine, text="关于这个对象的一些文本")

数据设置¶

In [ ]:

Copied!





%pip install llama-index-storage-docstore-mongodb
%pip install llama-index-vector-stores-qdrant
%pip install llama-index-storage-docstore-firestore
%pip install llama-index-retrievers-bm25
%pip install llama-index-storage-docstore-redis
%pip install llama-index-storage-docstore-dynamodb
%pip install llama-index-readers-file pymupdf
%pip install llama-index-storage-docstore-mongodb
%pip install llama-index-vector-stores-qdrant
%pip install llama-index-storage-docstore-firestore
%pip install llama-index-retrievers-bm25
%pip install llama-index-storage-docstore-redis
%pip install llama-index-storage-docstore-dynamodb
%pip install llama-index-readers-file pymupdf

In [ ]:

Copied!

!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "./llama2.pdf"
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/1706.03762.pdf" -O "./attention.pdf"
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "./llama2.pdf"
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/1706.03762.pdf" -O "./attention.pdf"

In [ ]:

Copied!





from llama_index.core import download_loader

from llama_index.readers.file import PyMuPDFReader

# Parse each downloaded PDF once with PyMuPDFReader; the results are
# concatenated and split into nodes further below.
llama2_docs = PyMuPDFReader().load_data(
    file_path="./llama2.pdf", metadata=True
)
attention_docs = PyMuPDFReader().load_data(
    file_path="./attention.pdf", metadata=True
)

检索器设置¶

In [ ]:

Copied!

import os

# Placeholder key: replace "sk-..." with a real OpenAI API key before running.
os.environ["OPENAI_API_KEY"] = "sk-..."

In [ ]:

Copied!

from llama_index.core.node_parser import TokenTextSplitter

# Split both papers into nodes (chunk_size=1024, chunk_overlap=128).
# Done once: the vector index and the docstore below share these nodes.
nodes = TokenTextSplitter(
    chunk_size=1024, chunk_overlap=128
).get_nodes_from_documents(llama2_docs + attention_docs)

In [ ]:

Copied!





from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.storage.docstore.mongodb import MongoDocumentStore
from llama_index.storage.docstore.firestore import FirestoreDocumentStore
from llama_index.storage.docstore.dynamodb import DynamoDBDocumentStore

# Only SimpleDocumentStore is used here; the other docstore classes are
# imported but not referenced in this section.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

In [ ]:

Copied!





from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

# Create the local on-disk Qdrant client exactly once.
# NOTE(review): local-mode Qdrant presumably locks its storage folder, so a
# second client on the same path in one process may fail -- confirm.
client = QdrantClient(path="./qdrant_data")
vector_store = QdrantVectorStore("composable", client=client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Pass the storage context so the index is backed by the Qdrant vector store
# configured above. Without it, `storage_context` is unused and the later
# `VectorStoreIndex.from_vector_store(vector_store)` has nothing to load.
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)
vector_retriever = index.as_retriever(similarity_top_k=2)

# BM25 retrieves from the docstore built earlier.
bm25_retriever = BM25Retriever.from_defaults(
    docstore=docstore, similarity_top_k=2
)

组合对象¶

在这里，我们构建IndexNodes。请注意，文本是顶层索引用于索引节点的内容。

对于向量索引，文本会被转换为嵌入向量；对于关键字索引，文本用于提取关键字。

在这个例子中，使用了SummaryIndex，它在检索时实际上不需要文本，因为它总是检索所有节点。

In [ ]:

Copied!





from llama_index.core.schema import IndexNode

# Wrap each retriever in an IndexNode; `text` is what the top-level index
# uses to index the node.
vector_obj = IndexNode(
    index_id="vector", obj=vector_retriever, text="Vector Retriever"
)
bm25_obj = IndexNode(
    index_id="bm25", obj=bm25_retriever, text="BM25 Retriever"
)

In [ ]:

Copied!

from llama_index.core import SummaryIndex

# Top-level index composed of the two retriever-backed IndexNodes.
summary_index = SummaryIndex(objects=[vector_obj, bm25_obj])

查询¶

当我们进行查询时，将检索所有对象并用于生成节点以获得最终答案。

使用tree_summarize和aquery()可以确保并发执行和更快的响应。

In [ ]:

Copied!

# Query engine over the composed index, using tree_summarize for synthesis.
query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize", verbose=True
)

In [ ]:

Copied!

response = await query_engine.aquery(
    "How does attention work in transformers?"
)
response = await query_engine.aquery(
    "How does attention work in transformers?"
)

Retrieval entering vector: VectorIndexRetriever
Retrieval entering bm25: BM25Retriever

In [ ]:

Copied!

# Print the synthesized answer once.
print(str(response))

Attention in transformers works by mapping a query and a set of key-value pairs to an output. The output is computed as a weighted sum of the values, where the weights are determined by the similarity between the query and the keys. In the transformer model, attention is used in three different ways: 

1. Encoder-decoder attention: The queries come from the previous decoder layer, and the memory keys and values come from the output of the encoder. This allows every position in the decoder to attend over all positions in the input sequence.

2. Self-attention in the encoder: In a self-attention layer, all of the keys, values, and queries come from the same place, which is the output of the previous layer in the encoder. Each position in the encoder can attend to all positions in the previous layer of the encoder.

3. Self-attention in the decoder: Similar to the encoder, self-attention layers in the decoder allow each position in the decoder to attend to all positions in the decoder up to and including that position. However, leftward information flow in the decoder is prevented to preserve the auto-regressive property.

Overall, attention in transformers allows the model to jointly attend to information from different representation subspaces at different positions, improving the model's ability to capture dependencies and relationships between different parts of the input sequence.

In [ ]:

Copied!

response = await query_engine.aquery(
    "What is the architecture of Llama2 based on?"
)
response = await query_engine.aquery(
    "What is the architecture of Llama2 based on?"
)

Retrieval entering vector: VectorIndexRetriever
Retrieval entering bm25: BM25Retriever

In [ ]:

Copied!

# Print the synthesized answer once.
print(str(response))

The architecture of Llama 2 is based on the transformer model.

In [ ]:

Copied!

response = await query_engine.aquery(
    "What was used before attention in transformers?"
)
response = await query_engine.aquery(
    "What was used before attention in transformers?"
)

Retrieval entering vector: VectorIndexRetriever
Retrieval entering bm25: BM25Retriever

In [ ]:

Copied!

# Print the synthesized answer once.
print(str(response))

Recurrent neural networks, such as long short-term memory (LSTM) and gated recurrent neural networks, were commonly used before attention in transformers. These models were widely used in sequence modeling and transduction problems, including language modeling and machine translation.

保存和加载注意事项¶

由于这些对象在技术上不可序列化，因此在加载索引时需要重新提供它们。

下面的示例展示了如何保存/加载这一设置。

保存¶

In [ ]:

Copied!

# Qdrant is already saving automatically!
# We only need to save the docstore here.

# Save our docstore nodes for BM25.
docstore.persist("./docstore.json")

加载¶

In [ ]:

Copied!





from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

# Restore the BM25 docstore from the JSON file written in the save step.
docstore = SimpleDocumentStore.from_persist_path("./docstore.json")

# Re-open the on-disk Qdrant collection, creating the client only once.
# NOTE(review): local-mode Qdrant presumably locks its storage folder, so
# this assumes no other client on "./qdrant_data" is still open -- confirm.
client = QdrantClient(path="./qdrant_data")
vector_store = QdrantVectorStore("composable", client=client)

In [ ]:

Copied!





# Imported here so the loading section also runs in a fresh session, where
# the earlier setup cells have not been executed.
from llama_index.core import VectorStoreIndex
from llama_index.retrievers.bm25 import BM25Retriever

# Rebuild the vector index on top of the existing vector store.
index = VectorStoreIndex.from_vector_store(vector_store)
vector_retriever = index.as_retriever(similarity_top_k=2)

# Rebuild the BM25 retriever from the reloaded docstore.
bm25_retriever = BM25Retriever.from_defaults(
    docstore=docstore, similarity_top_k=2
)

In [ ]:

Copied!





from llama_index.core.schema import IndexNode

# The retriever objects are passed in again here, wrapped in new IndexNodes.
vector_obj = IndexNode(
    index_id="vector", obj=vector_retriever, text="Vector Retriever"
)
bm25_obj = IndexNode(
    index_id="bm25", obj=bm25_retriever, text="BM25 Retriever"
)

In [ ]:

Copied!

# If we had added regular nodes to the summary index, we could save/load it too:
# summary_index.persist("./summary_index.json")
# summary_index = load_index_from_storage(storage_context, objects=objects)

from llama_index.core import SummaryIndex

summary_index = SummaryIndex(objects=[vector_obj, bm25_obj])