Sentence Transformers
In [ ]:
%pip install llama-index-readers-file pymupdf
%pip install llama-index-vector-stores-postgres
%pip install llama-index-embeddings-huggingface
%pip install llama-index-llms-llama-cpp
In [ ]:
# sentence transformers
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")
In [ ]:
!pip install llama-cpp-python
Requirement already satisfied: llama-cpp-python in /Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages (0.2.7)
Requirement already satisfied: numpy>=1.20.0 in /Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages (from llama-cpp-python) (1.23.5)
Requirement already satisfied: typing-extensions>=4.5.0 in /Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages (from llama-cpp-python) (4.7.1)
Requirement already satisfied: diskcache>=5.6.1 in /Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages (from llama-cpp-python) (5.6.3)

[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: pip install --upgrade pip
In [ ]:
from llama_index.llms.llama_cpp import LlamaCPP

# model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

llm = LlamaCPP(
    # you can pass in the URL to a GGUF model to download it automatically
    model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to leave some headroom
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)
Initialize Postgres
Using an existing postgres instance running at localhost, create the database we'll be using.
NOTE: Of course there are plenty of other open-source/self-hosted databases you can use, e.g. Chroma, Qdrant, Weaviate, and more. Take a look at our vector store guide.
NOTE: You will need to set up postgres on your local system. Here's an example of how to set it up on OSX: https://www.sqlshack.com/setting-up-a-postgresql-database-on-mac/.
NOTE: You will also need to install pgvector (https://github.com/pgvector/pgvector).
You can add a role like the following:
CREATE ROLE <user> WITH LOGIN PASSWORD '<password>';
ALTER ROLE <user> SUPERUSER;
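If pgvector is installed but not yet enabled in your database, you can enable the extension manually (PGVectorStore will typically handle this for you; this is just a manual check):
CREATE EXTENSION IF NOT EXISTS vector;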
In [ ]:
!pip install psycopg2-binary pgvector asyncpg "sqlalchemy[asyncio]" greenlet
In [ ]:
import psycopg2

db_name = "vector_db"
host = "localhost"
password = "password"
port = "5432"
user = "jerry"
# conn = psycopg2.connect(connection_string)
conn = psycopg2.connect(
    dbname="postgres",
    host=host,
    password=password,
    port=port,
    user=user,
)
conn.autocommit = True

with conn.cursor() as c:
    c.execute(f"DROP DATABASE IF EXISTS {db_name}")
    c.execute(f"CREATE DATABASE {db_name}")
In [ ]:
# import the make_url helper
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# create the PGVectorStore via the from_params method
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="llama2_paper",
    embed_dim=384,  # embedding dimension of BAAI/bge-small-en
)
1. Load Data
In this section, we load the dataset we will be working with.
In [ ]:
!mkdir data
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"
In [ ]:
from pathlib import Path
from llama_index.readers.file import PyMuPDFReader
In [ ]:
loader = PyMuPDFReader()
documents = loader.load(file_path="./data/llama2.pdf")
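PyMuPDFReader typically returns one Document per page of the PDF, so a quick sanity check on the load is:
print(len(documents))  # number of Document objects (typically one per PDF page)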
2. Use a Text Splitter to Split Documents
In [ ]:
from llama_index.core.node_parser import SentenceSplitter
In [ ]:
text_parser = SentenceSplitter(
    chunk_size=1024,
    # separator=" ",
)
In [ ]:
text_chunks = []
# maintain relationship with source doc index, to help inject doc metadata in (3)
doc_idxs = []
for doc_idx, doc in enumerate(documents):
    cur_text_chunks = text_parser.split_text(doc.text)
    text_chunks.extend(cur_text_chunks)
    doc_idxs.extend([doc_idx] * len(cur_text_chunks))
3. Manually Construct Nodes from Text Chunks
In [ ]:
from llama_index.core.schema import TextNode

nodes = []
for idx, text_chunk in enumerate(text_chunks):
    node = TextNode(
        text=text_chunk,
    )
    src_doc = documents[doc_idxs[idx]]
    node.metadata = src_doc.metadata
    nodes.append(node)
4. Generate Embeddings for Each Node
Here we generate embeddings for each Node using a sentence_transformers model.
In [ ]:
for node in nodes:
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )
    node.embedding = node_embedding
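If embedding nodes one at a time is slow, the embedding model also exposes a batched method (part of LlamaIndex's base embedding interface); a minimal sketch:
# embed all node texts in a single batched call
texts = [node.get_content(metadata_mode="all") for node in nodes]
embeddings = embed_model.get_text_embedding_batch(texts, show_progress=True)
for node, embedding in zip(nodes, embeddings):
    node.embedding = embedding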
5. Load Nodes into the Vector Store
We now insert these nodes into our PGVectorStore.
In [ ]:
vector_store.add(nodes)
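As a quick sanity check, you can count the rows that were written. The table name below assumes PGVectorStore's default behavior of prefixing the configured table_name with data_; adjust if your version does otherwise:
# hypothetical sanity check: count rows in the table backing the vector store
check_conn = psycopg2.connect(
    dbname=db_name, host=host, password=password, port=port, user=user
)
with check_conn.cursor() as c:
    c.execute("SELECT COUNT(*) FROM data_llama2_paper")  # assumes the default "data_" prefix
    print(c.fetchone()[0])
check_conn.close()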
In [ ]:
query_str = "Can you tell me about the key concepts for safety finetuning"
1. Generate a Query Embedding
We generate an embedding for the query string using the same embed_model that we used to embed the nodes.
In [ ]:
query_embedding = embed_model.get_query_embedding(query_str)
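The query embedding must have the same dimensionality as the embed_dim we configured on the vector store (384 for BAAI/bge-small-en); a quick check:
print(len(query_embedding))  # expected: 384, matching the store's embed_dim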
2. Query the Vector Database
In [ ]:
# construct vector store query
from llama_index.core.vector_stores import VectorStoreQuery

query_mode = "default"
# query_mode = "sparse"
# query_mode = "hybrid"

vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding, similarity_top_k=2, mode=query_mode
)
In [ ]:
# returns a VectorStoreQueryResult
query_result = vector_store.query(vector_store_query)
print(query_result.nodes[0].get_content())
3. Parse Result into a Set of Nodes
In [ ]:
from llama_index.core.schema import NodeWithScore
from typing import Optional

nodes_with_scores = []
for index, node in enumerate(query_result.nodes):
    score: Optional[float] = None
    if query_result.similarities is not None:
        score = query_result.similarities[index]
    nodes_with_scores.append(NodeWithScore(node=node, score=score))
4. Put into a Retriever
In [ ]:
from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from typing import Any, List


class VectorDBRetriever(BaseRetriever):
    """Retriever over a postgres vector store."""

    def __init__(
        self,
        vector_store: PGVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Init params."""
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve."""
        query_embedding = self._embed_model.get_query_embedding(
            query_bundle.query_str
        )
        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        query_result = self._vector_store.query(vector_store_query)

        nodes_with_scores = []
        for index, node in enumerate(query_result.nodes):
            score: Optional[float] = None
            if query_result.similarities is not None:
                score = query_result.similarities[index]
            nodes_with_scores.append(NodeWithScore(node=node, score=score))

        return nodes_with_scores
In [ ]:
retriever = VectorDBRetriever(
    vector_store, embed_model, query_mode="default", similarity_top_k=2
)
Plug this into our RetrieverQueryEngine to synthesize a response
In [ ]:
from llama_index.core.query_engine import RetrieverQueryEngine
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)
In [ ]:
query_str = "How does Llama 2 perform compared to other open-source models?"
response = query_engine.query(query_str)
query_str = "How does Llama 2 perform compared to other open-source models?"
response = query_engine.query(query_str)
In [ ]:
print(str(response))
Based on the results shown in Table 3, Llama 2 outperforms all open-source models on most of the benchmarks, with an average improvement of around 5 points over the next best model (GPT-3.5).
In [ ]:
print(response.source_nodes[0].get_content())
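To see how retrieval drove the answer, you can also print every retrieved source node together with its similarity score; a small sketch:
# print each retrieved chunk alongside its similarity score
for src in response.source_nodes:
    print(f"score: {src.score}")
    print(src.get_content()[:200])  # first 200 characters of the chunk
    print("-" * 40)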