Skip to main content
Open In ColabOpen on GitHub

SAP HANA 云向量引擎

有关如何设置SAP HANA向量存储的更多信息,请查看文档

我们在这里使用相同的设置:

import os

# Use OPENAI_API_KEY env variable
# os.environ["OPENAI_API_KEY"] = "Your OpenAI API key"
from hdbcli import dbapi

# Use connection settings from the environment
connection = dbapi.connect(
address=os.environ.get("HANA_DB_ADDRESS"),
port=os.environ.get("HANA_DB_PORT"),
user=os.environ.get("HANA_DB_USER"),
password=os.environ.get("HANA_DB_PASSWORD"),
autocommit=True,
sslValidateCertificate=False,
)

为了能够以良好的性能进行自我查询,我们在HANA中为我们的向量存储表创建了额外的元数据字段:

# Create custom table with attribute
cur = connection.cursor()
cur.execute("DROP TABLE LANGCHAIN_DEMO_SELF_QUERY", ignoreErrors=True)
cur.execute(
(
"""CREATE TABLE "LANGCHAIN_DEMO_SELF_QUERY" (
"name" NVARCHAR(100), "is_active" BOOLEAN, "id" INTEGER, "height" DOUBLE,
"VEC_TEXT" NCLOB,
"VEC_META" NCLOB,
"VEC_VECTOR" REAL_VECTOR
)"""
)
)

让我们添加一些文档。

from langchain_community.vectorstores.hanavector import HanaDB
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Prepare some test documents
docs = [
Document(
page_content="First",
metadata={"name": "adam", "is_active": True, "id": 1, "height": 10.0},
),
Document(
page_content="Second",
metadata={"name": "bob", "is_active": False, "id": 2, "height": 5.7},
),
Document(
page_content="Third",
metadata={"name": "jane", "is_active": True, "id": 3, "height": 2.4},
),
]

db = HanaDB(
connection=connection,
embedding=embeddings,
table_name="LANGCHAIN_DEMO_SELF_QUERY",
specific_metadata_columns=["name", "is_active", "id", "height"],
)

# Delete already existing documents from the table
db.delete(filter={})
db.add_documents(docs)
API Reference:HanaDB | Document | OpenAIEmbeddings

自我查询

现在进入主要环节:以下是如何为HANA向量存储构建一个SelfQueryRetriever:

from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_community.query_constructors.hanavector import HanaTranslator
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

metadata_field_info = [
AttributeInfo(
name="name",
description="The name of the person",
type="string",
),
AttributeInfo(
name="is_active",
description="Whether the person is active",
type="boolean",
),
AttributeInfo(
name="id",
description="The ID of the person",
type="integer",
),
AttributeInfo(
name="height",
description="The height of the person",
type="float",
),
]

document_content_description = "A collection of persons"

hana_translator = HanaTranslator()

retriever = SelfQueryRetriever.from_llm(
llm,
db,
document_content_description,
metadata_field_info,
structured_query_translator=hana_translator,
)

让我们使用这个检索器来为一个人准备一个(自我)查询:

query_prompt = "Which person is not active?"

docs = retriever.invoke(input=query_prompt)
for doc in docs:
print("-" * 80)
print(doc.page_content, " ", doc.metadata)

我们还可以看一下查询是如何构建的:

from langchain.chains.query_constructor.base import (
StructuredQueryOutputParser,
get_query_constructor_prompt,
)

prompt = get_query_constructor_prompt(
document_content_description,
metadata_field_info,
)
output_parser = StructuredQueryOutputParser.from_components()
query_constructor = prompt | llm | output_parser

sq = query_constructor.invoke(input=query_prompt)

print("Structured query: ", sq)

print("Translated for hana vector store: ", hana_translator.visit_structured_query(sq))

这个页面有帮助吗?