Neo4j向量存储 - 元数据过滤器¶
如果您在colab上打开这个笔记本,您可能需要安装LlamaIndex 🦙。
In [ ]:
Copied!
%pip install llama-index-vector-stores-neo4jvector
%pip install llama-index-vector-stores-neo4jvector
In [ ]:
Copied!
# !pip install llama-index>=0.9.31 neo4j
# !pip install llama-index>=0.9.31 neo4j
In [ ]:
Copied!
import logging
import sys
import os
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
import logging
import sys
import os
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
构建一个Neo4j向量索引并连接到它
In [ ]:
Copied!
import os
from llama_index.vector_stores.neo4jvector import Neo4jVectorStore
os.environ["OPENAI_API_KEY"] = "sk-..."
username = "neo4j"
password = "password"
url = "bolt://localhost:7687"
embed_dim = 1536 # 维度是用于文本嵌入-ada-002
vector_store = Neo4jVectorStore(username, password, url, embed_dim)
import os
from llama_index.vector_stores.neo4jvector import Neo4jVectorStore
os.environ["OPENAI_API_KEY"] = "sk-..."
username = "neo4j"
password = "password"
url = "bolt://localhost:7687"
embed_dim = 1536 # 维度是用于文本嵌入-ada-002
vector_store = Neo4jVectorStore(username, password, url, embed_dim)
INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. INFO:numexpr.utils:NumExpr defaulting to 8 threads. NumExpr defaulting to 8 threads.
构建VectorStoreIndex
In [ ]:
Copied!
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.schema import TextNode
nodes = [
TextNode(
text="The Shawshank Redemption",
metadata={
"author": "Stephen King",
"theme": "Friendship",
"year": 1994,
},
),
TextNode(
text="The Godfather",
metadata={
"director": "Francis Ford Coppola",
"theme": "Mafia",
"year": 1972,
},
),
TextNode(
text="Inception",
metadata={
"director": "Christopher Nolan",
"theme": "Fiction",
"year": 2010,
},
),
TextNode(
text="To Kill a Mockingbird",
metadata={
"author": "Harper Lee",
"theme": "Mafia",
"year": 1960,
},
),
TextNode(
text="1984",
metadata={
"author": "George Orwell",
"theme": "Totalitarianism",
"year": 1949,
},
),
TextNode(
text="The Great Gatsby",
metadata={
"author": "F. Scott Fitzgerald",
"theme": "The American Dream",
"year": 1925,
},
),
TextNode(
text="Harry Potter and the Sorcerer's Stone",
metadata={
"author": "J.K. Rowling",
"theme": "Fiction",
"year": 1997,
},
),
]
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.schema import TextNode
nodes = [
TextNode(
text="The Shawshank Redemption",
metadata={
"author": "Stephen King",
"theme": "Friendship",
"year": 1994,
},
),
TextNode(
text="The Godfather",
metadata={
"director": "Francis Ford Coppola",
"theme": "Mafia",
"year": 1972,
},
),
TextNode(
text="Inception",
metadata={
"director": "Christopher Nolan",
"theme": "Fiction",
"year": 2010,
},
),
TextNode(
text="To Kill a Mockingbird",
metadata={
"author": "Harper Lee",
"theme": "Mafia",
"year": 1960,
},
),
TextNode(
text="1984",
metadata={
"author": "George Orwell",
"theme": "Totalitarianism",
"year": 1949,
},
),
TextNode(
text="The Great Gatsby",
metadata={
"author": "F. Scott Fitzgerald",
"theme": "The American Dream",
"year": 1925,
},
),
TextNode(
text="Harry Potter and the Sorcerer's Stone",
metadata={
"author": "J.K. Rowling",
"theme": "Fiction",
"year": 1997,
},
),
]
In [ ]:
Copied!
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
定义元数据过滤器
In [ ]:
Copied!
from llama_index.core.vector_stores import (
MetadataFilter,
MetadataFilters,
FilterOperator,
)
filters = MetadataFilters(
filters=[
MetadataFilter(
key="theme", operator=FilterOperator.EQ, value="Fiction"
),
]
)
from llama_index.core.vector_stores import (
MetadataFilter,
MetadataFilters,
FilterOperator,
)
filters = MetadataFilters(
filters=[
MetadataFilter(
key="theme", operator=FilterOperator.EQ, value="Fiction"
),
]
)
从向量存储中使用过滤器检索
In [ ]:
Copied!
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Out[ ]:
[NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.9202238321304321), NodeWithScore(node=TextNode(id_='fc1df8cc-f1d3-4a7b-8c21-f83b18463758', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="Harry Potter and the Sorcerer's Stone", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8823964595794678)]
使用AND
条件的多个元数据过滤器
In [ ]:
Copied!
from llama_index.core.vector_stores import FilterOperator, FilterCondition
filters = MetadataFilters(
filters=[
MetadataFilter(key="theme", value="Fiction"),
MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
],
condition=FilterCondition.AND,
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")
from llama_index.core.vector_stores import FilterOperator, FilterCondition
filters = MetadataFilters(
filters=[
MetadataFilter(key="theme", value="Fiction"),
MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
],
condition=FilterCondition.AND,
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Out[ ]:
[NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8818434476852417)]
使用OR
条件的多个元数据过滤器
In [ ]:
Copied!
from llama_index.core.vector_stores import FilterOperator, FilterCondition
filters = MetadataFilters(
filters=[
MetadataFilter(key="theme", value="Fiction"),
MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
],
condition=FilterCondition.OR,
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")
from llama_index.core.vector_stores import FilterOperator, FilterCondition
filters = MetadataFilters(
filters=[
MetadataFilter(key="theme", value="Fiction"),
MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
],
condition=FilterCondition.OR,
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Out[ ]:
[NodeWithScore(node=TextNode(id_='fc1df8cc-f1d3-4a7b-8c21-f83b18463758', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="Harry Potter and the Sorcerer's Stone", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.9242331385612488), NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8818434476852417)]