Nebula Graph 存储系统¶

In [ ]:

Copied!





%pip install llama-index-llms-openai
%pip install llama-index-embeddings-openai
%pip install llama-index-graph-stores-nebula
%pip install llama-index-llms-azure-openai
%pip install llama-index-llms-openai
%pip install llama-index-embeddings-openai
%pip install llama-index-graph-stores-nebula
%pip install llama-index-llms-azure-openai

In [ ]:

Copied!





# Option A: use OpenAI directly.

import logging
import os
import sys

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# Placeholder value: replace it with your real OpenAI API key before running.
os.environ["OPENAI_API_KEY"] = "插入OpenAI密钥"

# Send INFO-level log records to stdout so they show up in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Define the LLM. The model configured here is gpt-3.5-turbo.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo")

# Register the LLM and the chunk size on the shared Settings object.
Settings.llm = llm
Settings.chunk_size = 512

In [ ]:

Copied!





# Option B: use Azure OpenAI instead of OpenAI.
import json
import logging
import os
import sys

import openai
from IPython.display import Markdown, display
from llama_index.core import (
    KnowledgeGraphIndex,
    Settings,  # imported here so this cell also works when the OpenAI cell was skipped
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.llms.azure_openai import AzureOpenAI

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # use logging.DEBUG for more verbose output
# NOTE(review): this attaches an additional stdout handler to the root logger;
# it looks like the reason log lines appear twice in the outputs — confirm
# whether that is intended.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Placeholders in angle brackets must be replaced with your own Azure values.
openai.api_type = "azure"
openai.api_base = "https://<foo-bar>.openai.azure.com"
openai.api_version = "2022-12-01"
os.environ["OPENAI_API_KEY"] = "<your-openai-key>"
openai.api_key = os.getenv("OPENAI_API_KEY")

llm = AzureOpenAI(
    model="<foo-bar-model>",
    engine="<foo-bar-deployment>",
    temperature=0,
    api_key=openai.api_key,
    api_type=openai.api_type,
    api_base=openai.api_base,
    api_version=openai.api_version,
)

# You need to deploy your own embedding model as well as your own
# chat-completion model.
embedding_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="<foo-bar-deployment>",
    api_key=openai.api_key,
    api_base=openai.api_base,
    api_type=openai.api_type,
    api_version=openai.api_version,
)

Settings.llm = llm
# The previous right-hand side was an undefined name `chunk_size`;
# 512 matches the value used in the OpenAI setup cell.
Settings.chunk_size = 512
Settings.embed_model = embedding_model

使用知识图谱与NebulaGraphStore¶

构建知识图谱¶

In [ ]:

Copied!

# Names needed to build and display the knowledge graph index.
from IPython.display import Markdown, display
from llama_index.core import (
    KnowledgeGraphIndex,
    SimpleDirectoryReader,
    StorageContext,
)
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.llms.openai import OpenAI

In [ ]:

Copied!

# Read the source documents from the Paul Graham essay example directory.
# Loading once is enough; a second identical load only repeats the file I/O.
documents = SimpleDirectoryReader(
    "../../../../examples/paul_graham_essay/data"
).load_data()

准备NebulaGraph¶

在本节中，我们将准备 NebulaGraph——一个开源的分布式图数据库，并介绍安装和配置 NebulaGraph 所需的步骤。

In [ ]:

Copied!





%pip install nebula3-python

os.environ["NEBULA_USER"] = "root"
os.environ[
    "NEBULA_PASSWORD"
] = "<password>"  # 替换为您的密码，默认为 "nebula"
os.environ[
    "NEBULA_ADDRESS"
] = "127.0.0.1:9669"  # 假设本地已安装 NebulaGraph 3.5.0 或更新版本

# 假设图已经被创建
# 使用以下选项创建 NebulaGraph 集群：
# 选项 0: `curl -fsSL nebula-up.siwei.io/install.sh | bash`
# 选项 1: NebulaGraph Docker Extension https://hub.docker.com/extensions/weygu/nebulagraph-dd-ext
# 并且图空间被命名为 "paul_graham_essay"
# 如果没有，可以使用以下命令从 NebulaGraph 控制台创建：
# CREATE SPACE paul_graham_essay(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
# :sleep 10;
# USE paul_graham_essay;
# CREATE TAG entity(name string);
# CREATE EDGE relationship(relationship string);
# CREATE TAG INDEX entity_index ON entity(name(256));

space_name = "paul_graham_essay"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # 默认值，如果从空的知识图谱创建，则可以省略
tags = ["entity"]  # 默认值，如果从空的知识图谱创建，则可以省略
%pip install nebula3-python

os.environ["NEBULA_USER"] = "root"
os.environ[
    "NEBULA_PASSWORD"
] = ""  # 替换为您的密码，默认为 "nebula"
os.environ[
    "NEBULA_ADDRESS"
] = "127.0.0.1:9669"  # 假设本地已安装 NebulaGraph 3.5.0 或更新版本

# 假设图已经被创建
# 使用以下选项创建 NebulaGraph 集群：
# 选项 0: `curl -fsSL nebula-up.siwei.io/install.sh | bash`
# 选项 1: NebulaGraph Docker Extension https://hub.docker.com/extensions/weygu/nebulagraph-dd-ext
# 并且图空间被命名为 "paul_graham_essay"
# 如果没有，可以使用以下命令从 NebulaGraph 控制台创建：
# CREATE SPACE paul_graham_essay(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
# :sleep 10;
# USE paul_graham_essay;
# CREATE TAG entity(name string);
# CREATE EDGE relationship(relationship string);
# CREATE TAG INDEX entity_index ON entity(name(256));

space_name = "paul_graham_essay"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # 默认值，如果从空的知识图谱创建，则可以省略
tags = ["entity"]  # 默认值，如果从空的知识图谱创建，则可以省略

实例化GPTNebulaGraph KG索引¶

In [ ]:

Copied!





# Create the NebulaGraph-backed graph store.
# The variable must be named `graph_store`, because that is the name passed
# to StorageContext below.
graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)

storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: this can take a while! Build the index only once, since a second
# identical call repeats the whole extraction.
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)

查询知识图谱¶

In [ ]:

Copied!

# Create a query engine with default options and ask a single question.
# The query runs once; repeating it would only repeat the LLM calls.
query_engine = index.as_query_engine()

response = query_engine.query("Tell me more about Interleaf")

INFO:llama_index.indices.knowledge_graph.retrievers:> Starting query: Tell me more about Interleaf
INFO:llama_index.indices.knowledge_graph.retrievers:> Query keywords: ['Interleaf', 'history', 'software', 'company']
ERROR:llama_index.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6aa6a716-7390-4783-955b-8169fab25bb1: worth trying.

Our teacher, professor Ulivi, was a nice guy. He could see I w...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 79f2a1b4-80bb-416f-a259-ebfc3136b2fe: on a map of New York City: if you zoom in on the Upper East Side, there's a t...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 1e707b8c-b62a-4c1a-a908-c79e77b9692b: buyers pay a lot for such work. [6]

There were plenty of earnest students to...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 31c2f53c-928a-4ed0-88fc-df92dba47c33: for example, that the reason the color changes suddenly at a certain point is...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: f51d8a1c-06bc-45aa-bed1-1714ae4e5fb9: the software is an online store builder and you're hosting the stores, if you...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 008052a0-a64b-4e3c-a2af-4963896bfc19: Engineering that seemed to be at least as big as the group that actually wrot...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: b1f5a610-9e0a-4e3e-ba96-514ae7d63a84: closures stored in a hash table on the server.

It helped to have studied art...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: f7cc82a7-76e0-4a06-9f50-d681404c5bce: of Robert's apartment in Cambridge. His roommate was away for big chunks of t...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: db626325-035a-4f67-87c0-1e770b80f4a6: want to be online, and still don't, not the fancy ones. That's not how they s...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 73e76f4b-0ebe-4af6-9c2d-6affae81373b: But in the long term the growth rate takes care of the absolute number. If we...
INFO:llama_index.indices.knowledge_graph.retrievers:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
software ['is', 'web app', 'common', 'now']
software ['is', 'web app', "wasn't clear", 'it was possible']
software ['generate', 'web sites']
software ['worked', 'via web']
software ['is', 'web app']
software ['has', 'three main parts']
software ['is', 'online store builder']
Lisp ['has dialects', 'because']
Lisp ['rare', 'C++']
Lisp ['is', 'language']
Lisp ['has dialects', '']
Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']
Lisp ['was regarded as', 'language of AI']
Lisp ['defined by', 'writing an interpreter']
Lisp ['was meant to be', 'formal model of computation']
Interleaf ['added', 'scripting language']
Interleaf ['made software for', 'creating documents']
Interleaf ['was how I learned that', 'low end software tends to eat high end software']
Interleaf ['was', 'on the way down']
Interleaf ['on the way down', '1993']
RISD ['was', 'art school']
RISD ['counted me as', 'transfer sophomore']
RISD ['was', 'supposed to be the best art school in the country']
RISD ['was', 'the best art school in the country']
Robert ['wrote', 'shopping cart', 'written by', 'robert']
Robert ['wrote', 'shopping cart', 'written by', 'Robert']
Robert ['wrote', 'shopping cart']
Robert Morris ['offered', 'unsolicited advice']
Yorkville ['is', 'tiny corner']
Yorkville ["wasn't", 'rich']
online ['is not', 'publishing online']
online ['is not', 'publishing online', 'means', 'you treat the online version as the primary version']
web app ['common', 'now']
web app ["wasn't clear", 'it was possible']
editor ['written by', 'author']
shopping cart ['written by', 'Robert', 'wrote', 'shopping cart']
shopping cart ['written by', 'Robert']
shopping cart ['written by', 'robert', 'wrote', 'shopping cart']
shopping cart ['written by', 'robert']
Robert ['wrote', 'shopping cart', 'written by', 'Robert']
Robert ['wrote', 'shopping cart', 'written by', 'robert']
Robert ['wrote', 'shopping cart']
Lisp ['defined by', 'writing an interpreter']
Lisp ['has dialects', 'because']
Lisp ['was meant to be', 'formal model of computation']
Lisp ['rare', 'C++']
Lisp ['is', 'language']
Lisp ['has dialects', '']
Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']
Lisp ['was regarded as', 'language of AI']
Y Combinator ['would have said', 'Stop being so stressed out']
Y Combinator ['helps', 'founders']
Y Combinator ['is', 'investment firm']
company ['reaches breakeven', 'when yahoo buys it']
company ['gave', 'business advice']
company ['reaches breakeven', 'when Yahoo buys it']
software ['worked', 'via web']
software ['is', 'web app', "wasn't clear", 'it was possible']
software ['generate', 'web sites']
software ['has', 'three main parts']
software ['is', 'online store builder']
software ['is', 'web app']
software ['is', 'web app', 'common', 'now']
Y Combinator ['would have said', 'Stop being so stressed out']
Y Combinator ['is', 'investment firm']
Y Combinator ['helps', 'founders']
company ['gave', 'business advice']
company ['reaches breakeven', 'when Yahoo buys it']
company ['reaches breakeven', 'when yahoo buys it']
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 5916 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens

In [ ]:

Copied!

# Render the answer once, in bold, as Markdown.
display(Markdown(f"<b>{response}</b>"))

Interleaf was a software company that made software for creating documents. Their software was inspired by Emacs, and included a scripting language that was a dialect of Lisp. The company was started in the 1990s, and eventually went out of business.

In [ ]:

Copied!

# Ask a follow-up question with the same query engine (run once).
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf"
)

INFO:llama_index.indices.knowledge_graph.retrievers:> Starting query: Tell me more about what the author worked on at Interleaf
INFO:llama_index.indices.knowledge_graph.retrievers:> Query keywords: ['Interleaf', 'author', 'work']
ERROR:llama_index.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6aa6a716-7390-4783-955b-8169fab25bb1: worth trying.

Our teacher, professor Ulivi, was a nice guy. He could see I w...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 79f2a1b4-80bb-416f-a259-ebfc3136b2fe: on a map of New York City: if you zoom in on the Upper East Side, there's a t...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 1e707b8c-b62a-4c1a-a908-c79e77b9692b: buyers pay a lot for such work. [6]

There were plenty of earnest students to...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 31c2f53c-928a-4ed0-88fc-df92dba47c33: for example, that the reason the color changes suddenly at a certain point is...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: b1f5a610-9e0a-4e3e-ba96-514ae7d63a84: closures stored in a hash table on the server.

It helped to have studied art...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6cda9196-dcdb-4441-8f27-ff3f18779c4c: so easy. And that implies that HN was a mistake. Surely the biggest source of...
INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: a467cf4c-19cf-490f-92ad-ce03c8d91231: I've noticed in my life is how well it has worked, for me at least, to work o...
INFO:llama_index.indices.knowledge_graph.retrievers:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
software ['is', 'web app', 'common', 'now']
software ['is', 'web app', "wasn't clear", 'it was possible']
software ['generate', 'web sites']
software ['worked', 'via web']
software ['is', 'web app']
software ['has', 'three main parts']
software ['is', 'online store builder']
Lisp ['has dialects', 'because']
Lisp ['rare', 'C++']
Lisp ['is', 'language']
Lisp ['has dialects', '']
Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']
Lisp ['was regarded as', 'language of AI']
Lisp ['defined by', 'writing an interpreter']
Lisp ['was meant to be', 'formal model of computation']
Interleaf ['added', 'scripting language']
Interleaf ['made software for', 'creating documents']
Interleaf ['was how I learned that', 'low end software tends to eat high end software']
Interleaf ['was', 'on the way down']
Interleaf ['on the way down', '1993']
RISD ['was', 'art school']
RISD ['counted me as', 'transfer sophomore']
RISD ['was', 'supposed to be the best art school in the country']
RISD ['was', 'the best art school in the country']
Robert ['wrote', 'shopping cart', 'written by', 'robert']
Robert ['wrote', 'shopping cart', 'written by', 'Robert']
Robert ['wrote', 'shopping cart']
Robert Morris ['offered', 'unsolicited advice']
Yorkville ['is', 'tiny corner']
Yorkville ["wasn't", 'rich']
shopping cart ['written by', 'Robert', 'wrote', 'shopping cart']
shopping cart ['written by', 'robert', 'wrote', 'shopping cart']
shopping cart ['written by', 'Robert']
shopping cart ['written by', 'robert']
online ['is not', 'publishing online', 'means', 'you treat the online version as the primary version']
online ['is not', 'publishing online']
software ['has', 'three main parts']
software ['generate', 'web sites']
software ['is', 'web app', 'common', 'now']
software ['is', 'online store builder']
software ['is', 'web app']
software ['is', 'web app', "wasn't clear", 'it was possible']
software ['worked', 'via web']
editor ['written by', 'author']
YC ['is', 'work', 'is unprestigious', '']
YC ['grew', 'more exciting']
YC ['founded in', 'Berkeley']
YC ['founded in', '2005']
YC ['founded in', '1982']
YC ['is', 'full-time job']
YC ['is', 'engaging work']
YC ['is', 'batch model']
YC ['is', 'Summer Founders Program']
YC ['was', 'coffee shop']
YC ['invests in', 'startups']
YC ['is', 'fund']
YC ['started to notice', 'other advantages']
YC ['grew', 'quickly']
YC ['controlled by', 'founders']
YC ['is', 'work']
YC ['became', 'full-time job']
YC ['is self-funded', 'by Heroku']
YC ['is', 'hard work']
YC ['funds', 'startups']
YC ['controlled by', 'LLC']
Robert ['wrote', 'shopping cart']
Robert ['wrote', 'shopping cart', 'written by', 'Robert']
Robert ['wrote', 'shopping cart', 'written by', 'robert']
Lisp ['was meant to be', 'formal model of computation']
Lisp ['defined by', 'writing an interpreter']
Lisp ['was regarded as', 'language of AI']
Lisp ['has dialects', 'because']
Lisp ['has dialects', '']
Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']
Lisp ['rare', 'C++']
Lisp ['is', 'language']
party ['was', 'clever idea']
Y Combinator ['would have said', 'Stop being so stressed out']
Y Combinator ['is', 'investment firm']
Y Combinator ['helps', 'founders']
Robert Morris ['offered', 'unsolicited advice']
work ['is unprestigious', '']
Jessica Livingston ['is', 'woman']
Jessica Livingston ['decided', 'compile book']
HN ['edge case', 'bizarre']
HN ['edge case', 'when you both write essays and run a forum']
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 4651 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens

In [ ]:

Copied!

# Render the answer once, in bold, as Markdown.
display(Markdown(f"<b>{response}</b>"))

The author worked on a software that allowed users to create documents, which was inspired by Emacs. The software had a scripting language that was a dialect of Lisp, and the author was responsible for writing things in this language.

The author also worked on a software that allowed users to generate web sites. This software was a web app and was written in a dialect of Lisp. The author was also responsible for writing things in this language.

可视化图形RAG¶

如果我们从关键词 ['Interleaf', 'history', 'Software', 'Company'] 出发，对基于图的 RAG 进行可视化，就可以看到这些相互连接的上下文是什么样子。它是信息/知识的另一种表现形式：

精炼和简洁的形式
细粒度的分割
互相连接的结构化特性

In [ ]:

Copied!

%pip install ipython-ngql networkx pyvis
%load_ext ngql
%pip install ipython-ngql networkx pyvis
%load_ext ngql

In [ ]:

Copied!

%ngql --address 127.0.0.1 --port 9669 --user root --password <password>
%ngql --address 127.0.0.1 --port 9669 --user root --password

Connection Pool Created
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
Get connection to ('127.0.0.1', 9669)

Out[ ]:

	Name
0	Apple_Vision_Pro
1	basketballplayer
2	demo_ai_ops
3	demo_basketballplayer
4	demo_data_lineage
5	demo_fifa_2022
6	demo_fraud_detection
7	demo_identity_resolution
8	demo_movie_recommendation
9	demo_sns
10	guardians
11	k8s
12	langchain
13	llamaindex
14	paul_graham_essay
15	squid_game
16	test

In [ ]:

Copied!





%%ngql
USE paul_graham_essay;
MATCH p=(n)-[*1..2]-()
  WHERE id(n) IN ['Interleaf', 'history', 'Software', 'Company']
RETURN p LIMIT 100;

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
Get connection to ('127.0.0.1', 9669)

Out[ ]:

	p
0	("Interleaf" :entity{name: "Interleaf"})-[:rel...
1	("Interleaf" :entity{name: "Interleaf"})-[:rel...
2	("Interleaf" :entity{name: "Interleaf"})-[:rel...
3	("Interleaf" :entity{name: "Interleaf"})-[:rel...

In [ ]:

Copied!

%ng_draw
%ng_draw

nebulagraph_draw.html

Out[ ]:

# 用嵌入进行查询

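在许多情况下，我们希望在查询知识图谱时利用嵌入向量：通过计算查询与图中三元组之间的嵌入相似度，找到语义上最相关的三元组，而不仅仅依赖关键词匹配。

例如，前面的查询日志中出现了 “Index was not constructed with embeddings, skipping embedding usage...”，说明此前构建的索引不包含嵌入向量。要启用这一能力，需要在构建索引时传入 include_embeddings=True，并在创建查询引擎时设置 embedding_mode（例如 "hybrid"）和 similarity_top_k。

下面是一个使用嵌入向量进行查询的示例。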

In [ ]:

Copied!





# NOTE: this can take a while! Build the index only once.

# Rebuild the index, this time asking for embeddings to be included.
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)

# Query engine configured for hybrid embedding mode with the top 5 matches.
query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)

In [ ]:

Copied!





# Query using the top-k triplets plus keywords (duplicate triplets are
# removed). k is the similarity_top_k given when the query engine was
# created (5 in this notebook).
# The question is asked in English, like the earlier queries, because the
# indexed essay and the extracted entities are in English.
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf"
)

In [ ]:

Copied!

# Render the answer once, in bold, as Markdown.
display(Markdown(f"<b>{response}</b>"))

使用更全局（跨节点）上下文的查询¶

In [ ]:

Copied!





# Same hybrid configuration as before, plus explore_global_knowledge=True.
query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
    explore_global_knowledge=True,
)

# Run the query once; a repeated call would only repeat the LLM requests.
response = query_engine.query("Tell me more about what the author and Lisp")

可视化图表¶

In [ ]:

Copied!





# Build the graph visualisation.
from pyvis.network import Network

# Export the index as a networkx graph and hand it to pyvis.
g = index.get_networkx_graph()
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
# Writes the rendering to example.html (done once).
net.show("example.html")

Out[ ]:

[可选] 尝试构建图并手动添加三元组！¶

In [ ]:

Copied!

# Node parser used below to split the documents into nodes.
from llama_index.core.node_parser import SentenceSplitter

In [ ]:

Copied!

# Create the splitter with its default settings.
node_parser = SentenceSplitter()

In [ ]:

Copied!

# Split the loaded documents into nodes (done once).
nodes = node_parser.get_nodes_from_documents(documents)

In [ ]:

Copied!

# Not yet implemented.
# NOTE(review): the original note does not say which feature is missing — confirm.

# For now, initialise an index from an empty document list (done once).
index = KnowledgeGraphIndex.from_documents([], storage_context=storage_context)

In [ ]:

Copied!





# Manually add keyword mappings and nodes.
# Each triplet has the form (subject, relationship, object).
# Every triplet is upserted once, together with the node it belongs to.

# Triplets for node 0
node_0_tups = [
    ("author", "worked on", "writing"),
    ("author", "worked on", "programming"),
]
for tup in node_0_tups:
    index.upsert_triplet_and_node(tup, nodes[0])

# Triplets for node 1
node_1_tups = [
    ("Interleaf", "made software for", "creating documents"),
    ("Interleaf", "added", "scripting language"),
    ("software", "generate", "web sites"),
]
for tup in node_1_tups:
    index.upsert_triplet_and_node(tup, nodes[1])

In [ ]:

Copied!

# Query the manually built index. include_text=False is passed so the
# engine is created without the text option used in the earlier cells.
query_engine = index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)

# Run the query once.
response = query_engine.query("Tell me more about Interleaf")

In [ ]:

Copied!

# Last expression of the cell: the notebook shows the response as a plain string.
str(response)