GithubRepositoryReaderDemo
这是一个用于读取Github仓库信息的Python程序。
如果您在colab上打开这个笔记本,您可能需要安装LlamaIndex 🦙。
In [ ]:
Copied!
%pip install llama-index-readers-github
%pip install llama-index-readers-github
In [ ]:
Copied!
!pip install llama-index
!pip install llama-index
In [ ]:
Copied!
# 这是因为我们在DiscordReader中使用了asyncio.loop_until_complete。由于Jupyter内核本身运行在一个事件循环上,我们需要在嵌套方面进行一些帮助。 import nest_asyncionest_asyncio.apply()
# 这是因为我们在DiscordReader中使用了asyncio.loop_until_complete。由于Jupyter内核本身运行在一个事件循环上,我们需要在嵌套方面进行一些帮助。 import nest_asyncionest_asyncio.apply()
In [ ]:
Copied!
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from llama_index.core import VectorStoreIndex
from llama_index.readers.github import GithubRepositoryReader, GithubClient
from IPython.display import Markdown, display
import os
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from llama_index.core import VectorStoreIndex
from llama_index.readers.github import GithubRepositoryReader, GithubClient
from IPython.display import Markdown, display
import os
env: OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
In [ ]:
Copied!
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "llama_index"
branch = "main"
github_client = GithubClient(github_token=github_token, verbose=True)
documents = GithubRepositoryReader(
github_client=github_client,
owner=owner,
repo=repo,
use_parser=False,
verbose=False,
filter_directories=(
["docs"],
GithubRepositoryReader.FilterType.INCLUDE,
),
filter_file_extensions=(
[
".png",
".jpg",
".jpeg",
".gif",
".svg",
".ico",
"json",
".ipynb",
],
GithubRepositoryReader.FilterType.EXCLUDE,
),
).load_data(branch=branch)
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "llama_index"
branch = "main"
github_client = GithubClient(github_token=github_token, verbose=True)
documents = GithubRepositoryReader(
github_client=github_client,
owner=owner,
repo=repo,
use_parser=False,
verbose=False,
filter_directories=(
["docs"],
GithubRepositoryReader.FilterType.INCLUDE,
),
filter_file_extensions=(
[
".png",
".jpg",
".jpeg",
".gif",
".svg",
".ico",
"json",
".ipynb",
],
GithubRepositoryReader.FilterType.EXCLUDE,
),
).load_data(branch=branch)
In [ ]:
Copied!
index = VectorStoreIndex.from_documents(documents)
index = VectorStoreIndex.from_documents(documents)
In [ ]:
Copied!
query_engine = index.as_query_engine()
response = query_engine.query(
"What is the difference between VectorStoreIndex and SummaryIndex?",
verbose=True,
)
query_engine = index.as_query_engine()
response = query_engine.query(
"What is the difference between VectorStoreIndex and SummaryIndex?",
verbose=True,
)
In [ ]:
Copied!
display(Markdown(f"<b>{response}</b>"))
display(Markdown(f"{response}"))