Bases: BaseReader
Reddit 子版块(Subreddit)帖子及其顶层评论的阅读器。
Source code in llama_index/readers/reddit/base.py
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
class RedditReader(BaseReader):
    """Reader for subreddit posts and their top-level comments."""

    def load_data(
        self,
        subreddits: List[str],
        search_keys: List[str],
        post_limit: Optional[int] = 10,
    ) -> List[Document]:
        """Load text from matching posts and their top-level comments.

        Every subreddit is searched once per keyword. Reddit credentials are
        read from the environment variables ``REDDIT_CLIENT_ID``,
        ``REDDIT_CLIENT_SECRET``, ``REDDIT_USER_AGENT``, ``REDDIT_USERNAME``
        and ``REDDIT_PASSWORD``.

        Args:
            subreddits (List[str]): Names of the subreddits to search.
            search_keys (List[str]): Keywords to search for in each subreddit.
            post_limit (Optional[int]): Forwarded unchanged as ``limit`` to
                each search call, i.e. it applies per (subreddit, keyword)
                pair. Defaults to 10.

        Returns:
            List[Document]: For each matching post, one document holding the
            post's ``selftext`` followed by one document per top-level
            comment body, in the order the searches yield them.
        """
        # Imported inside the method, so praw is only needed when load_data
        # is actually called.
        import os

        import praw
        from praw.models import MoreComments

        reddit = praw.Reddit(
            client_id=os.getenv("REDDIT_CLIENT_ID"),
            client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
            user_agent=os.getenv("REDDIT_USER_AGENT"),
            username=os.getenv("REDDIT_USERNAME"),
            password=os.getenv("REDDIT_PASSWORD"),
        )

        documents = []
        for subreddit_name in subreddits:
            subreddit = reddit.subreddit(subreddit_name)
            for keyword in search_keys:
                for post in subreddit.search(keyword, limit=post_limit):
                    documents.append(Document(text=post.selftext))
                    # Skip MoreComments entries; only actual top-level
                    # comments are collected.
                    documents.extend(
                        Document(text=comment.body)
                        for comment in post.comments
                        if not isinstance(comment, MoreComments)
                    )
        return documents
|
load_data
load_data(
subreddits: List[str],
search_keys: List[str],
post_limit: Optional[int] = [10],
) -> List[Document]
根据给定的搜索关键词,从相关帖子及其顶层评论中加载文本。
Source code in llama_index/readers/reddit/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def load_data(
    self,
    subreddits: List[str],
    search_keys: List[str],
    post_limit: Optional[int] = 10,
) -> List[Document]:
    """Load text from matching posts and their top-level comments.

    Every subreddit is searched once per keyword. Reddit credentials are
    read from the environment variables ``REDDIT_CLIENT_ID``,
    ``REDDIT_CLIENT_SECRET``, ``REDDIT_USER_AGENT``, ``REDDIT_USERNAME``
    and ``REDDIT_PASSWORD``.

    Args:
        subreddits (List[str]): Names of the subreddits to search.
        search_keys (List[str]): Keywords to search for in each subreddit.
        post_limit (Optional[int]): Forwarded unchanged as ``limit`` to each
            search call, i.e. it applies per (subreddit, keyword) pair.
            Defaults to 10.

    Returns:
        List[Document]: For each matching post, one document holding the
        post's ``selftext`` followed by one document per top-level comment
        body, in the order the searches yield them.
    """
    # Imported inside the method, so praw is only needed when load_data is
    # actually called.
    import os

    import praw
    from praw.models import MoreComments

    reddit = praw.Reddit(
        client_id=os.getenv("REDDIT_CLIENT_ID"),
        client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
        user_agent=os.getenv("REDDIT_USER_AGENT"),
        username=os.getenv("REDDIT_USERNAME"),
        password=os.getenv("REDDIT_PASSWORD"),
    )

    documents = []
    for subreddit_name in subreddits:
        subreddit = reddit.subreddit(subreddit_name)
        for keyword in search_keys:
            for post in subreddit.search(keyword, limit=post_limit):
                documents.append(Document(text=post.selftext))
                # Skip MoreComments entries; only actual top-level comments
                # are collected.
                documents.extend(
                    Document(text=comment.body)
                    for comment in post.comments
                    if not isinstance(comment, MoreComments)
                )
    return documents
|