Skip to content

Reddit

RedditReader #

Bases: BaseReader

Reddit的Subreddit帖子和一级评论阅读器。

Source code in llama_index/readers/reddit/base.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class RedditReader(BaseReader):
    """Reader for subreddit posts and their top-level comments."""

    def load_data(
        self,
        subreddits: List[str],
        search_keys: List[str],
        post_limit: Optional[int] = 10,
    ) -> List[Document]:
        """Load text from matching posts and their top-level comments.

        Every subreddit is searched once per keyword. Each matching post
        contributes one document holding the post's ``selftext`` and one
        document per top-level comment holding the comment's ``body``.
        Results are not de-duplicated, so a post that matches several
        keywords is returned once per matching keyword.

        Reddit credentials are read from the environment variables
        ``REDDIT_CLIENT_ID``, ``REDDIT_CLIENT_SECRET``, ``REDDIT_USER_AGENT``,
        ``REDDIT_USERNAME`` and ``REDDIT_PASSWORD``.

        Args:
            subreddits (List[str]): Names of the subreddits to read.
            search_keys (List[str]): Keywords to search for in each subreddit.
            post_limit (Optional[int]): Maximum number of posts requested for
                each (subreddit, keyword) search. Defaults to 10. The value
                is forwarded unchanged as praw's ``limit`` argument
                (NOTE(review): ``None`` presumably means "no explicit cap" in
                praw; confirm against the praw docs).

        Returns:
            List[Document]: Post and comment documents in the order they
            were encountered.
        """
        import os

        import praw
        from praw.models import MoreComments

        reddit = praw.Reddit(
            client_id=os.getenv("REDDIT_CLIENT_ID"),
            client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
            user_agent=os.getenv("REDDIT_USER_AGENT"),
            username=os.getenv("REDDIT_USERNAME"),
            password=os.getenv("REDDIT_PASSWORD"),
        )

        documents = []

        for subreddit_name in subreddits:
            subreddit = reddit.subreddit(subreddit_name)

            for keyword in search_keys:
                for post in subreddit.search(keyword, limit=post_limit):
                    documents.append(Document(text=post.selftext))
                    # MoreComments entries are placeholders for comments that
                    # were not loaded; they carry no body text, so skip them.
                    documents.extend(
                        Document(text=comment.body)
                        for comment in post.comments
                        if not isinstance(comment, MoreComments)
                    )

        return documents

load_data #

load_data(
    subreddits: List[str],
    search_keys: List[str],
    post_limit: Optional[int] = [10],
) -> List[Document]

从相关帖子和顶层评论中加载文本,给定搜索关键词。

Source code in llama_index/readers/reddit/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
    def load_data(
        self,
        subreddits: List[str],
        search_keys: List[str],
        post_limit: Optional[int] = 10,
    ) -> List[Document]:
        """Load text from matching posts and their top-level comments.

        Every subreddit is searched once per keyword. Each matching post
        contributes one document holding the post's ``selftext`` and one
        document per top-level comment holding the comment's ``body``.
        Results are not de-duplicated, so a post that matches several
        keywords is returned once per matching keyword.

        Reddit credentials are read from the environment variables
        ``REDDIT_CLIENT_ID``, ``REDDIT_CLIENT_SECRET``, ``REDDIT_USER_AGENT``,
        ``REDDIT_USERNAME`` and ``REDDIT_PASSWORD``.

        Args:
            subreddits (List[str]): Names of the subreddits to read.
            search_keys (List[str]): Keywords to search for in each subreddit.
            post_limit (Optional[int]): Maximum number of posts requested for
                each (subreddit, keyword) search. Defaults to 10. The value
                is forwarded unchanged as praw's ``limit`` argument
                (NOTE(review): ``None`` presumably means "no explicit cap" in
                praw; confirm against the praw docs).

        Returns:
            List[Document]: Post and comment documents in the order they
            were encountered.
        """
        import os

        import praw
        from praw.models import MoreComments

        reddit = praw.Reddit(
            client_id=os.getenv("REDDIT_CLIENT_ID"),
            client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
            user_agent=os.getenv("REDDIT_USER_AGENT"),
            username=os.getenv("REDDIT_USERNAME"),
            password=os.getenv("REDDIT_PASSWORD"),
        )

        documents = []

        for subreddit_name in subreddits:
            subreddit = reddit.subreddit(subreddit_name)

            for keyword in search_keys:
                for post in subreddit.search(keyword, limit=post_limit):
                    documents.append(Document(text=post.selftext))
                    # MoreComments entries are placeholders for comments that
                    # were not loaded; they carry no body text, so skip them.
                    documents.extend(
                        Document(text=comment.body)
                        for comment in post.comments
                        if not isinstance(comment, MoreComments)
                    )

        return documents