# Source code for langchain_community.document_loaders.twitter

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Union

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader

if TYPE_CHECKING:
    import tweepy
    from tweepy import OAuth2BearerHandler, OAuthHandler


def _dependable_tweepy_import() -> tweepy:
    try:
        import tweepy
    except ImportError:
        raise ImportError(
            "tweepy package not found, please install it with `pip install tweepy`"
        )
    return tweepy


class TwitterTweetLoader(BaseLoader):
    """Load `Twitter` tweets.

    Read tweets from the timelines of the given Twitter user handles.

    First you need to go to
    `https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api`
    to get your token, and create a v2 version of the app.
    """

    def __init__(
        self,
        auth_handler: Union[OAuthHandler, OAuth2BearerHandler],
        twitter_users: Sequence[str],
        number_tweets: Optional[int] = 100,
    ) -> None:
        """Store the auth handler and the query parameters.

        Args:
            auth_handler: tweepy auth handler; passed to ``tweepy.API`` in
                :meth:`load`.
            twitter_users: Screen names whose timelines are loaded.
            number_tweets: Passed as ``count`` to ``user_timeline`` for
                each user.
        """
        self.auth = auth_handler
        self.twitter_users = twitter_users
        self.number_tweets = number_tweets

    def load(self) -> List[Document]:
        """Load tweets for every configured user.

        For each screen name in ``self.twitter_users`` the timeline and the
        user record are requested, and the tweets are converted to documents
        by :meth:`_format_tweets`.

        Returns:
            The documents for all users, in the order the users were given.

        Raises:
            ImportError: If ``tweepy`` is not installed.
        """
        tweepy = _dependable_tweepy_import()
        # The JSON parser is requested so the tweets and the user record can
        # be indexed by key in ``_format_tweets``.
        api = tweepy.API(self.auth, parser=tweepy.parsers.JSONParser())

        results: List[Document] = []
        for username in self.twitter_users:
            tweets = api.user_timeline(screen_name=username, count=self.number_tweets)
            user = api.get_user(screen_name=username)
            docs = self._format_tweets(tweets, user)
            results.extend(docs)
        return results

    def _format_tweets(
        self, tweets: List[Dict[str, Any]], user_info: dict
    ) -> Iterable[Document]:
        """Convert tweets into documents.

        Args:
            tweets: Tweet mappings; each must provide ``"text"`` and
                ``"created_at"``.
            user_info: User record stored under ``"user_info"`` in every
                document's metadata (the same object is shared by all of
                them).

        Yields:
            One ``Document`` per tweet, with the tweet text as
            ``page_content`` and ``created_at`` / ``user_info`` as metadata.
        """
        for tweet in tweets:
            metadata = {
                "created_at": tweet["created_at"],
                "user_info": user_info,
            }
            yield Document(
                page_content=tweet["text"],
                metadata=metadata,
            )

    @classmethod
    def from_bearer_token(
        cls,
        oauth2_bearer_token: str,
        twitter_users: Sequence[str],
        number_tweets: Optional[int] = 100,
    ) -> TwitterTweetLoader:
        """Create a TwitterTweetLoader from an OAuth2 bearer token.

        Args:
            oauth2_bearer_token: Token used to build a
                ``tweepy.OAuth2BearerHandler``.
            twitter_users: Screen names whose timelines are loaded.
            number_tweets: Passed as ``count`` to ``user_timeline`` for
                each user.

        Returns:
            A loader configured with the bearer-token auth handler.

        Raises:
            ImportError: If ``tweepy`` is not installed.
        """
        tweepy = _dependable_tweepy_import()
        auth = tweepy.OAuth2BearerHandler(oauth2_bearer_token)
        return cls(
            auth_handler=auth,
            twitter_users=twitter_users,
            number_tweets=number_tweets,
        )

    @classmethod
    def from_secrets(
        cls,
        access_token: str,
        access_token_secret: str,
        consumer_key: str,
        consumer_secret: str,
        twitter_users: Sequence[str],
        number_tweets: Optional[int] = 100,
    ) -> TwitterTweetLoader:
        """Create a TwitterTweetLoader from access tokens and secrets.

        Args:
            access_token: Forwarded to ``tweepy.OAuthHandler``.
            access_token_secret: Forwarded to ``tweepy.OAuthHandler``.
            consumer_key: Forwarded to ``tweepy.OAuthHandler``.
            consumer_secret: Forwarded to ``tweepy.OAuthHandler``.
            twitter_users: Screen names whose timelines are loaded.
            number_tweets: Passed as ``count`` to ``user_timeline`` for
                each user.

        Returns:
            A loader configured with the resulting auth handler.

        Raises:
            ImportError: If ``tweepy`` is not installed.
        """
        tweepy = _dependable_tweepy_import()
        # NOTE(review): all four credentials are passed by keyword, which
        # presumably requires a tweepy release whose OAuthHandler accepts
        # access_token / access_token_secret in its constructor - confirm.
        auth = tweepy.OAuthHandler(
            access_token=access_token,
            access_token_secret=access_token_secret,
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
        )
        return cls(
            auth_handler=auth,
            twitter_users=twitter_users,
            number_tweets=number_tweets,
        )