Genius

GeniusReader #

Bases: BaseReader

GeniusReader 用于通过 lyricsgenius 库从 Genius 加载歌词并执行各种相关操作。

Source code in llama_index/readers/genius/base.py

class GeniusReader(BaseReader):
    """Reader that loads song lyrics from Genius through the lyricsgenius client.

    Every loader wraps the lyrics it fetches in ``Document`` objects.
    """

    def __init__(self, access_token: str):
        """Initialize the reader with a Genius API access token.

        Args:
            access_token: Token passed to ``lyricsgenius.Genius``.

        Raises:
            ImportError: If the ``lyricsgenius`` package is not installed.
        """
        try:
            import lyricsgenius
        except ImportError as err:
            raise ImportError(
                "Please install lyricsgenius via 'pip install lyricsgenius'"
            ) from err
        self.genius = lyricsgenius.Genius(access_token)

    def load_artist_songs(
        self, artist_name: str, max_songs: Optional[int] = None
    ) -> List[Document]:
        """Load all songs, or up to ``max_songs`` songs, of an artist.

        Args:
            artist_name: Name used for the artist search.
            max_songs: Forwarded to the artist search as its song limit.

        Returns:
            One document per song of the found artist, or an empty list when
            the artist search yields nothing.
        """
        artist = self.genius.search_artist(artist_name, max_songs=max_songs)
        if not artist:
            return []
        return [Document(text=song.lyrics) for song in artist.songs]

    def load_all_artist_songs(self, artist_name: str) -> List[Document]:
        """Load every song of an artist.

        Args:
            artist_name: Name used for the artist search.

        Returns:
            One document per song of the found artist, or an empty list when
            the artist search yields nothing.
        """
        artist = self.genius.search_artist(artist_name)
        if not artist:
            # Without this guard a failed search crashed on ``None.save_lyrics``.
            return []
        # NOTE(review): save_lyrics presumably persists the lyrics to disk via
        # lyricsgenius; kept for backward compatibility - confirm this side
        # effect is actually wanted in a reader.
        artist.save_lyrics()
        return [Document(text=song.lyrics) for song in artist.songs]

    def load_artist_songs_with_filters(
        self,
        artist_name: str,
        most_popular: bool = True,
        max_songs: Optional[int] = None,
        max_pages: int = 50,
    ) -> Optional[Document]:
        """Load the most or least popular song of an artist.

        The artist's songs are requested sorted by popularity, 50 per page,
        until ``max_pages`` or ``max_songs`` is reached or no next page is
        reported.

        Args:
            artist_name: Name used for the artist search.
            most_popular: True picks the first (most popular) of the
                considered songs, False picks the last (least popular).
            max_songs: Maximum number of songs to consider for popularity.
            max_pages: Maximum number of result pages to fetch.

        Returns:
            A document with the lyrics of the selected song, or None when the
            artist is not found, no songs are available, or the song lookup
            fails.
        """
        artist = self.genius.search_artist(artist_name, max_songs=1)
        if not artist:
            return None

        songs = []
        page = 1
        while (
            page
            and page <= max_pages
            and (max_songs is None or len(songs) < max_songs)
        ):
            response = self.genius.artist_songs(
                artist.id, sort="popularity", per_page=50, page=page
            )
            songs.extend(response["songs"])
            limit_reached = max_songs is not None and len(songs) >= max_songs
            page = None if limit_reached else response["next_page"]

        # Whole pages are fetched, so trim to the requested window; otherwise
        # the "least popular" pick could fall outside ``max_songs``.
        if max_songs is not None:
            songs = songs[:max_songs]
        if not songs:
            # Nothing to choose from (no songs returned, or max_songs <= 0).
            return None

        target_song = songs[0] if most_popular else songs[-1]
        song_details = self.genius.search_song(target_song["title"], artist.name)
        return Document(text=song_details.lyrics) if song_details else None

    def load_song_by_url_or_id(
        self, song_url: Optional[str] = None, song_id: Optional[int] = None
    ) -> List[Document]:
        """Load a single song by its URL or its ID.

        ``song_url`` takes precedence when both arguments are given.

        Args:
            song_url: URL of the song page.
            song_id: Genius ID of the song.

        Returns:
            A one-element list with the song's lyrics, or an empty list when
            neither argument is given or no lyrics are returned.
        """
        # Per the lyricsgenius documentation, ``Genius.song`` accepts only a
        # song ID and returns raw API data (a dict), so it can neither serve
        # URL lookups nor expose ``.lyrics``; ``Genius.lyrics`` handles both.
        if song_url:
            lyrics = self.genius.lyrics(song_url=song_url)
        elif song_id:
            lyrics = self.genius.lyrics(song_id=song_id)
        else:
            return []

        return [Document(text=lyrics)] if lyrics else []

    def search_songs_by_lyrics(self, lyrics: str) -> List[Document]:
        """Search for songs by a snippet of lyrics.

        Args:
            lyrics: The lyrics snippet to search for.

        Returns:
            One document per search hit, holding that song's lyrics.
        """
        response = self.genius.search_songs(lyrics)
        if not response:
            return []

        # Each hit carries the song page URL from which the lyrics are fetched.
        return [
            Document(text=self.genius.lyrics(song_url=hit["result"]["url"]))
            for hit in response["hits"]
        ]

    def load_songs_by_tag(
        self, tag: str, max_songs: Optional[int] = None, max_pages: int = 50
    ) -> List[Document]:
        """Load songs for a specific tag.

        Args:
            tag: Tag or genre to load songs for.
            max_songs: Maximum number of songs to fetch; None means no
                specific limit.
            max_pages: Maximum number of result pages to fetch.

        Returns:
            Documents holding the lyrics of the fetched songs.
        """
        documents = []
        page = 1

        while (
            page
            and page <= max_pages
            and (max_songs is None or len(documents) < max_songs)
        ):
            res = self.genius.tag(tag, page=page)
            for hit in res["hits"]:
                if max_songs is not None and len(documents) >= max_songs:
                    break
                documents.append(
                    Document(text=self.genius.lyrics(song_url=hit["url"]))
                )
            # Only follow the next page while the song limit is not reached.
            limit_reached = max_songs is not None and len(documents) >= max_songs
            page = None if limit_reached else res["next_page"]

        return documents

load_artist_songs #

load_artist_songs(
    artist_name: str, max_songs: Optional[int] = None
) -> List[Document]

加载某位艺术家的全部歌曲或指定数量的歌曲。

Source code in llama_index/readers/genius/base.py

def load_artist_songs(
    self, artist_name: str, max_songs: Optional[int] = None
) -> List[Document]:
    """Load all songs, or up to ``max_songs`` songs, of an artist.

    Args:
        artist_name: Name used for the artist search.
        max_songs: Forwarded to the artist search as its song limit.

    Returns:
        One document per song of the found artist, or an empty list when
        the artist search yields nothing.
    """
    found_artist = self.genius.search_artist(artist_name, max_songs=max_songs)
    if not found_artist:
        return []

    documents = []
    for track in found_artist.songs:
        documents.append(Document(text=track.lyrics))
    return documents

load_artist_songs_with_filters #

load_artist_songs_with_filters(
    artist_name: str,
    most_popular: bool = True,
    max_songs: Optional[int] = None,
    max_pages: int = 50,
) -> Document

加载艺术家最受欢迎或最不受欢迎的歌曲。

Returns:

Type	Description
`Document`	包含最受欢迎/最不受欢迎歌曲歌词的文档；若未找到艺术家或歌曲，则返回 None。

Source code in llama_index/readers/genius/base.py

    def load_artist_songs_with_filters(
        self,
        artist_name: str,
        most_popular: bool = True,
        max_songs: Optional[int] = None,
        max_pages: int = 50,
    ) -> Optional[Document]:
        """Load the most or least popular song of an artist.

        The artist's songs are requested sorted by popularity, 50 per page,
        until ``max_pages`` or ``max_songs`` is reached or no next page is
        reported.

        Args:
            artist_name: Name used for the artist search.
            most_popular: True picks the first (most popular) of the
                considered songs, False picks the last (least popular).
            max_songs: Maximum number of songs to consider for popularity.
            max_pages: Maximum number of result pages to fetch.

        Returns:
            A document with the lyrics of the selected song, or None when the
            artist is not found, no songs are available, or the song lookup
            fails.
        """
        artist = self.genius.search_artist(artist_name, max_songs=1)
        if not artist:
            return None

        songs = []
        page = 1
        while (
            page
            and page <= max_pages
            and (max_songs is None or len(songs) < max_songs)
        ):
            response = self.genius.artist_songs(
                artist.id, sort="popularity", per_page=50, page=page
            )
            songs.extend(response["songs"])
            limit_reached = max_songs is not None and len(songs) >= max_songs
            page = None if limit_reached else response["next_page"]

        # Whole pages are fetched, so trim to the requested window; otherwise
        # the "least popular" pick could fall outside ``max_songs``.
        if max_songs is not None:
            songs = songs[:max_songs]
        if not songs:
            # Nothing to choose from (no songs returned, or max_songs <= 0).
            return None

        target_song = songs[0] if most_popular else songs[-1]
        song_details = self.genius.search_song(target_song["title"], artist.name)
        return Document(text=song_details.lyrics) if song_details else None

load_song_by_url_or_id #

load_song_by_url_or_id(
    song_url: Optional[str] = None,
    song_id: Optional[int] = None,
) -> List[Document]

通过URL或ID加载歌曲。

Source code in llama_index/readers/genius/base.py

def load_song_by_url_or_id(
    self, song_url: Optional[str] = None, song_id: Optional[int] = None
) -> List[Document]:
    """Load a single song by its URL or its ID.

    ``song_url`` takes precedence when both arguments are given.

    Args:
        song_url: URL of the song page.
        song_id: Genius ID of the song.

    Returns:
        A one-element list with the song's lyrics, or an empty list when
        neither argument is given or no lyrics are returned.
    """
    # Per the lyricsgenius documentation, ``Genius.song`` accepts only a
    # song ID and returns raw API data (a dict), so it can neither serve
    # URL lookups nor expose ``.lyrics``; ``Genius.lyrics`` handles both.
    if song_url:
        lyrics = self.genius.lyrics(song_url=song_url)
    elif song_id:
        lyrics = self.genius.lyrics(song_id=song_id)
    else:
        return []

    return [Document(text=lyrics)] if lyrics else []

search_songs_by_lyrics #

search_songs_by_lyrics(lyrics: str) -> List[Document]

根据歌词片段搜索歌曲。

Parameters:

Name	Type	Description	Default
`lyrics`	`str`	你要查找的歌词片段。	required

Returns:

Type	Description
`List[Document]`	包含具有这些歌词的歌曲的文档列表。

Source code in llama_index/readers/genius/base.py

    def search_songs_by_lyrics(self, lyrics: str) -> List[Document]:
        """Search for songs by a snippet of lyrics.

        Args:
            lyrics: The lyrics snippet to search for.

        Returns:
            One document per search hit, holding that song's lyrics.
        """
        response = self.genius.search_songs(lyrics)
        if not response:
            return []

        # Each hit carries the song page URL from which the lyrics are fetched.
        return [
            Document(text=self.genius.lyrics(song_url=hit["result"]["url"]))
            for hit in response["hits"]
        ]

load_songs_by_tag #

load_songs_by_tag(
    tag: str,
    max_songs: Optional[int] = None,
    max_pages: int = 50,
) -> List[Document]

加载特定标签的歌曲。

Parameters:

Name	Type	Description	Default
`tag`	`str`	要加载歌曲的标签或流派。	required
`max_songs`	`Optional[int]`	要获取的最大歌曲数。如果为None，则没有特定限制。	`None`
`max_pages`	`int`	要获取的最大页面数。	`50`

Returns:

Type	Description
`List[Document]`	包含歌词的文档列表。

Source code in llama_index/readers/genius/base.py

    def load_songs_by_tag(
        self, tag: str, max_songs: Optional[int] = None, max_pages: int = 50
    ) -> List[Document]:
        """Load songs for a specific tag.

        Args:
            tag: Tag or genre to load songs for.
            max_songs: Maximum number of songs to fetch; None means no
                specific limit.
            max_pages: Maximum number of result pages to fetch.

        Returns:
            Documents holding the lyrics of the fetched songs.
        """
        documents = []
        current_page = 1

        while (
            current_page
            and current_page <= max_pages
            and (max_songs is None or len(documents) < max_songs)
        ):
            response = self.genius.tag(tag, page=current_page)
            for entry in response["hits"]:
                if max_songs is not None and len(documents) >= max_songs:
                    break
                text = self.genius.lyrics(song_url=entry["url"])
                documents.append(Document(text=text))

            # Only follow the next page while the song limit is not reached.
            if max_songs is None or len(documents) < max_songs:
                current_page = response["next_page"]
            else:
                current_page = None

        return documents