Source code for langchain_community.document_loaders.srt

from pathlib import Path
from typing import List, Union

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class SRTLoader(BaseLoader):
    """Load `.srt` (subtitle) files."""

    def __init__(self, file_path: Union[str, Path]):
        """Initialize with a file path.

        Args:
            file_path: Path to the `.srt` file, as a string or a `Path`.
                It is stored on the instance as a string.

        Raises:
            ImportError: If the `pysrt` package is not installed.
        """
        # Import only to verify that the dependency is available, so a
        # missing package is reported at construction time.
        try:
            import pysrt  # noqa:F401
        except ImportError as err:
            # Chain the original error so the traceback shows the root cause.
            raise ImportError(
                "package `pysrt` not found, please install it with `pip install pysrt`"
            ) from err
        self.file_path = str(file_path)

    def load(self) -> List[Document]:
        """Load the subtitle file using `pysrt`.

        Returns:
            A list containing a single `Document`. Its `page_content` is the
            text of every parsed subtitle entry joined with single spaces, and
            its metadata holds the file path under the `"source"` key.
        """
        import pysrt

        parsed_info = pysrt.open(self.file_path)
        text = " ".join(t.text for t in parsed_info)
        metadata = {"source": self.file_path}
        return [Document(page_content=text, metadata=metadata)]