Source code for langchain_community.document_loaders.tsv

from pathlib import Path
from typing import Any, List, Union

from langchain_community.document_loaders.unstructured import (
    UnstructuredFileLoader,
    validate_unstructured_version,
)


class UnstructuredTSVLoader(UnstructuredFileLoader):
    """Load `TSV` files using `Unstructured`.

    Like other Unstructured loaders, UnstructuredTSVLoader can be used in
    both "single" and "elements" mode. If you use the loader in "elements"
    mode, the TSV file will be a single Unstructured Table element, and an
    HTML representation of the table will be available in the
    "text_as_html" key in the document metadata.

    Examples
    --------
    from langchain_community.document_loaders.tsv import UnstructuredTSVLoader

    loader = UnstructuredTSVLoader("stanley-cups.tsv", mode="elements")
    docs = loader.load()
    """

    def __init__(
        self,
        file_path: Union[str, Path],
        mode: str = "single",
        **unstructured_kwargs: Any,
    ):
        """Initialize the loader for a single TSV file.

        Args:
            file_path: Location of the TSV file, as a string or ``Path``.
            mode: Loading mode, forwarded unchanged to the base-class
                initializer. Defaults to ``"single"``.
            **unstructured_kwargs: Extra keyword arguments, forwarded
                unchanged to the base-class initializer.
        """
        # Check the installed `unstructured` version against the 0.7.6
        # minimum before any base-class setup runs.
        validate_unstructured_version(min_unstructured_version="0.7.6")
        super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)

    def _get_elements(self) -> List:
        """Partition the TSV file and return the result of ``partition_tsv``.

        ``self.file_path`` is passed as the ``filename`` argument and
        ``self.unstructured_kwargs`` is expanded into additional keyword
        arguments.
        """
        # Imported here rather than at module level, so importing this
        # module does not itself import `unstructured.partition.tsv`.
        from unstructured.partition.tsv import partition_tsv

        return partition_tsv(filename=self.file_path, **self.unstructured_kwargs)