Source code for langchain_community.document_loaders.tsv
from pathlib import Path
from typing import Any, List, Union
from langchain_community.document_loaders.unstructured import (
UnstructuredFileLoader,
validate_unstructured_version,
)
class UnstructuredTSVLoader(UnstructuredFileLoader):
    """Load `TSV` files using `Unstructured`.

    Like other Unstructured loaders, UnstructuredTSVLoader can be used in
    both "single" and "elements" mode. In "elements" mode the TSV file
    becomes a single Unstructured Table element, and an HTML representation
    of the table is available under the "text_as_html" key of the document
    metadata.

    Examples
    --------
    from langchain_community.document_loaders.tsv import UnstructuredTSVLoader

    loader = UnstructuredTSVLoader("stanley-cups.tsv", mode="elements")
    docs = loader.load()
    """

    def __init__(
        self,
        file_path: Union[str, Path],
        mode: str = "single",
        **unstructured_kwargs: Any,
    ):
        """Create a loader for a single TSV file.

        Args:
            file_path: Location of the TSV file, as a string or a
                ``pathlib.Path``.
            mode: Loading mode handed to the base loader; the class
                docstring describes "single" and "elements".
            **unstructured_kwargs: Extra keyword arguments handed to the
                base loader. ``_get_elements`` forwards
                ``self.unstructured_kwargs`` to ``partition_tsv``.
        """
        # Check the installed `unstructured` version against the 0.7.6
        # minimum before delegating to the base loader.
        validate_unstructured_version(min_unstructured_version="0.7.6")
        super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)

    def _get_elements(self) -> List:
        """Partition the TSV file and return the resulting elements.

        Returns:
            Whatever ``unstructured.partition.tsv.partition_tsv`` returns
            for the file.
        """
        # Imported lazily so `unstructured` is only required when a file is
        # actually partitioned.
        from unstructured.partition.tsv import partition_tsv

        # `file_path` may be a pathlib.Path; hand the partitioner a plain
        # string so loading does not depend on whether the installed
        # `unstructured` release accepts path objects for `filename`.
        return partition_tsv(
            filename=str(self.file_path), **self.unstructured_kwargs
        )