Source code for langchain_community.document_loaders.markdown

from typing import List

from langchain_community.document_loaders.unstructured import UnstructuredFileLoader


class UnstructuredMarkdownLoader(UnstructuredFileLoader):
    """Load `Markdown` files using `Unstructured`.

    You can run the loader in one of two modes: "single" and "elements".
    If you use "single" mode, the document will be returned as a single
    `langchain` Document object. If you use "elements" mode, the
    `unstructured` library will split the document into elements such as
    Title and NarrativeText. You can pass in additional `unstructured`
    keyword arguments after mode to apply different `unstructured` settings.

    Examples
    --------
    from langchain_community.document_loaders import UnstructuredMarkdownLoader

    loader = UnstructuredMarkdownLoader(
        "example.md", mode="elements", strategy="fast",
    )
    docs = loader.load()

    References
    ----------
    https://unstructured-io.github.io/unstructured/core/partition.html#partition-md
    """

    def _get_elements(self) -> List:
        """Partition the Markdown file at ``self.file_path`` into elements.

        The installed `unstructured` version is checked first; the file is
        then handed to ``partition_md`` together with
        ``self.unstructured_kwargs``.

        Returns:
            Whatever ``partition_md`` returns for the file.

        Raises:
            ValueError: If the installed `unstructured` version is older
                than 0.4.16.
        """
        # Imported lazily so that `unstructured` is only required when the
        # loader is actually used.
        from unstructured.__version__ import __version__ as __unstructured_version__
        from unstructured.partition.md import partition_md

        minimum_supported = (0, 4, 16)

        # NOTE(MthwRobinson) - enables the loader to work when you're using
        # pre-release versions of unstructured like 0.4.17-dev1: only the part
        # before the first "-" is parsed into numeric components.
        release_part = __unstructured_version__.partition("-")[0]
        parsed_version = tuple(map(int, release_part.split(".")))

        if parsed_version >= minimum_supported:
            return partition_md(filename=self.file_path, **self.unstructured_kwargs)

        raise ValueError(
            f"You are on unstructured version {__unstructured_version__}. "
            "Partitioning markdown files is only supported in unstructured>=0.4.16."
        )