Source code for langchain_community.document_loaders.markdown
from typing import List
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
class UnstructuredMarkdownLoader(UnstructuredFileLoader):
    """Load `Markdown` files using `Unstructured`.

    You can run the loader in one of two modes: "single" and "elements".
    If you use "single" mode, the document will be returned as a single
    `langchain` Document object. If you use "elements" mode, the
    `unstructured` library will split the document into elements such as
    Title and NarrativeText. You can pass in additional `unstructured`
    keyword arguments after mode to apply different `unstructured` settings.

    Examples
    --------
    from langchain_community.document_loaders import UnstructuredMarkdownLoader

    loader = UnstructuredMarkdownLoader(
        "example.md", mode="elements", strategy="fast",
    )
    docs = loader.load()

    References
    ----------
    https://unstructured-io.github.io/unstructured/core/partition.html#partition-md
    """

    def _get_elements(self) -> List:
        """Partition the markdown file at ``self.file_path`` into elements.

        The ``unstructured`` imports are performed lazily inside this method.

        Returns:
            The result of ``partition_md`` called with ``self.file_path`` and
            ``self.unstructured_kwargs``.

        Raises:
            ValueError: If the installed ``unstructured`` version is older
                than 0.4.16.
        """
        from unstructured.__version__ import __version__ as __unstructured_version__
        from unstructured.partition.md import partition_md

        # Pre-release versions look like "0.4.17-dev1"; only the part before
        # the first "-" is numeric, so discard the suffix before comparing.
        release_part = __unstructured_version__.partition("-")[0]
        installed = tuple(int(piece) for piece in release_part.split("."))

        if installed >= (0, 4, 16):
            return partition_md(filename=self.file_path, **self.unstructured_kwargs)

        raise ValueError(
            f"You are on unstructured version {__unstructured_version__}. "
            "Partitioning markdown files is only supported in unstructured>=0.4.16."
        )