Source code for langchain_community.document_loaders.tomarkdown

from __future__ import annotations

from typing import Iterator

import requests
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class ToMarkdownLoader(BaseLoader):
    """Load `HTML` using the `2markdown API`.

    The loader sends the configured URL to the 2markdown conversion
    endpoint and exposes the returned ``"article"`` text as a single
    ``Document``.
    """

    def __init__(self, url: str, api_key: str):
        """Initialize with the URL and the API key.

        Args:
            url: Address of the page to convert; also stored as the
                ``"source"`` metadata of the resulting document.
            api_key: Value sent in the ``X-Api-Key`` request header.
        """
        self.url = url
        self.api_key = api_key

    def lazy_load(
        self,
    ) -> Iterator[Document]:
        """Lazily load the file.

        This is a generator, so the HTTP request is only issued once
        iteration starts.

        Yields:
            One ``Document`` whose ``page_content`` is the ``"article"``
            field of the JSON response and whose metadata is
            ``{"source": self.url}``.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
            KeyError: If the JSON response has no ``"article"`` field.
        """
        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely if the service never answers -- consider making a
        # timeout configurable.
        response = requests.post(
            "https://2markdown.com/api/2md",
            headers={"X-Api-Key": self.api_key},
            json={"url": self.url},
        )
        # Surface HTTP failures (bad key, quota, server error) directly
        # instead of letting them show up as a confusing KeyError or JSON
        # decoding error on the lines below.
        response.raise_for_status()
        text = response.json()["article"]
        metadata = {"source": self.url}
        yield Document(page_content=text, metadata=metadata)