Source code for langchain_community.document_loaders.azure_ai_data

from typing import Iterator, Optional

from langchain_community.docstore.document import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.unstructured import UnstructuredFileIOLoader


class AzureAIDataLoader(BaseLoader):
    """Load documents from Azure AI data."""

    def __init__(self, url: str, glob: Optional[str] = None):
        """Initialize with the URL of a data asset or storage location.

        Args:
            url: URL to the data asset or storage location.
            glob: Optional glob pattern to select files. Defaults to None.
        """
        # URL to the data asset or storage location.
        self.url = url
        # Optional glob pattern to select files. Defaults to None.
        self.glob_pattern = glob

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load documents from the configured location.

        Builds an ``AzureMachineLearningFileSystem`` for ``self.url``, lists
        the remote paths (via ``fs.glob`` when a glob pattern was given,
        otherwise via ``fs.ls``), opens each path and yields the documents
        produced by ``UnstructuredFileIOLoader`` for that file.

        Yields:
            Documents parsed from each remote path, in listing order.

        Raises:
            ImportError: If ``azureml.fsspec`` cannot be imported.
        """
        # Imported lazily so the dependency is only required when the loader
        # is actually used.
        try:
            from azureml.fsspec import AzureMachineLearningFileSystem
        except ImportError as exc:
            raise ImportError(
                "Could not import azureml-fsspec package. "
                "Please install it with `pip install azureml-fsspec`."
            ) from exc

        fs = AzureMachineLearningFileSystem(self.url)

        if self.glob_pattern:
            remote_paths_list = fs.glob(self.glob_pattern)
        else:
            remote_paths_list = fs.ls()

        for remote_path in remote_paths_list:
            # Keep the remote file open while the loader reads from it.
            with fs.open(remote_path) as f:
                loader = UnstructuredFileIOLoader(file=f)
                yield from loader.load()