Skip to content

Index

基础阅读器类。

BaseReader #

Bases: ABC

从目录中加载数据的工具。

Source code in llama_index/core/readers/base.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
class BaseReader(ABC):
    """Utilities for loading data from a directory."""

    def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
        """Lazily load data from the input directory.

        This base implementation only raises; concrete readers override it.

        Raises:
            NotImplementedError: Always, naming the concrete reader class.
        """
        reader_name = self.__class__.__name__
        raise NotImplementedError(
            f"{reader_name} does not provide lazy_load_data method currently"
        )

    async def alazy_load_data(
        self, *args: Any, **load_kwargs: Any
    ) -> Iterable[Document]:
        """Lazily load data from the input directory (coroutine wrapper)."""
        # Fake async: delegates straight to the sync method. Subclasses
        # override this when they have a real async implementation.
        documents = self.lazy_load_data(*args, **load_kwargs)
        return documents

    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory as a fully materialized list."""
        lazy_documents = self.lazy_load_data(*args, **load_kwargs)
        return list(lazy_documents)

    async def aload_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory (coroutine wrapper).

        Delegates to the synchronous ``load_data``.
        """
        loaded = self.load_data(*args, **load_kwargs)
        return loaded

    def load_langchain_documents(self, **load_kwargs: Any) -> List["LCDocument"]:
        """Load data and convert each document to LangChain document format."""
        converted = []
        for document in self.load_data(**load_kwargs):
            converted.append(document.to_langchain_format())
        return converted

    @classmethod
    def __modify_schema__(cls, field_schema: Dict[str, Any], field: Optional[Any]):
        """Set the schema title to the class name, mutating ``field_schema``."""
        field_schema.update(title=cls.__name__)

    @classmethod
    def __get_pydantic_json_schema__(
        cls, core_schema, handler
    ):  # Needed for pydantic v2 to work
        """Build the JSON schema via ``handler`` and title it with the class name."""
        resolved_schema = handler.resolve_ref_schema(handler(core_schema))
        resolved_schema["title"] = cls.__name__
        return resolved_schema

lazy_load_data #

lazy_load_data(
    *args: Any, **load_kwargs: Any
) -> Iterable[Document]

从输入目录中惰性加载数据。

Source code in llama_index/core/readers/base.py
22
23
24
25
26
def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
    """Lazily load data from the input directory.

    This base implementation only raises; concrete readers override it.

    Raises:
        NotImplementedError: Always, naming the concrete reader class.
    """
    reader_name = self.__class__.__name__
    raise NotImplementedError(
        f"{reader_name} does not provide lazy_load_data method currently"
    )

alazy_load_data async #

alazy_load_data(
    *args: Any, **load_kwargs: Any
) -> Iterable[Document]

从输入目录中惰性加载数据。

Source code in llama_index/core/readers/base.py
28
29
30
31
32
33
async def alazy_load_data(
    self, *args: Any, **load_kwargs: Any
) -> Iterable[Document]:
    """Lazily load data from the input directory (coroutine wrapper)."""
    # Fake async: delegates straight to the sync method. Subclasses
    # override this when they have a real async implementation.
    documents = self.lazy_load_data(*args, **load_kwargs)
    return documents

load_data #

load_data(*args: Any, **load_kwargs: Any) -> List[Document]

从输入目录加载数据。

Source code in llama_index/core/readers/base.py
35
36
37
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
    """Load data from the input directory as a fully materialized list."""
    lazy_documents = self.lazy_load_data(*args, **load_kwargs)
    return list(lazy_documents)

aload_data async #

aload_data(
    *args: Any, **load_kwargs: Any
) -> List[Document]

从输入目录加载数据。

Source code in llama_index/core/readers/base.py
39
40
41
async def aload_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
    """Load data from the input directory (coroutine wrapper).

    Delegates to the synchronous ``load_data``.
    """
    loaded = self.load_data(*args, **load_kwargs)
    return loaded

load_langchain_documents #

load_langchain_documents(
    **load_kwargs: Any,
) -> List[LCDocument]

以LangChain文档格式加载数据。

Source code in llama_index/core/readers/base.py
43
44
45
46
def load_langchain_documents(self, **load_kwargs: Any) -> List["LCDocument"]:
    """Load data and convert each document to LangChain document format."""
    converted = []
    for document in self.load_data(**load_kwargs):
        converted.append(document.to_langchain_format())
    return converted

BasePydanticReader #

Bases: BaseReader, BaseComponent

基于 Pydantic 的可序列化数据加载器。

Source code in llama_index/core/readers/base.py
62
63
64
65
66
67
68
69
70
71
class BasePydanticReader(BaseReader, BaseComponent):
    """Serializable data loader with Pydantic.

    Inherits from both ``BaseReader`` and ``BaseComponent`` and declares a
    single field, ``is_remote``.
    """

    # Whether the data comes from a remote API rather than a local file;
    # defaults to False.
    is_remote: bool = Field(
        default=False,
        description="Whether the data is loaded from a remote API or a local file.",
    )

    class Config:
        # NOTE(review): pydantic model configuration; presumably lets fields
        # use types pydantic has no built-in validator for. Confirm against
        # the pydantic version in use.
        arbitrary_types_allowed = True