Index

RefDocInfo `dataclass` #

Bases: DataClassJsonMixin

用于表示已摄取文档的数据类。

Source code in llama_index/core/storage/docstore/types.py

@dataclass
class RefDocInfo(DataClassJsonMixin):
    """Dataclass to represent ingested documents."""

    # IDs of the nodes associated with this document (presumably the nodes
    # derived from it — confirm against callers). Each instance gets its own
    # fresh empty list by default.
    node_ids: List = field(default_factory=list)
    # Arbitrary metadata keyed by string. Each instance gets its own fresh
    # empty dict by default.
    metadata: Dict[str, Any] = field(default_factory=dict)

BaseDocumentStore #

Bases: ABC

Source code in llama_index/core/storage/docstore/types.py

class BaseDocumentStore(ABC):
    """Abstract interface for a store of `BaseNode` documents.

    Subclasses supply the abstract primitives (document access, hashes and
    ref-doc bookkeeping). The node helpers at the end of the class are
    concrete and are built purely on `get_document` / `aget_document`.
    """

    # ===== Save/load =====
    def persist(
        self,
        persist_path: str = DEFAULT_PERSIST_PATH,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> None:
        """Persist the docstore to a file.

        The base implementation does nothing; subclasses may override it.

        Args:
            persist_path (str): Path to persist to.
            fs (Optional[fsspec.AbstractFileSystem]): Filesystem to use;
                how ``None`` is treated is up to the overriding subclass.
        """

    # ===== Main interface =====
    @property
    @abstractmethod
    def docs(self) -> Dict[str, BaseNode]:
        """Mapping of stored nodes keyed by string (presumably the doc id)."""

    @abstractmethod
    def add_documents(
        self,
        docs: Sequence[BaseNode],
        allow_update: bool = True,
        batch_size: int = DEFAULT_BATCH_SIZE,
        store_text: bool = True,
    ) -> None:
        """Add `docs` to the store.

        How `allow_update`, `batch_size` and `store_text` are honored is
        left to the implementing subclass.
        """

    @abstractmethod
    async def async_add_documents(
        self,
        docs: Sequence[BaseNode],
        allow_update: bool = True,
        batch_size: int = DEFAULT_BATCH_SIZE,
        store_text: bool = True,
    ) -> None:
        """Async variant with the same parameters as `add_documents`."""

    @abstractmethod
    def get_document(self, doc_id: str, raise_error: bool = True) -> Optional[BaseNode]:
        """Look up a document by `doc_id`; the result may be ``None``."""

    @abstractmethod
    async def aget_document(
        self, doc_id: str, raise_error: bool = True
    ) -> Optional[BaseNode]:
        """Async variant of `get_document`; the result may be ``None``."""

    @abstractmethod
    def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
        """Delete a document from the store."""

    @abstractmethod
    async def adelete_document(self, doc_id: str, raise_error: bool = True) -> None:
        """Delete a document from the store (async variant)."""

    @abstractmethod
    def document_exists(self, doc_id: str) -> bool:
        """Report whether a document with `doc_id` is in the store."""

    @abstractmethod
    async def adocument_exists(self, doc_id: str) -> bool:
        """Async variant of `document_exists`."""

    # ===== Hash =====
    @abstractmethod
    def set_document_hash(self, doc_id: str, doc_hash: str) -> None:
        """Record `doc_hash` as the hash for `doc_id`."""

    @abstractmethod
    async def aset_document_hash(self, doc_id: str, doc_hash: str) -> None:
        """Async variant of `set_document_hash`."""

    @abstractmethod
    def set_document_hashes(self, doc_hashes: Dict[str, str]) -> None:
        """Record several hashes from a mapping (presumably doc id -> hash)."""

    @abstractmethod
    async def aset_document_hashes(self, doc_hashes: Dict[str, str]) -> None:
        """Async variant of `set_document_hashes`."""

    @abstractmethod
    def get_document_hash(self, doc_id: str) -> Optional[str]:
        """Return the hash stored for `doc_id`, or ``None``."""

    @abstractmethod
    async def aget_document_hash(self, doc_id: str) -> Optional[str]:
        """Async variant of `get_document_hash`."""

    @abstractmethod
    def get_all_document_hashes(self) -> Dict[str, str]:
        """Return every stored hash as a string-to-string mapping."""

    @abstractmethod
    async def aget_all_document_hashes(self) -> Dict[str, str]:
        """Async variant of `get_all_document_hashes`."""

    # ===== Ref Docs =====
    @abstractmethod
    def get_all_ref_doc_info(self) -> Optional[Dict[str, RefDocInfo]]:
        """Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents."""

    @abstractmethod
    async def aget_all_ref_doc_info(self) -> Optional[Dict[str, RefDocInfo]]:
        """Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents."""

    @abstractmethod
    def get_ref_doc_info(self, ref_doc_id: str) -> Optional[RefDocInfo]:
        """Get the RefDocInfo for a given ref_doc_id."""

    @abstractmethod
    async def aget_ref_doc_info(self, ref_doc_id: str) -> Optional[RefDocInfo]:
        """Get the RefDocInfo for a given ref_doc_id."""

    @abstractmethod
    def delete_ref_doc(self, ref_doc_id: str, raise_error: bool = True) -> None:
        """Delete a ref_doc and all of its associated nodes."""

    @abstractmethod
    async def adelete_ref_doc(self, ref_doc_id: str, raise_error: bool = True) -> None:
        """Delete a ref_doc and all of its associated nodes."""

    # ===== Nodes =====
    def get_nodes(
        self, node_ids: List[str], raise_error: bool = True
    ) -> List[BaseNode]:
        """Get nodes from the docstore.

        Each id is resolved through `get_node`, so the result follows the
        order of `node_ids`.

        Args:
            node_ids (List[str]): IDs of the nodes to fetch.
            raise_error (bool): Forwarded unchanged to `get_node` for each id.

        Returns:
            List[BaseNode]: One node per entry of `node_ids`.
        """
        fetched: List[BaseNode] = []
        for current_id in node_ids:
            fetched.append(self.get_node(current_id, raise_error=raise_error))
        return fetched

    async def aget_nodes(
        self, node_ids: List[str], raise_error: bool = True
    ) -> List[BaseNode]:
        """Get nodes from the docstore (async variant).

        Lookups are awaited one at a time, in the order of `node_ids`.

        Args:
            node_ids (List[str]): IDs of the nodes to fetch.
            raise_error (bool): Forwarded unchanged to `aget_node` for each id.

        Returns:
            List[BaseNode]: One node per entry of `node_ids`.
        """
        fetched: List[BaseNode] = []
        for current_id in node_ids:
            fetched.append(await self.aget_node(current_id, raise_error=raise_error))
        return fetched

    def get_node(self, node_id: str, raise_error: bool = True) -> BaseNode:
        """Get a single node from the docstore.

        Args:
            node_id (str): ID of the node to fetch.
            raise_error (bool): Forwarded unchanged to `get_document`.

        Returns:
            BaseNode: The object returned by `get_document`.

        Raises:
            ValueError: If `get_document` returns anything that is not a
                `BaseNode` instance (this includes ``None``).
        """
        candidate = self.get_document(node_id, raise_error=raise_error)
        if isinstance(candidate, BaseNode):
            return candidate
        raise ValueError(f"Document {node_id} is not a Node.")

    async def aget_node(self, node_id: str, raise_error: bool = True) -> BaseNode:
        """Get a single node from the docstore (async variant).

        Args:
            node_id (str): ID of the node to fetch.
            raise_error (bool): Forwarded unchanged to `aget_document`.

        Returns:
            BaseNode: The object returned by `aget_document`.

        Raises:
            ValueError: If `aget_document` returns anything that is not a
                `BaseNode` instance (this includes ``None``).
        """
        candidate = await self.aget_document(node_id, raise_error=raise_error)
        if isinstance(candidate, BaseNode):
            return candidate
        raise ValueError(f"Document {node_id} is not a Node.")

    def get_node_dict(self, node_id_dict: Dict[int, str]) -> Dict[int, BaseNode]:
        """Get a node dict from the docstore given a mapping of index to node ids.

        Every id is resolved with `get_node` using its default `raise_error`.

        Args:
            node_id_dict (Dict[int, str]): Mapping of index to node id.

        Returns:
            Dict[int, BaseNode]: The same indices mapped to the fetched nodes.
        """
        nodes_by_index: Dict[int, BaseNode] = {}
        for position, current_id in node_id_dict.items():
            nodes_by_index[position] = self.get_node(current_id)
        return nodes_by_index

    async def aget_node_dict(self, node_id_dict: Dict[int, str]) -> Dict[int, BaseNode]:
        """Get a node dict from the docstore given a mapping of index to node ids.

        Async variant: every id is resolved with `aget_node` (default
        `raise_error`), awaited one at a time in the mapping's iteration order.

        Args:
            node_id_dict (Dict[int, str]): Mapping of index to node id.

        Returns:
            Dict[int, BaseNode]: The same indices mapped to the fetched nodes.
        """
        nodes_by_index: Dict[int, BaseNode] = {}
        for position, current_id in node_id_dict.items():
            nodes_by_index[position] = await self.aget_node(current_id)
        return nodes_by_index

persist #

persist(
    persist_path: str = DEFAULT_PERSIST_PATH,
    fs: Optional[AbstractFileSystem] = None,
) -> None

将文档存储持久化到文件中。

Source code in llama_index/core/storage/docstore/types.py

def persist(
    self,
    persist_path: str = DEFAULT_PERSIST_PATH,
    fs: Optional[fsspec.AbstractFileSystem] = None,
) -> None:
    """Persist the docstore to a file.

    This implementation has no body beyond the docstring, so calling it does
    nothing; subclasses may override it.

    Args:
        persist_path (str): Path to persist to. Defaults to
            DEFAULT_PERSIST_PATH.
        fs (Optional[fsspec.AbstractFileSystem]): Filesystem to use; how
            ``None`` is treated is up to the overriding subclass.
    """

delete_document `abstractmethod` #

delete_document(
    doc_id: str, raise_error: bool = True
) -> None

从存储中删除一个文档。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
    """Delete a document from the store.

    Abstract: concrete stores supply the implementation.

    Args:
        doc_id (str): ID of the document to delete.
        raise_error (bool): Presumably controls whether a missing `doc_id`
            raises; the exact behavior is defined by the implementing
            subclass. Defaults to True.
    """
    ...

adelete_document `abstractmethod` `async` #

adelete_document(
    doc_id: str, raise_error: bool = True
) -> None

从存储中删除一个文档。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
async def adelete_document(self, doc_id: str, raise_error: bool = True) -> None:
    """Delete a document from the store (async variant).

    Abstract: concrete stores supply the implementation.

    Args:
        doc_id (str): ID of the document to delete.
        raise_error (bool): Presumably controls whether a missing `doc_id`
            raises; the exact behavior is defined by the implementing
            subclass. Defaults to True.
    """
    ...

get_all_ref_doc_info `abstractmethod` #

get_all_ref_doc_info() -> Optional[Dict[str, RefDocInfo]]

获取所有已摄取文档的 ref_doc_id -> RefDocInfo 的映射。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
def get_all_ref_doc_info(self) -> Optional[Dict[str, RefDocInfo]]:
    """Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

    Abstract: concrete stores supply the implementation.

    Returns:
        Optional[Dict[str, RefDocInfo]]: The mapping, or ``None``; when
        ``None`` is returned is decided by the implementing subclass.
    """

aget_all_ref_doc_info `abstractmethod` `async` #

aget_all_ref_doc_info() -> Optional[Dict[str, RefDocInfo]]

获取所有已摄取文档的 ref_doc_id -> RefDocInfo 的映射。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
async def aget_all_ref_doc_info(self) -> Optional[Dict[str, RefDocInfo]]:
    """Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

    Async variant. Abstract: concrete stores supply the implementation.

    Returns:
        Optional[Dict[str, RefDocInfo]]: The mapping, or ``None``; when
        ``None`` is returned is decided by the implementing subclass.
    """

get_ref_doc_info `abstractmethod` #

get_ref_doc_info(ref_doc_id: str) -> Optional[RefDocInfo]

获取给定 ref_doc_id 的 RefDocInfo。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
def get_ref_doc_info(self, ref_doc_id: str) -> Optional[RefDocInfo]:
    """Get the RefDocInfo for a given ref_doc_id.

    Abstract: concrete stores supply the implementation.

    Args:
        ref_doc_id (str): ID of the reference document to look up.

    Returns:
        Optional[RefDocInfo]: The matching info, or ``None``; when ``None``
        is returned is decided by the implementing subclass.
    """

aget_ref_doc_info `abstractmethod` `async` #

aget_ref_doc_info(ref_doc_id: str) -> Optional[RefDocInfo]

获取给定 ref_doc_id 的 RefDocInfo。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
async def aget_ref_doc_info(self, ref_doc_id: str) -> Optional[RefDocInfo]:
    """Get the RefDocInfo for a given ref_doc_id.

    Async variant. Abstract: concrete stores supply the implementation.

    Args:
        ref_doc_id (str): ID of the reference document to look up.

    Returns:
        Optional[RefDocInfo]: The matching info, or ``None``; when ``None``
        is returned is decided by the implementing subclass.
    """

delete_ref_doc `abstractmethod` #

delete_ref_doc(
    ref_doc_id: str, raise_error: bool = True
) -> None

删除一个 ref_doc 及其关联的所有节点。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
def delete_ref_doc(self, ref_doc_id: str, raise_error: bool = True) -> None:
    """Delete a ref_doc and all of its associated nodes.

    Abstract: concrete stores supply the implementation.

    Args:
        ref_doc_id (str): ID of the reference document to delete.
        raise_error (bool): Presumably controls whether a missing
            `ref_doc_id` raises; the exact behavior is defined by the
            implementing subclass. Defaults to True.
    """

adelete_ref_doc `abstractmethod` `async` #

adelete_ref_doc(
    ref_doc_id: str, raise_error: bool = True
) -> None

删除一个 ref_doc 及其关联的所有节点。

Source code in llama_index/core/storage/docstore/types.py

@abstractmethod
async def adelete_ref_doc(self, ref_doc_id: str, raise_error: bool = True) -> None:
    """Delete a ref_doc and all of its associated nodes.

    Async variant. Abstract: concrete stores supply the implementation.

    Args:
        ref_doc_id (str): ID of the reference document to delete.
        raise_error (bool): Presumably controls whether a missing
            `ref_doc_id` raises; the exact behavior is defined by the
            implementing subclass. Defaults to True.
    """

get_nodes #

get_nodes(
    node_ids: List[str], raise_error: bool = True
) -> List[BaseNode]

从文档存储中获取节点。

Parameters:

Name	Type	Description	Default
`node_ids`	`List[str]`	节点id	required
`raise_error`	`bool`	如果未找到节点id，则抛出错误	`True`

Source code in llama_index/core/storage/docstore/types.py

    def get_nodes(
        self, node_ids: List[str], raise_error: bool = True
    ) -> List[BaseNode]:
        """Get nodes from the docstore.

        Each id is resolved through `get_node`, so the result follows the
        order of `node_ids`.

        Args:
            node_ids (List[str]): IDs of the nodes to fetch.
            raise_error (bool): Forwarded unchanged to `get_node` for each id.

        Returns:
            List[BaseNode]: One node per entry of `node_ids`.
        """
        fetched: List[BaseNode] = []
        for current_id in node_ids:
            fetched.append(self.get_node(current_id, raise_error=raise_error))
        return fetched

aget_nodes `async` #

aget_nodes(
    node_ids: List[str], raise_error: bool = True
) -> List[BaseNode]

从文档存储中获取节点。

Parameters:

Name	Type	Description	Default
`node_ids`	`List[str]`	节点id	required
`raise_error`	`bool`	如果未找到节点id，则抛出错误	`True`

Source code in llama_index/core/storage/docstore/types.py

    async def aget_nodes(
        self, node_ids: List[str], raise_error: bool = True
    ) -> List[BaseNode]:
        """Get nodes from the docstore (async variant).

        Lookups are awaited one at a time, in the order of `node_ids`.

        Args:
            node_ids (List[str]): IDs of the nodes to fetch.
            raise_error (bool): Forwarded unchanged to `aget_node` for each id.

        Returns:
            List[BaseNode]: One node per entry of `node_ids`.
        """
        fetched: List[BaseNode] = []
        for current_id in node_ids:
            fetched.append(await self.aget_node(current_id, raise_error=raise_error))
        return fetched

get_node #

get_node(
    node_id: str, raise_error: bool = True
) -> BaseNode

从文档存储中获取节点。

Source code in llama_index/core/storage/docstore/types.py

    def get_node(self, node_id: str, raise_error: bool = True) -> BaseNode:
        """Get a single node from the docstore.

        Args:
            node_id (str): ID of the node to fetch.
            raise_error (bool): Forwarded unchanged to `get_document`.

        Returns:
            BaseNode: The object returned by `get_document`.

        Raises:
            ValueError: If `get_document` returns anything that is not a
                `BaseNode` instance (this includes ``None``).
        """
        candidate = self.get_document(node_id, raise_error=raise_error)
        if isinstance(candidate, BaseNode):
            return candidate
        raise ValueError(f"Document {node_id} is not a Node.")

aget_node `async` #

aget_node(
    node_id: str, raise_error: bool = True
) -> BaseNode

从文档存储中获取节点。

Source code in llama_index/core/storage/docstore/types.py

    async def aget_node(self, node_id: str, raise_error: bool = True) -> BaseNode:
        """Get a single node from the docstore (async variant).

        Args:
            node_id (str): ID of the node to fetch.
            raise_error (bool): Forwarded unchanged to `aget_document`.

        Returns:
            BaseNode: The object returned by `aget_document`.

        Raises:
            ValueError: If `aget_document` returns anything that is not a
                `BaseNode` instance (this includes ``None``).
        """
        candidate = await self.aget_document(node_id, raise_error=raise_error)
        if isinstance(candidate, BaseNode):
            return candidate
        raise ValueError(f"Document {node_id} is not a Node.")

get_node_dict #

get_node_dict(
    node_id_dict: Dict[int, str]
) -> Dict[int, BaseNode]

从文档存储中获取节点字典，给定索引到节点ID的映射。

Source code in llama_index/core/storage/docstore/types.py

    def get_node_dict(self, node_id_dict: Dict[int, str]) -> Dict[int, BaseNode]:
        """Get a node dict from the docstore given a mapping of index to node ids.

        Every id is resolved with `get_node` using its default `raise_error`.

        Args:
            node_id_dict (Dict[int, str]): Mapping of index to node id.

        Returns:
            Dict[int, BaseNode]: The same indices mapped to the fetched nodes.
        """
        nodes_by_index: Dict[int, BaseNode] = {}
        for position, current_id in node_id_dict.items():
            nodes_by_index[position] = self.get_node(current_id)
        return nodes_by_index

aget_node_dict `async` #

aget_node_dict(
    node_id_dict: Dict[int, str]
) -> Dict[int, BaseNode]

从文档存储中获取节点字典，给定索引到节点ID的映射。

Source code in llama_index/core/storage/docstore/types.py

    async def aget_node_dict(self, node_id_dict: Dict[int, str]) -> Dict[int, BaseNode]:
        """Get a node dict from the docstore given a mapping of index to node ids.

        Async variant: every id is resolved with `aget_node` (default
        `raise_error`), awaited one at a time in the mapping's iteration order.

        Args:
            node_id_dict (Dict[int, str]): Mapping of index to node id.

        Returns:
            Dict[int, BaseNode]: The same indices mapped to the fetched nodes.
        """
        nodes_by_index: Dict[int, BaseNode] = {}
        for position, current_id in node_id_dict.items():
            nodes_by_index[position] = await self.aget_node(current_id)
        return nodes_by_index

Index

RefDocInfo dataclass #

BaseDocumentStore #

persist #

delete_document abstractmethod #

adelete_document abstractmethod async #

get_all_ref_doc_info abstractmethod #

aget_all_ref_doc_info abstractmethod async #

get_ref_doc_info abstractmethod #

aget_ref_doc_info abstractmethod async #

delete_ref_doc abstractmethod #

adelete_ref_doc abstractmethod async #

get_nodes #

aget_nodes async #

get_node #

aget_node async #

get_node_dict #

aget_node_dict async #

RefDocInfo `dataclass` #

delete_document `abstractmethod` #

adelete_document `abstractmethod` `async` #

get_all_ref_doc_info `abstractmethod` #

aget_all_ref_doc_info `abstractmethod` `async` #

get_ref_doc_info `abstractmethod` #

aget_ref_doc_info `abstractmethod` `async` #

delete_ref_doc `abstractmethod` #

adelete_ref_doc `abstractmethod` `async` #

aget_nodes `async` #

aget_node `async` #

aget_node_dict `async` #