Index

数据结构的基本模式。

BaseComponent #

Bases: BaseModel

基础组件对象，用于捕获类名。

Source code in llama_index/core/schema.py

class BaseComponent(BaseModel):
    """Base component object that records a class name when serialized."""

    class Config:
        @staticmethod
        def schema_extra(schema: Dict[str, Any], model: "BaseComponent") -> None:
            """Add the class name to the schema."""
            schema["properties"]["class_name"] = {
                "title": "Class Name",
                "type": "string",
                "default": model.class_name(),
            }

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used as a unique ID in serialization.

        This provides a key that keeps serialization robust against
        changes to the actual class name.
        """
        return "base_component"

    def json(self, **kwargs: Any) -> str:
        """Serialize to a JSON string by delegating to ``to_json``."""
        return self.to_json(**kwargs)

    def dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Return the parent class's dict with a ``class_name`` entry added."""
        data = super().dict(**kwargs)
        data["class_name"] = self.class_name()
        return data

    def __getstate__(self) -> Dict[str, Any]:
        """Return the pickle state with unpicklable entries removed.

        Dropped from ``state["__dict__"]``: the ``tokenizer`` entry, every
        key ending in ``_fn``, and every entry whose string form contains
        ``<lambda>``. Private attribute values are reset to an empty dict.
        """
        state = super().__getstate__()
        attrs = state["__dict__"]

        # tiktoken is not pickleable
        attrs.pop("tokenizer", None)

        # remove local functions; each key is collected at most once
        keys_to_remove = [
            key
            for key, val in attrs.items()
            if key.endswith("_fn") or "<lambda>" in str(val)
        ]
        for key in keys_to_remove:
            attrs.pop(key, None)

        # remove private attributes -- kind of dangerous
        state["__private_attribute_values__"] = {}

        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """Restore state, preferring ``__init__`` so all variables initialize."""
        # Use the __dict__ and __init__ method to set state
        # so that all variable initialize
        try:
            self.__init__(**state["__dict__"])  # type: ignore
        except Exception:
            # Fall back to the default __setstate__ method
            super().__setstate__(state)

    def to_dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Return ``self.dict(**kwargs)`` with ``class_name`` set."""
        data = self.dict(**kwargs)
        # Set again here so the key is present even when the dict() that was
        # actually called did not add it.
        data["class_name"] = self.class_name()
        return data

    def to_json(self, **kwargs: Any) -> str:
        """Serialize the result of ``to_dict`` with ``json.dumps``."""
        data = self.to_dict(**kwargs)
        return json.dumps(data)

    # TODO: return type here not supported by current mypy version
    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self:  # type: ignore
        """Build an instance from ``data``, with ``kwargs`` taking precedence.

        The ``class_name`` entry is discarded before construction. ``data``
        itself is left untouched: the merge and the removal happen on a
        shallow copy, so a dict that the caller keeps using (for example one
        held in a store) is not altered by deserializing it.
        """
        payload = dict(data)
        payload.update(kwargs)
        payload.pop("class_name", None)
        return cls(**payload)

    @classmethod
    def from_json(cls, data_str: str, **kwargs: Any) -> Self:  # type: ignore
        """Parse ``data_str`` as JSON and build an instance via ``from_dict``."""
        data = json.loads(data_str)
        return cls.from_dict(data, **kwargs)

Config #

Source code in llama_index/core/schema.py

class Config:
    """Model configuration that adds ``class_name`` to the generated schema."""

    @staticmethod
    def schema_extra(schema: Dict[str, Any], model: "BaseComponent") -> None:
        """Add the class name to the schema, modifying ``schema`` in place."""
        class_name_property = {
            "title": "Class Name",
            "type": "string",
            "default": model.class_name(),
        }
        schema["properties"]["class_name"] = class_name_property

schema_extra `staticmethod` #

schema_extra(
    schema: Dict[str, Any], model: BaseComponent
) -> None

将类名添加到模式中。

Source code in llama_index/core/schema.py

@staticmethod
def schema_extra(schema: Dict[str, Any], model: "BaseComponent") -> None:
    """Add the class name to the schema, modifying ``schema`` in place."""
    class_name_property = {
        "title": "Class Name",
        "type": "string",
        "default": model.class_name(),
    }
    schema["properties"]["class_name"] = class_name_property

class_name `classmethod` #

class_name() -> str

获取类名，用作序列化中的唯一标识。

这提供了一个键，使得序列化对实际类名更改具有鲁棒性。

Source code in llama_index/core/schema.py

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used as a unique ID in serialization.

        This provides a key that keeps serialization robust against
        changes to the actual class name.
        """
        return "base_component"

TransformComponent #

Bases: BaseComponent

转换组件的基类。

Source code in llama_index/core/schema.py

class TransformComponent(BaseComponent):
    """Base class for transform components."""

    class Config:
        arbitrary_types_allowed = True

    @abstractmethod
    def __call__(self, nodes: List["BaseNode"], **kwargs: Any) -> List["BaseNode"]:
        """Transform nodes."""

    async def acall(self, nodes: List["BaseNode"], **kwargs: Any) -> List["BaseNode"]:
        """Transform nodes asynchronously.

        This implementation calls the synchronous ``__call__`` with the same
        arguments and returns its result.
        """
        return self.__call__(nodes, **kwargs)

acall `async` #

acall(
    nodes: List[BaseNode], **kwargs: Any
) -> List[BaseNode]

异步转换节点。

Source code in llama_index/core/schema.py

async def acall(self, nodes: List["BaseNode"], **kwargs: Any) -> List["BaseNode"]:
    """Transform nodes asynchronously.

    This implementation calls the synchronous ``__call__`` with the same
    arguments and returns its result.
    """
    return self.__call__(nodes, **kwargs)

NodeRelationship #

Bases: str, Enum

节点关系在BaseNode类中使用。

属性

- SOURCE: 节点是源文档。
- PREVIOUS: 节点是文档中的前一个节点。
- NEXT: 节点是文档中的下一个节点。
- PARENT: 节点是文档中的父节点。
- CHILD: 节点是文档中的子节点。

Source code in llama_index/core/schema.py

class NodeRelationship(str, Enum):
    """Node relationships used in the `BaseNode` class.

    Attributes:
        SOURCE: The node is the source document.
        PREVIOUS: The node is the previous node in the document.
        NEXT: The node is the next node in the document.
        PARENT: The node is the parent node in the document.
        CHILD: The node is a child node in the document.
    """

    SOURCE = auto()
    PREVIOUS = auto()
    NEXT = auto()
    PARENT = auto()
    CHILD = auto()

BaseNode #

Bases: BaseComponent

基本节点对象。

可检索节点的通用抽象接口

Source code in llama_index/core/schema.py

class BaseNode(BaseComponent):
    """Base node object.

    Generic abstract interface for retrievable nodes.
    """

    class Config:
        allow_population_by_field_name = True
        # hash is computed on local field, during the validation process
        validate_assignment = True

    id_: str = Field(
        default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the node."
    )
    embedding: Optional[List[float]] = Field(
        default=None, description="Embedding of the node."
    )

    # Metadata fields
    # - injected as part of the context presented to LLMs
    # - injected as part of the text used to generate embeddings
    # - used by vector databases for metadata filtering
    #
    # Kept as a comment rather than a bare string: a string literal placed
    # here is picked up as the attribute docstring of `embedding` above.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="A flat dictionary of metadata fields",
        alias="extra_info",
    )
    excluded_embed_metadata_keys: List[str] = Field(
        default_factory=list,
        description="Metadata keys that are excluded from text for the embed model.",
    )
    excluded_llm_metadata_keys: List[str] = Field(
        default_factory=list,
        description="Metadata keys that are excluded from text for the LLM.",
    )
    relationships: Dict[NodeRelationship, RelatedNodeType] = Field(
        default_factory=dict,
        description="A mapping of relationships to other node information.",
    )

    @classmethod
    @abstractmethod
    def get_type(cls) -> str:
        """Get the object type."""

    @abstractmethod
    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get the object content."""

    @abstractmethod
    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get the metadata as a string."""

    @abstractmethod
    def set_content(self, value: Any) -> None:
        """Set the content of the node."""

    @property
    @abstractmethod
    def hash(self) -> str:
        """Get the hash of the node."""

    @property
    def node_id(self) -> str:
        """Alias for ``id_``."""
        return self.id_

    @node_id.setter
    def node_id(self, value: str) -> None:
        self.id_ = value

    def _single_relation(
        self, relationship: NodeRelationship, label: str
    ) -> Optional[RelatedNodeInfo]:
        """Return the single ``RelatedNodeInfo`` stored for ``relationship``.

        Args:
            relationship: Key to look up in ``self.relationships``.
            label: Word that starts the error message (e.g. ``"Next"``).

        Returns:
            The stored relation, or ``None`` when the key is absent.

        Raises:
            ValueError: If the stored value is not a ``RelatedNodeInfo``.
        """
        if relationship not in self.relationships:
            return None

        relation = self.relationships[relationship]
        if not isinstance(relation, RelatedNodeInfo):
            raise ValueError(f"{label} object must be a single RelatedNodeInfo object")
        return relation

    @property
    def source_node(self) -> Optional[RelatedNodeInfo]:
        """Source object node.

        Extracted from the relationships field.

        Raises:
            ValueError: If the stored source relation is a list.
        """
        if NodeRelationship.SOURCE not in self.relationships:
            return None

        # Unlike the other single-relation accessors, this one only rejects
        # lists; that original check is kept as-is.
        relation = self.relationships[NodeRelationship.SOURCE]
        if isinstance(relation, list):
            raise ValueError("Source object must be a single RelatedNodeInfo object")
        return relation

    @property
    def prev_node(self) -> Optional[RelatedNodeInfo]:
        """Previous node, or ``None`` when no such relationship is stored."""
        return self._single_relation(NodeRelationship.PREVIOUS, "Previous")

    @property
    def next_node(self) -> Optional[RelatedNodeInfo]:
        """Next node, or ``None`` when no such relationship is stored."""
        return self._single_relation(NodeRelationship.NEXT, "Next")

    @property
    def parent_node(self) -> Optional[RelatedNodeInfo]:
        """Parent node, or ``None`` when no such relationship is stored."""
        return self._single_relation(NodeRelationship.PARENT, "Parent")

    @property
    def child_nodes(self) -> Optional[List[RelatedNodeInfo]]:
        """Child nodes, or ``None`` when no such relationship is stored.

        Raises:
            ValueError: If the stored child relation is not a list.
        """
        if NodeRelationship.CHILD not in self.relationships:
            return None

        relation = self.relationships[NodeRelationship.CHILD]
        if not isinstance(relation, list):
            raise ValueError("Child objects must be a list of RelatedNodeInfo objects.")
        return relation

    @property
    def ref_doc_id(self) -> Optional[str]:
        """Deprecated: get the ID of the reference document.

        Returns the ``node_id`` of ``source_node``, or ``None`` when there is
        no source node.
        """
        source_node = self.source_node
        if source_node is None:
            return None
        return source_node.node_id

    @property
    def extra_info(self) -> Dict[str, Any]:
        """TODO: DEPRECATED: extra info (returns ``metadata``)."""
        return self.metadata

    def __str__(self) -> str:
        """Return the node ID followed by truncated, wrapped content text."""
        source_text_truncated = truncate_text(
            self.get_content().strip(), TRUNCATE_LENGTH
        )
        source_text_wrapped = textwrap.fill(
            f"Text: {source_text_truncated}\n", width=WRAP_WIDTH
        )
        return f"Node ID: {self.node_id}\n{source_text_wrapped}"

    def get_embedding(self) -> List[float]:
        """Get the embedding.

        Raises:
            ValueError: If the embedding is ``None``.
        """
        if self.embedding is None:
            raise ValueError("embedding not set.")
        return self.embedding

    def as_related_node_info(self) -> RelatedNodeInfo:
        """Get this node as a ``RelatedNodeInfo``."""
        return RelatedNodeInfo(
            node_id=self.node_id,
            node_type=self.get_type(),
            metadata=self.metadata,
            hash=self.hash,
        )

embedding `class-attribute` `instance-attribute` #

embedding: Optional[List[float]] = Field(
    default=None, description="Embedding of the node."
)

元数据字段（注：此说明在源码中位于 `embedding` 之后，描述的是随后的 `metadata` 字段）：

- 作为呈现给 LLM 的上下文的一部分注入
- 作为生成嵌入的文本的一部分注入
- 被向量数据库用于元数据过滤

hash `abstractmethod` `property` #

hash: str

获取节点的哈希值。

source_node `property` #

source_node: Optional[RelatedNodeInfo]

源对象节点。

从关系字段中提取。

prev_node `property` #

prev_node: Optional[RelatedNodeInfo]

前一个节点。

next_node `property` #

next_node: Optional[RelatedNodeInfo]

下一个节点。

parent_node `property` #

parent_node: Optional[RelatedNodeInfo]

父节点。

child_nodes `property` #

child_nodes: Optional[List[RelatedNodeInfo]]

子节点。

ref_doc_id `property` #

ref_doc_id: Optional[str]

已弃用：获取参考文档的ID。

extra_info `property` #

extra_info: Dict[str, Any]

TODO: 已弃用：额外信息。

get_type `abstractmethod` `classmethod` #

get_type() -> str

获取对象类型。

Source code in llama_index/core/schema.py

@classmethod
@abstractmethod
def get_type(cls) -> str:
    """Get the object type."""

get_content `abstractmethod` #

get_content(
    metadata_mode: MetadataMode = MetadataMode.ALL,
) -> str

获取对象内容。

Source code in llama_index/core/schema.py

@abstractmethod
def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
    """Get the object content."""

get_metadata_str `abstractmethod` #

get_metadata_str(
    mode: MetadataMode = MetadataMode.ALL,
) -> str

元数据字符串。

Source code in llama_index/core/schema.py

@abstractmethod
def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
    """Get the metadata as a string."""

set_content `abstractmethod` #

set_content(value: Any) -> None

设置节点的内容。

Source code in llama_index/core/schema.py

@abstractmethod
def set_content(self, value: Any) -> None:
    """Set the content of the node."""

get_embedding #

get_embedding() -> List[float]

获取嵌入。

如果嵌入为None，则报错。

Source code in llama_index/core/schema.py

    def get_embedding(self) -> List[float]:
        """Return the node's embedding.

        Raises:
            ValueError: If the embedding is ``None``.
        """
        vector = self.embedding
        if vector is not None:
            return vector
        raise ValueError("embedding not set.")

as_related_node_info #

as_related_node_info() -> RelatedNodeInfo

获取节点作为RelatedNodeInfo。

Source code in llama_index/core/schema.py

def as_related_node_info(self) -> RelatedNodeInfo:
    """Build a ``RelatedNodeInfo`` from this node's ID, type, metadata and hash."""
    reference_fields = {
        "node_id": self.node_id,
        "node_type": self.get_type(),
        "metadata": self.metadata,
        "hash": self.hash,
    }
    return RelatedNodeInfo(**reference_fields)

TextNode #

Bases: BaseNode

Source code in llama_index/core/schema.py

class TextNode(BaseNode):
    """Node that carries text plus templates for rendering it with metadata."""

    text: str = Field(default="", description="Text content of the node.")
    start_char_idx: Optional[int] = Field(
        default=None, description="Start char index of the node."
    )
    end_char_idx: Optional[int] = Field(
        default=None, description="End char index of the node."
    )
    text_template: str = Field(
        default=DEFAULT_TEXT_NODE_TMPL,
        description=(
            "Template for how text is formatted, with {content} and "
            "{metadata_str} placeholders."
        ),
    )
    metadata_template: str = Field(
        default=DEFAULT_METADATA_TMPL,
        description=(
            "Template for how metadata is formatted, with {key} and "
            "{value} placeholders."
        ),
    )
    metadata_seperator: str = Field(
        default="\n",
        description="Separator between metadata fields when converting to string.",
    )

    @classmethod
    def class_name(cls) -> str:
        return "TextNode"

    @property
    def hash(self) -> str:
        """SHA-256 hex digest of the text concatenated with ``str(metadata)``."""
        identity_bytes = (str(self.text) + str(self.metadata)).encode(
            "utf-8", "surrogatepass"
        )
        digest = sha256(identity_bytes).hexdigest()
        return str(digest)

    @classmethod
    def get_type(cls) -> str:
        """Get the object type."""
        return ObjectType.TEXT

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
        """Get the object content.

        Returns the bare text when the stripped metadata string for
        ``metadata_mode`` is empty; otherwise the text template filled with
        the text and that metadata string, stripped.
        """
        metadata_str = self.get_metadata_str(mode=metadata_mode).strip()
        if metadata_str:
            rendered = self.text_template.format(
                content=self.text, metadata_str=metadata_str
            )
            return rendered.strip()
        return self.text

    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get the metadata info as a string.

        ``NONE`` yields an empty string. ``LLM`` and ``EMBED`` skip the keys
        listed in the matching ``excluded_*_metadata_keys`` field. The
        remaining entries are formatted with ``metadata_template`` and joined
        with ``metadata_seperator``.
        """
        if mode == MetadataMode.NONE:
            return ""

        if mode == MetadataMode.LLM:
            hidden_keys = set(self.excluded_llm_metadata_keys)
        elif mode == MetadataMode.EMBED:
            hidden_keys = set(self.excluded_embed_metadata_keys)
        else:
            hidden_keys = set()

        formatted_entries = []
        for key, value in self.metadata.items():
            if key in hidden_keys:
                continue
            formatted_entries.append(
                self.metadata_template.format(key=key, value=str(value))
            )
        return self.metadata_seperator.join(formatted_entries)

    def set_content(self, value: str) -> None:
        """Set the content of the node."""
        self.text = value

    def get_node_info(self) -> Dict[str, Any]:
        """Get node info: the start and end character indices."""
        return dict(start=self.start_char_idx, end=self.end_char_idx)

    def get_text(self) -> str:
        """Return the content with ``MetadataMode.NONE``."""
        return self.get_content(metadata_mode=MetadataMode.NONE)

    @property
    def node_info(self) -> Dict[str, Any]:
        """Deprecated: get node info."""
        return self.get_node_info()

node_info `property` #

node_info: Dict[str, Any]

已弃用：获取节点信息。

get_type `classmethod` #

get_type() -> str

获取对象类型。

Source code in llama_index/core/schema.py

@classmethod
def get_type(cls) -> str:
    """Get the object type."""
    return ObjectType.TEXT

get_content #

get_content(
    metadata_mode: MetadataMode = MetadataMode.NONE,
) -> str

获取对象内容。

Source code in llama_index/core/schema.py

def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
    """Get the object content.

    Returns the bare text when the stripped metadata string for
    ``metadata_mode`` is empty; otherwise the text template filled with the
    text and that metadata string, stripped.
    """
    metadata_str = self.get_metadata_str(mode=metadata_mode).strip()
    if metadata_str:
        rendered = self.text_template.format(
            content=self.text, metadata_str=metadata_str
        )
        return rendered.strip()
    return self.text

get_metadata_str #

get_metadata_str(
    mode: MetadataMode = MetadataMode.ALL,
) -> str

元数据信息字符串。

Source code in llama_index/core/schema.py

def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
    """Get the metadata info as a string.

    ``NONE`` yields an empty string. ``LLM`` and ``EMBED`` skip the keys
    listed in the matching ``excluded_*_metadata_keys`` attribute. The
    remaining entries are formatted with ``metadata_template`` and joined
    with ``metadata_seperator``.
    """
    if mode == MetadataMode.NONE:
        return ""

    if mode == MetadataMode.LLM:
        hidden_keys = set(self.excluded_llm_metadata_keys)
    elif mode == MetadataMode.EMBED:
        hidden_keys = set(self.excluded_embed_metadata_keys)
    else:
        hidden_keys = set()

    formatted_entries = []
    for key, value in self.metadata.items():
        if key in hidden_keys:
            continue
        formatted_entries.append(
            self.metadata_template.format(key=key, value=str(value))
        )
    return self.metadata_seperator.join(formatted_entries)

set_content #

set_content(value: str) -> None

设置节点的内容。

Source code in llama_index/core/schema.py

def set_content(self, value: str) -> None:
    """Set the content of the node by assigning ``value`` to ``text``."""
    self.text = value

get_node_info #

get_node_info() -> Dict[str, Any]

获取节点信息。

Source code in llama_index/core/schema.py

def get_node_info(self) -> Dict[str, Any]:
    """Get node info: the start and end character indices."""
    return dict(start=self.start_char_idx, end=self.end_char_idx)

ImageNode #

Bases: TextNode

带有图像的节点。

Source code in llama_index/core/schema.py

class ImageNode(TextNode):
    """Node with an image."""

    # TODO: store reference instead of actual image
    # base64 encoded image str
    image: Optional[str] = None
    image_path: Optional[str] = None
    image_url: Optional[str] = None
    image_mimetype: Optional[str] = None
    text_embedding: Optional[List[float]] = Field(
        default=None,
        description="Text embedding of image node, if text field is filled out",
    )

    @classmethod
    def get_type(cls) -> str:
        return ObjectType.IMAGE

    @classmethod
    def class_name(cls) -> str:
        return "ImageNode"

    def resolve_image(self) -> ImageType:
        """Resolve the image into a form that PIL can read.

        Sources are tried in this order: the base64 ``image`` string, then
        ``image_path``, then ``image_url``.

        Returns:
            A ``BytesIO`` holding the decoded or downloaded bytes, or the
            ``image_path`` string itself.

        Raises:
            ValueError: If none of the three sources is set.
        """
        if self.image is not None:
            import base64

            return BytesIO(base64.b64decode(self.image))
        elif self.image_path is not None:
            return self.image_path
        elif self.image_url is not None:
            # load image from URL
            import requests

            # (connect, read) timeouts in seconds: without them an
            # unresponsive server would block this call indefinitely.
            response = requests.get(self.image_url, timeout=(60, 60))
            return BytesIO(response.content)
        else:
            raise ValueError("No image found in node.")

resolve_image #

resolve_image() -> ImageType

解析图像，使PIL能够读取它。

Source code in llama_index/core/schema.py

def resolve_image(self) -> ImageType:
    """Resolve the image into a form that PIL can read.

    Sources are tried in this order: the base64 ``image`` string, then
    ``image_path``, then ``image_url``.

    Returns:
        A ``BytesIO`` holding the decoded or downloaded bytes, or the
        ``image_path`` string itself.

    Raises:
        ValueError: If none of the three sources is set.
    """
    if self.image is not None:
        import base64

        return BytesIO(base64.b64decode(self.image))
    elif self.image_path is not None:
        return self.image_path
    elif self.image_url is not None:
        # load image from URL
        import requests

        # (connect, read) timeouts in seconds: without them an unresponsive
        # server would block this call indefinitely.
        response = requests.get(self.image_url, timeout=(60, 60))
        return BytesIO(response.content)
    else:
        raise ValueError("No image found in node.")

IndexNode #

Bases: TextNode

具有对任何对象的引用的节点。

这可以包括其他索引、查询引擎、检索器。

这也可以包括其他节点（尽管这与节点类上的“关系”重叠）。

Source code in llama_index/core/schema.py

class IndexNode(TextNode):
    """Node with a reference to any object.

    This can include other indices, query engines, and retrievers.

    It can also include other nodes (though this overlaps with
    `relationships` on the Node class).
    """

    index_id: str
    obj: Any = None

    def dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Return the node as a dict with ``obj`` converted for serialization.

        ``obj`` is stored as ``None``, as the output of ``doc_to_json`` for a
        ``BaseNode``, as ``obj.dict()`` for any other ``BaseModel``, or as a
        ``json.dumps`` string for everything else.

        Raises:
            ValueError: If converting ``obj`` fails; the original exception
                is attached as the cause.
        """
        from llama_index.core.storage.docstore.utils import doc_to_json

        data = super().dict(**kwargs)

        try:
            if self.obj is None:
                data["obj"] = None
            elif isinstance(self.obj, BaseNode):
                data["obj"] = doc_to_json(self.obj)
            elif isinstance(self.obj, BaseModel):
                data["obj"] = self.obj.dict()
            else:
                data["obj"] = json.dumps(self.obj)
        except Exception as err:
            # Chain the cause so the underlying serialization error stays
            # visible in the traceback.
            raise ValueError(
                "IndexNode obj is not serializable: " + str(self.obj)
            ) from err

        return data

    @classmethod
    def from_text_node(
        cls,
        node: TextNode,
        index_id: str,
    ) -> "IndexNode":
        """Create an index node from a text node."""
        # copy all attributes from text node, add index id
        return cls(
            **node.dict(),
            index_id=index_id,
        )

    # TODO: return type here not supported by current mypy version
    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self:  # type: ignore
        """Build an index node from ``data`` and re-create its ``obj``.

        A string ``obj`` becomes a ``TextNode`` holding that text. A dict
        ``obj`` is passed to ``json_to_doc``; if that fails, it becomes a
        ``TextNode`` holding ``str(obj)``. Any other value leaves ``obj`` as
        ``None``.
        """
        output = super().from_dict(data, **kwargs)

        obj = data.get("obj", None)
        parsed_obj = None

        if isinstance(obj, str):
            parsed_obj = TextNode(text=obj)
        elif isinstance(obj, dict):
            from llama_index.core.storage.docstore.utils import json_to_doc

            # check if its a node, else assume stringable
            try:
                parsed_obj = json_to_doc(obj)
            except Exception:
                parsed_obj = TextNode(text=str(obj))

        output.obj = parsed_obj

        return output

    @classmethod
    def get_type(cls) -> str:
        return ObjectType.INDEX

    @classmethod
    def class_name(cls) -> str:
        return "IndexNode"

from_text_node `classmethod` #

from_text_node(node: TextNode, index_id: str) -> IndexNode

从文本节点创建索引节点。

Source code in llama_index/core/schema.py

@classmethod
def from_text_node(
    cls,
    node: TextNode,
    index_id: str,
) -> "IndexNode":
    """Create an index node from a text node.

    Every entry of ``node.dict()`` is passed to the constructor together
    with ``index_id``.
    """
    text_node_fields = node.dict()
    return cls(index_id=index_id, **text_node_fields)

NodeWithScore #

Bases: BaseComponent

Source code in llama_index/core/schema.py

class NodeWithScore(BaseComponent):
    """A node paired with an optional score."""

    node: BaseNode
    score: Optional[float] = None

    def __str__(self) -> str:
        """Return the node's string form followed by a ``Score:`` line."""
        if self.score is None:
            score_str = "None"
        else:
            score_str = f"{self.score: 0.3f}"
        return f"{self.node}\nScore: {score_str}\n"

    def get_score(self, raise_error: bool = False) -> float:
        """Get the score.

        Returns ``0.0`` when no score is set, unless ``raise_error`` is true.

        Raises:
            ValueError: If the score is ``None`` and ``raise_error`` is true.
        """
        if self.score is not None:
            return self.score
        if raise_error:
            raise ValueError("Score not set.")
        return 0.0

    @classmethod
    def class_name(cls) -> str:
        return "NodeWithScore"

    ##### pass through methods to BaseNode #####
    @property
    def node_id(self) -> str:
        return self.node.node_id

    @property
    def id_(self) -> str:
        return self.node.id_

    @property
    def text(self) -> str:
        """Text of the wrapped node; only available for a ``TextNode``."""
        if not isinstance(self.node, TextNode):
            raise ValueError("Node must be a TextNode to get text.")
        return self.node.text

    @property
    def metadata(self) -> Dict[str, Any]:
        return self.node.metadata

    @property
    def embedding(self) -> Optional[List[float]]:
        return self.node.embedding

    def get_text(self) -> str:
        """Return ``get_text()`` of the wrapped node; requires a ``TextNode``."""
        if not isinstance(self.node, TextNode):
            raise ValueError("Node must be a TextNode to get text.")
        return self.node.get_text()

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
        return self.node.get_content(metadata_mode=metadata_mode)

    def get_embedding(self) -> List[float]:
        return self.node.get_embedding()

get_score #

get_score(raise_error: bool = False) -> float

获取分数。

Source code in llama_index/core/schema.py

def get_score(self, raise_error: bool = False) -> float:
    """Get the score.

    Returns ``0.0`` when no score is set, unless ``raise_error`` is true.

    Raises:
        ValueError: If the score is ``None`` and ``raise_error`` is true.
    """
    if self.score is not None:
        return self.score
    if raise_error:
        raise ValueError("Score not set.")
    return 0.0

Document #

Bases: TextNode

通用的数据文档接口。

这个文档连接到数据源。

Source code in llama_index/core/schema.py

class Document(TextNode):
    """Generic interface for a data document.

    This document connects to data sources.
    """

    # TODO: A lot of backwards compatibility logic here, clean up
    id_: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique ID of the node.",
        alias="doc_id",
    )

    # Attribute names that __setattr__ redirects to another attribute.
    _compat_fields = {"doc_id": "id_", "extra_info": "metadata"}

    @classmethod
    def get_type(cls) -> str:
        """Get the document type."""
        return ObjectType.DOCUMENT

    @property
    def doc_id(self) -> str:
        """Get the document ID."""
        return self.id_

    def __str__(self) -> str:
        """Return the document ID followed by truncated, wrapped content text."""
        preview = truncate_text(self.get_content().strip(), TRUNCATE_LENGTH)
        wrapped_preview = textwrap.fill(f"Text: {preview}\n", width=WRAP_WIDTH)
        return f"Doc ID: {self.doc_id}\n{wrapped_preview}"

    def get_doc_id(self) -> str:
        """TODO: Deprecated: get the document ID."""
        return self.id_

    def __setattr__(self, name: str, value: object) -> None:
        """Set an attribute, redirecting names found in ``_compat_fields``."""
        target_name = self._compat_fields.get(name, name)
        super().__setattr__(target_name, value)

    def to_langchain_format(self) -> "LCDocument":
        """Convert the struct to LangChain document format."""
        from llama_index.core.bridge.langchain import Document as LCDocument

        return LCDocument(page_content=self.text, metadata=self.metadata or {})

    @classmethod
    def from_langchain_format(cls, doc: "LCDocument") -> "Document":
        """Convert the struct from LangChain document format."""
        content = doc.page_content
        return cls(text=content, metadata=doc.metadata)

    def to_haystack_format(self) -> "HaystackDocument":
        """Convert the struct to Haystack document format."""
        from haystack.schema import Document as HaystackDocument

        haystack_fields = dict(
            content=self.text,
            meta=self.metadata,
            embedding=self.embedding,
            id=self.id_,
        )
        return HaystackDocument(**haystack_fields)

    @classmethod
    def from_haystack_format(cls, doc: "HaystackDocument") -> "Document":
        """Convert the struct from Haystack document format."""
        document_fields = dict(
            text=doc.content,
            metadata=doc.meta,
            embedding=doc.embedding,
            id_=doc.id,
        )
        return cls(**document_fields)

    def to_embedchain_format(self) -> Dict[str, Any]:
        """Convert the struct to EmbedChain document format."""
        payload = {"content": self.text, "meta_data": self.metadata}
        return {"doc_id": self.id_, "data": payload}

    @classmethod
    def from_embedchain_format(cls, doc: Dict[str, Any]) -> "Document":
        """Convert the struct from EmbedChain document format."""
        payload = doc["data"]
        return cls(
            text=payload["content"],
            metadata=payload["meta_data"],
            id_=doc["doc_id"],
        )

    def to_semantic_kernel_format(self) -> "MemoryRecord":
        """Convert the struct to Semantic Kernel document format.

        A ``None`` or empty embedding is passed on as ``None``.
        """
        import numpy as np
        from semantic_kernel.memory.memory_record import MemoryRecord

        embedding_array = None
        if self.embedding:
            embedding_array = np.array(self.embedding)

        return MemoryRecord(
            id=self.id_,
            text=self.text,
            additional_metadata=self.get_metadata_str(),
            embedding=embedding_array,
        )

    @classmethod
    def from_semantic_kernel_format(cls, doc: "MemoryRecord") -> "Document":
        """Convert the struct from Semantic Kernel document format."""
        raw_embedding = doc._embedding
        embedding = None if raw_embedding is None else raw_embedding.tolist()
        return cls(
            text=doc._text,
            metadata={"additional_metadata": doc._additional_metadata},
            embedding=embedding,
            id_=doc._id,
        )

    def to_vectorflow(self, client: Any) -> None:
        """Send a document to vectorflow, since they don't have a document object."""
        # write document to temp file
        import tempfile

        encoded_text = self.text.encode("utf-8")
        with tempfile.NamedTemporaryFile() as tmp_file:
            tmp_file.write(encoded_text)
            # flush so the bytes are on disk before the path is handed over
            tmp_file.flush()
            client.embed(tmp_file.name)

    @classmethod
    def example(cls) -> "Document":
        """Return a sample ``Document`` built from ``SAMPLE_TEXT``."""
        sample_metadata = {"filename": "README.md", "category": "codebase"}
        return Document(text=SAMPLE_TEXT, metadata=sample_metadata)

    @classmethod
    def class_name(cls) -> str:
        return "Document"

doc_id `property` #

doc_id: str

获取文档ID。

get_type `classmethod` #

get_type() -> str

获取文档类型。

Source code in llama_index/core/schema.py

@classmethod
def get_type(cls) -> str:
    """Get the document type."""
    return ObjectType.DOCUMENT

get_doc_id #

get_doc_id() -> str

TODO：已弃用：获取文档ID。

Source code in llama_index/core/schema.py

def get_doc_id(self) -> str:
    """TODO: Deprecated: get the document ID."""
    return self.id_

to_langchain_format #

to_langchain_format() -> Document

将结构转换为LangChain文档格式。

Source code in llama_index/core/schema.py

def to_langchain_format(self) -> "LCDocument":
    """Convert the struct to LangChain document format.

    Falsy metadata is replaced by an empty dict.
    """
    from llama_index.core.bridge.langchain import Document as LCDocument

    return LCDocument(page_content=self.text, metadata=self.metadata or {})

from_langchain_format `classmethod` #

from_langchain_format(doc: Document) -> Document

将结构从LangChain文档格式转换。

Source code in llama_index/core/schema.py

@classmethod
def from_langchain_format(cls, doc: "LCDocument") -> "Document":
    """Convert the struct from LangChain document format."""
    content = doc.page_content
    return cls(text=content, metadata=doc.metadata)

to_haystack_format #

to_haystack_format() -> Document

将结构转换为Haystack文档格式。

Source code in llama_index/core/schema.py

def to_haystack_format(self) -> "HaystackDocument":
    """Convert the struct to Haystack document format."""
    from haystack.schema import Document as HaystackDocument

    haystack_fields = dict(
        content=self.text,
        meta=self.metadata,
        embedding=self.embedding,
        id=self.id_,
    )
    return HaystackDocument(**haystack_fields)

from_haystack_format `classmethod` #

from_haystack_format(doc: Document) -> Document

将结构从Haystack文档格式转换。

Source code in llama_index/core/schema.py

@classmethod
def from_haystack_format(cls, doc: "HaystackDocument") -> "Document":
    """Convert the struct from Haystack document format."""
    document_fields = dict(
        text=doc.content,
        metadata=doc.meta,
        embedding=doc.embedding,
        id_=doc.id,
    )
    return cls(**document_fields)

to_embedchain_format #

to_embedchain_format() -> Dict[str, Any]

将结构体转换为EmbedChain文档格式。

Source code in llama_index/core/schema.py

def to_embedchain_format(self) -> Dict[str, Any]:
    """Convert the struct to EmbedChain document format."""
    payload = {"content": self.text, "meta_data": self.metadata}
    return {"doc_id": self.id_, "data": payload}

from_embedchain_format `classmethod` #

from_embedchain_format(doc: Dict[str, Any]) -> Document

将结构从EmbedChain文档格式转换。

Source code in llama_index/core/schema.py

@classmethod
def from_embedchain_format(cls, doc: Dict[str, Any]) -> "Document":
    """Convert the struct from EmbedChain document format."""
    payload = doc["data"]
    return cls(
        text=payload["content"],
        metadata=payload["meta_data"],
        id_=doc["doc_id"],
    )

to_semantic_kernel_format #

to_semantic_kernel_format() -> MemoryRecord

将结构转换为语义内核文档格式。

Source code in llama_index/core/schema.py

def to_semantic_kernel_format(self) -> "MemoryRecord":
    """Convert the struct to Semantic Kernel document format.

    A ``None`` or empty embedding is passed on as ``None``.
    """
    import numpy as np
    from semantic_kernel.memory.memory_record import MemoryRecord

    embedding_array = None
    if self.embedding:
        embedding_array = np.array(self.embedding)

    return MemoryRecord(
        id=self.id_,
        text=self.text,
        additional_metadata=self.get_metadata_str(),
        embedding=embedding_array,
    )

from_semantic_kernel_format `classmethod` #

from_semantic_kernel_format(doc: MemoryRecord) -> Document

将结构从语义内核文档格式转换。

Source code in llama_index/core/schema.py

@classmethod
def from_semantic_kernel_format(cls, doc: "MemoryRecord") -> "Document":
    """Convert the struct from Semantic Kernel document format.

    The record's additional metadata is stored under the
    ``additional_metadata`` key; a non-``None`` embedding is converted with
    its ``tolist()`` method.
    """
    raw_embedding = doc._embedding
    embedding = None if raw_embedding is None else raw_embedding.tolist()
    return cls(
        text=doc._text,
        metadata={"additional_metadata": doc._additional_metadata},
        embedding=embedding,
        id_=doc._id,
    )

to_vectorflow #

to_vectorflow(client: Any) -> None

将一个文档发送给vectorflow，因为他们没有文档对象。

Source code in llama_index/core/schema.py

def to_vectorflow(self, client: Any) -> None:
    """Send this document's text to vectorflow through a temporary file.

    The text is written, UTF-8 encoded, to a temporary file and the file's
    path is passed to ``client.embed``. The file is removed afterwards,
    whether or not ``client.embed`` raises.

    Args:
        client: Object exposing an ``embed(path)`` method.
    """
    import os
    import tempfile

    # delete=False lets us close the file before handing its path over.
    # Reopening a NamedTemporaryFile by name while it is still open does not
    # work on every platform (notably Windows), so cleanup is done by hand.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with tmp:
            tmp.write(self.text.encode("utf-8"))
        client.embed(tmp.name)
    finally:
        os.unlink(tmp.name)

ImageDocument #

Bases: Document, ImageNode

包含图像的数据文档。

Source code in llama_index/core/schema.py

class ImageDocument(Document, ImageNode):
    """Data document containing an image."""

    @classmethod
    def class_name(cls) -> str:
        """Return the fixed class identifier string ``"ImageDocument"``."""
        return "ImageDocument"

QueryBundle `dataclass` #

Bases: DataClassJsonMixin

查询包。

这个数据类包含了原始查询字符串和相关的转换。

Parameters:

Name	Type	Description	Default
`query_str`	`str`	原始用户指定的查询字符串。目前被所有非基于嵌入的查询使用。	required
`custom_embedding_strs`	`list[str]`	用于嵌入查询的字符串列表。目前被所有基于嵌入的查询使用。	`None`
`embedding`	`list[float]`	查询的存储嵌入。	`None`

Source code in llama_index/core/schema.py

@dataclass
class QueryBundle(DataClassJsonMixin):
    """Bundle of a query string together with its related transformations.

    Args:
        query_str (str): The original user-specified query string.
            Currently used by all non-embedding-based queries.
        image_path (Optional[str]): Path of a single image used as the
            query input.
        custom_embedding_strs (Optional[List[str]]): Strings to embed in
            place of the query string. Currently used by all
            embedding-based queries.
        embedding (Optional[List[float]]): Stored embedding for the query.
    """

    query_str: str
    # Path of a single image used as the query input.
    image_path: Optional[str] = None
    custom_embedding_strs: Optional[List[str]] = None
    embedding: Optional[List[float]] = None

    @property
    def embedding_strs(self) -> List[str]:
        """Return the custom embedding strings if set, else the query string.

        When no custom strings are set, an empty query string yields an
        empty list.
        """
        if self.custom_embedding_strs is not None:
            return self.custom_embedding_strs
        return [self.query_str] if len(self.query_str) > 0 else []

    @property
    def embedding_image(self) -> List[ImageType]:
        """Return the image path wrapped in a list, or an empty list if unset."""
        return [] if self.image_path is None else [self.image_path]

    def __str__(self) -> str:
        """Return the raw query string."""
        return self.query_str

embedding_strs `property` #

embedding_strs: List[str]

如果指定了自定义嵌入字符串，则使用自定义嵌入字符串，否则使用查询字符串。

embedding_image `property` #

embedding_image: List[ImageType]

使用图像路径进行图像检索。

Index

BaseComponent #

Config #

schema_extra staticmethod #

class_name classmethod #

TransformComponent #

acall async #

NodeRelationship #

BaseNode #

embedding class-attribute instance-attribute #

hash abstractmethod property #

source_node property #

prev_node property #

next_node property #

parent_node property #

child_nodes property #

ref_doc_id property #

extra_info property #

get_type abstractmethod classmethod #

get_content abstractmethod #

get_metadata_str abstractmethod #

set_content abstractmethod #

get_embedding #

as_related_node_info #

TextNode #

node_info property #

get_type classmethod #

get_content #

get_metadata_str #

set_content #

get_node_info #

ImageNode #

resolve_image #

IndexNode #

from_text_node classmethod #

NodeWithScore #

get_score #

Document #

doc_id property #

get_type classmethod #

get_doc_id #

to_langchain_format #

from_langchain_format classmethod #

to_haystack_format #

from_haystack_format classmethod #

to_embedchain_format #

from_embedchain_format classmethod #

to_semantic_kernel_format #

from_semantic_kernel_format classmethod #

to_vectorflow #

ImageDocument #

QueryBundle dataclass #

embedding_strs property #

embedding_image property #

schema_extra `staticmethod` #

class_name `classmethod` #

acall `async` #

embedding `class-attribute` `instance-attribute` #

hash `abstractmethod` `property` #

source_node `property` #

prev_node `property` #

next_node `property` #

parent_node `property` #

child_nodes `property` #

ref_doc_id `property` #

extra_info `property` #

get_type `abstractmethod` `classmethod` #

get_content `abstractmethod` #

get_metadata_str `abstractmethod` #

set_content `abstractmethod` #

node_info `property` #

get_type `classmethod` #

from_text_node `classmethod` #

doc_id `property` #

get_type `classmethod` #

from_langchain_format `classmethod` #

from_haystack_format `classmethod` #

from_embedchain_format `classmethod` #

from_semantic_kernel_format `classmethod` #

QueryBundle `dataclass` #

embedding_strs `property` #

embedding_image `property` #