Skip to content

Index

数据结构的基本模式。

BaseComponent #

Bases: BaseModel

基础组件对象,用于捕获类名。

Source code in llama_index/core/schema.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class BaseComponent(BaseModel):
    """Base component object that records a stable class name for serialization."""

    class Config:
        @staticmethod
        def schema_extra(schema: Dict[str, Any], model: "BaseComponent") -> None:
            """Add a ``class_name`` property to the generated schema."""
            schema["properties"]["class_name"] = {
                "title": "Class Name",
                "type": "string",
                "default": model.class_name(),
            }

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier stored under ``class_name`` when serializing.

        Using an explicit string keeps serialized data robust against
        renames of the actual Python class.
        """
        return "base_component"

    def json(self, **kwargs: Any) -> str:
        """Serialize to a JSON string; delegates to :meth:`to_json`."""
        return self.to_json(**kwargs)

    def dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Return the model as a dict with a ``class_name`` entry added."""
        data = super().dict(**kwargs)
        data["class_name"] = self.class_name()
        return data

    def __getstate__(self) -> Dict[str, Any]:
        """Return the pickle state with selected entries stripped.

        Removed from ``state["__dict__"]``: the ``tokenizer`` entry, every key
        ending in ``_fn``, and every entry whose ``str()`` contains
        ``<lambda>``. Private attribute values are reset to an empty dict.
        """
        state = super().__getstate__()

        # tiktoken is not pickleable
        state["__dict__"].pop("tokenizer", None)

        # remove local functions
        keys_to_remove = [
            key
            for key, val in state["__dict__"].items()
            if key.endswith("_fn") or "<lambda>" in str(val)
        ]
        for key in keys_to_remove:
            state["__dict__"].pop(key, None)

        # remove private attributes -- kind of dangerous
        state["__private_attribute_values__"] = {}

        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """Restore the object from a pickled state.

        Re-runs ``__init__`` with the stored ``__dict__`` so that all
        variables get initialized; if that raises, the parent class's
        ``__setstate__`` is used instead.
        """
        try:
            self.__init__(**state["__dict__"])  # type: ignore
        except Exception:
            # Fall back to the default __setstate__ method
            super().__setstate__(state)

    def to_dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Return :meth:`dict` output, making sure ``class_name`` is set."""
        data = self.dict(**kwargs)
        data["class_name"] = self.class_name()
        return data

    def to_json(self, **kwargs: Any) -> str:
        """Serialize :meth:`to_dict` output with ``json.dumps``."""
        data = self.to_dict(**kwargs)
        return json.dumps(data)

    # TODO: return type here not supported by current mypy version
    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self:  # type: ignore
        """Build an instance from a dict produced by :meth:`to_dict`.

        Args:
            data: Field values; a ``class_name`` entry, if present, is ignored.
            **kwargs: Extra field values that override entries in ``data``.

        The caller's ``data`` dict is left untouched: the merge and the
        ``class_name`` removal happen on a shallow copy.
        """
        merged = {**data, **kwargs}
        merged.pop("class_name", None)
        return cls(**merged)

    @classmethod
    def from_json(cls, data_str: str, **kwargs: Any) -> Self:  # type: ignore
        """Build an instance from a JSON string via :meth:`from_dict`."""
        data = json.loads(data_str)
        return cls.from_dict(data, **kwargs)

Config #

Source code in llama_index/core/schema.py
36
37
38
39
40
41
42
43
44
class Config:
    @staticmethod
    def schema_extra(schema: Dict[str, Any], model: "BaseComponent") -> None:
        """Register a ``class_name`` entry under the schema's ``properties``."""
        class_name_property = {
            "title": "Class Name",
            "type": "string",
            "default": model.class_name(),
        }
        schema["properties"]["class_name"] = class_name_property

schema_extra staticmethod #

schema_extra(
    schema: Dict[str, Any], model: BaseComponent
) -> None

将类名添加到模式中。

Source code in llama_index/core/schema.py
37
38
39
40
41
42
43
44
@staticmethod
def schema_extra(schema: Dict[str, Any], model: "BaseComponent") -> None:
    """Register a ``class_name`` entry under the schema's ``properties``."""
    class_name_property = {
        "title": "Class Name",
        "type": "string",
        "default": model.class_name(),
    }
    schema["properties"]["class_name"] = class_name_property

class_name classmethod #

class_name() -> str

获取类名,用作序列化中的唯一标识。

这提供了一个键,使得序列化对实际类名更改具有鲁棒性。

Source code in llama_index/core/schema.py
46
47
48
49
50
51
52
    @classmethod
    def class_name(cls) -> str:
        """Return the class name used as a unique identifier in serialization.

        This provides a key that keeps serialization robust against changes
        to the actual class name.
        """
        return "base_component"

TransformComponent #

Bases: BaseComponent

转换组件的基类。

Source code in llama_index/core/schema.py
117
118
119
120
121
122
123
124
125
126
127
128
129
class TransformComponent(BaseComponent):
    """Base class for transform components."""

    class Config:
        arbitrary_types_allowed = True

    @abstractmethod
    def __call__(self, nodes: List["BaseNode"], **kwargs: Any) -> List["BaseNode"]:
        """Transform nodes."""

    async def acall(self, nodes: List["BaseNode"], **kwargs: Any) -> List["BaseNode"]:
        """Transform nodes asynchronously."""
        # Default implementation just runs the synchronous __call__.
        return self.__call__(nodes, **kwargs)

acall async #

acall(
    nodes: List[BaseNode], **kwargs: Any
) -> List[BaseNode]

异步转换节点。

Source code in llama_index/core/schema.py
127
128
129
async def acall(self, nodes: List["BaseNode"], **kwargs: Any) -> List["BaseNode"]:
    """Transform nodes asynchronously."""
    # Default implementation just runs the synchronous __call__.
    return self.__call__(nodes, **kwargs)

NodeRelationship #

Bases: str, Enum

节点关系在BaseNode类中使用。

属性

- SOURCE: 节点是源文档。
- PREVIOUS: 节点是文档中的前一个节点。
- NEXT: 节点是文档中的下一个节点。
- PARENT: 节点是文档中的父节点。
- CHILD: 节点是文档中的子节点。

Source code in llama_index/core/schema.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
class NodeRelationship(str, Enum):
    """Node relationships used in the `BaseNode` class.

    Attributes:
        SOURCE: The node is the source document.
        PREVIOUS: The node is the previous node in the document.
        NEXT: The node is the next node in the document.
        PARENT: The node is the parent node in the document.
        CHILD: The node is a child node in the document.
    """

    # NOTE(review): with the ``str`` mixin, ``auto()`` presumably yields the
    # string form of a 1-based counter ("1", "2", ...) -- confirm before
    # relying on the raw values.
    SOURCE = auto()
    PREVIOUS = auto()
    NEXT = auto()
    PARENT = auto()
    CHILD = auto()

BaseNode #

Bases: BaseComponent

基本节点对象。

可检索节点的通用抽象接口

Source code in llama_index/core/schema.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
class BaseNode(BaseComponent):
    """Base node object.

    Generic abstract interface for retrievable nodes.
    """

    class Config:
        allow_population_by_field_name = True
        # hash is computed on local field, during the validation process
        validate_assignment = True

    id_: str = Field(
        default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the node."
    )
    embedding: Optional[List[float]] = Field(
        default=None, description="Embedding of the node."
    )

    # Metadata fields
    # - injected as part of the context presented to LLMs
    # - injected as part of the text used to generate embeddings
    # - used by vector DBs for metadata filtering
    # (Kept as comments: a bare string here is picked up by doc tooling as the
    # attribute docstring of ``embedding``.)
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="A flat dictionary of metadata fields",
        alias="extra_info",
    )
    excluded_embed_metadata_keys: List[str] = Field(
        default_factory=list,
        description="Metadata keys that are excluded from text for the embed model.",
    )
    excluded_llm_metadata_keys: List[str] = Field(
        default_factory=list,
        description="Metadata keys that are excluded from text for the LLM.",
    )
    relationships: Dict[NodeRelationship, RelatedNodeType] = Field(
        default_factory=dict,
        description="A mapping of relationships to other node information.",
    )

    @classmethod
    @abstractmethod
    def get_type(cls) -> str:
        """Get the object type."""

    @abstractmethod
    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get the object content."""

    @abstractmethod
    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get the metadata as a string."""

    @abstractmethod
    def set_content(self, value: Any) -> None:
        """Set the content of the node."""

    @property
    @abstractmethod
    def hash(self) -> str:
        """Get the hash of the node."""

    @property
    def node_id(self) -> str:
        """Alias for ``id_``."""
        return self.id_

    @node_id.setter
    def node_id(self, value: str) -> None:
        self.id_ = value

    def _single_relation(
        self, relationship: NodeRelationship, label: str
    ) -> Optional[RelatedNodeInfo]:
        """Return the single related node stored under *relationship*, if any.

        Args:
            relationship: Key to look up in ``relationships``.
            label: Word used at the start of the error message.

        Raises:
            ValueError: If the stored value is not a ``RelatedNodeInfo``.
        """
        if relationship not in self.relationships:
            return None

        relation = self.relationships[relationship]
        if not isinstance(relation, RelatedNodeInfo):
            raise ValueError(f"{label} object must be a single RelatedNodeInfo object")
        return relation

    @property
    def source_node(self) -> Optional[RelatedNodeInfo]:
        """Source object node.

        Extracted from the relationships field.
        """
        return self._single_relation(NodeRelationship.SOURCE, "Source")

    @property
    def prev_node(self) -> Optional[RelatedNodeInfo]:
        """Previous node."""
        return self._single_relation(NodeRelationship.PREVIOUS, "Previous")

    @property
    def next_node(self) -> Optional[RelatedNodeInfo]:
        """Next node."""
        return self._single_relation(NodeRelationship.NEXT, "Next")

    @property
    def parent_node(self) -> Optional[RelatedNodeInfo]:
        """Parent node."""
        return self._single_relation(NodeRelationship.PARENT, "Parent")

    @property
    def child_nodes(self) -> Optional[List[RelatedNodeInfo]]:
        """Child nodes.

        Raises:
            ValueError: If the stored CHILD relationship is not a list.
        """
        if NodeRelationship.CHILD not in self.relationships:
            return None

        relation = self.relationships[NodeRelationship.CHILD]
        if not isinstance(relation, list):
            raise ValueError("Child objects must be a list of RelatedNodeInfo objects.")
        return relation

    @property
    def ref_doc_id(self) -> Optional[str]:
        """Deprecated: get the ID of the reference document."""
        source_node = self.source_node
        if source_node is None:
            return None
        return source_node.node_id

    @property
    def extra_info(self) -> Dict[str, Any]:
        """TODO: Deprecated: extra info (alias for ``metadata``)."""
        return self.metadata

    def __str__(self) -> str:
        """Return the node ID followed by the truncated, wrapped content."""
        source_text_truncated = truncate_text(
            self.get_content().strip(), TRUNCATE_LENGTH
        )
        source_text_wrapped = textwrap.fill(
            f"Text: {source_text_truncated}\n", width=WRAP_WIDTH
        )
        return f"Node ID: {self.node_id}\n{source_text_wrapped}"

    def get_embedding(self) -> List[float]:
        """Get the embedding.

        Raises:
            ValueError: If the embedding is None.
        """
        if self.embedding is None:
            raise ValueError("embedding not set.")
        return self.embedding

    def as_related_node_info(self) -> RelatedNodeInfo:
        """Get this node as a ``RelatedNodeInfo``."""
        return RelatedNodeInfo(
            node_id=self.node_id,
            node_type=self.get_type(),
            metadata=self.metadata,
            hash=self.hash,
        )

embedding class-attribute instance-attribute #

embedding: Optional[List[float]] = Field(
    default=None, description="Embedding of the node."
)

元数据字段 - 作为呈现给LLMs的上下文的一部分注入 - 作为生成嵌入的文本的一部分注入 - 被向量数据库用于元数据过滤

hash abstractmethod property #

hash: str

获取节点的哈希值。

source_node property #

source_node: Optional[RelatedNodeInfo]

源对象节点。

从关系字段中提取。

prev_node property #

prev_node: Optional[RelatedNodeInfo]

前一个节点。

next_node property #

next_node: Optional[RelatedNodeInfo]

下一个节点。

parent_node property #

parent_node: Optional[RelatedNodeInfo]

父节点。

child_nodes property #

child_nodes: Optional[List[RelatedNodeInfo]]

子节点。

ref_doc_id property #

ref_doc_id: Optional[str]

已弃用:获取参考文档的ID。

extra_info property #

extra_info: Dict[str, Any]

TODO: 已弃用:额外信息。

get_type abstractmethod classmethod #

get_type() -> str

获取对象类型。

Source code in llama_index/core/schema.py
221
222
223
224
@classmethod
@abstractmethod
def get_type(cls) -> str:
    """Get the object type."""

get_content abstractmethod #

get_content(
    metadata_mode: MetadataMode = MetadataMode.ALL,
) -> str

获取对象内容。

Source code in llama_index/core/schema.py
226
227
228
@abstractmethod
def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
    """Get the object content."""

get_metadata_str abstractmethod #

get_metadata_str(
    mode: MetadataMode = MetadataMode.ALL,
) -> str

元数据字符串。

Source code in llama_index/core/schema.py
230
231
232
@abstractmethod
def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
    """Get the metadata as a string."""

set_content abstractmethod #

set_content(value: Any) -> None

设置节点的内容。

Source code in llama_index/core/schema.py
234
235
236
@abstractmethod
def set_content(self, value: Any) -> None:
    """Set the content of the node."""

get_embedding #

get_embedding() -> List[float]

获取嵌入。

如果嵌入为None,则报错。

Source code in llama_index/core/schema.py
331
332
333
334
335
336
337
338
    def get_embedding(self) -> List[float]:
        """Return the node's embedding.

        Raises:
            ValueError: If ``embedding`` is None.
        """
        vector = self.embedding
        if vector is None:
            raise ValueError("embedding not set.")
        return vector

as_related_node_info #

as_related_node_info() -> RelatedNodeInfo

获取节点作为RelatedNodeInfo。

Source code in llama_index/core/schema.py
340
341
342
343
344
345
346
347
def as_related_node_info(self) -> RelatedNodeInfo:
    """Build a ``RelatedNodeInfo`` carrying this node's id, type, metadata and hash."""
    related = RelatedNodeInfo(
        node_id=self.node_id,
        node_type=self.get_type(),
        metadata=self.metadata,
        hash=self.hash,
    )
    return related

TextNode #

Bases: BaseNode

Source code in llama_index/core/schema.py
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
class TextNode(BaseNode):
    """Node whose content is a text string, rendered together with its metadata."""

    text: str = Field(default="", description="Text content of the node.")
    start_char_idx: Optional[int] = Field(
        default=None, description="Start char index of the node."
    )
    end_char_idx: Optional[int] = Field(
        default=None, description="End char index of the node."
    )
    text_template: str = Field(
        default=DEFAULT_TEXT_NODE_TMPL,
        description=(
            "Template for how text is formatted, with {content} and "
            "{metadata_str} placeholders."
        ),
    )
    metadata_template: str = Field(
        default=DEFAULT_METADATA_TMPL,
        description=(
            "Template for how metadata is formatted, with {key} and "
            "{value} placeholders."
        ),
    )
    metadata_seperator: str = Field(
        default="\n",
        description="Separator between metadata fields when converting to string.",
    )

    @classmethod
    def class_name(cls) -> str:
        return "TextNode"

    @property
    def hash(self) -> str:
        """SHA-256 hex digest of the text concatenated with ``str(metadata)``."""
        identity_bytes = (str(self.text) + str(self.metadata)).encode(
            "utf-8", "surrogatepass"
        )
        return str(sha256(identity_bytes).hexdigest())

    @classmethod
    def get_type(cls) -> str:
        """Get the object type."""
        return ObjectType.TEXT

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
        """Return the text, wrapped in ``text_template`` when metadata is rendered."""
        rendered_metadata = self.get_metadata_str(mode=metadata_mode).strip()
        if rendered_metadata:
            combined = self.text_template.format(
                content=self.text, metadata_str=rendered_metadata
            )
            return combined.strip()

        # No metadata to show: return the raw text untouched.
        return self.text

    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """Render the metadata entries visible in *mode* as one string."""
        if mode == MetadataMode.NONE:
            return ""

        # Pick the exclusion list that applies to the requested mode.
        if mode == MetadataMode.LLM:
            excluded = self.excluded_llm_metadata_keys
        elif mode == MetadataMode.EMBED:
            excluded = self.excluded_embed_metadata_keys
        else:
            excluded = []
        hidden_keys = set(excluded)

        rendered = []
        for key, value in self.metadata.items():
            if key in hidden_keys:
                continue
            rendered.append(self.metadata_template.format(key=key, value=str(value)))
        return self.metadata_seperator.join(rendered)

    def set_content(self, value: str) -> None:
        """Set the content of the node."""
        self.text = value

    def get_node_info(self) -> Dict[str, Any]:
        """Return the start and end character indices of the node."""
        info = {"start": self.start_char_idx, "end": self.end_char_idx}
        return info

    def get_text(self) -> str:
        """Return the content with no metadata rendered."""
        return self.get_content(metadata_mode=MetadataMode.NONE)

    @property
    def node_info(self) -> Dict[str, Any]:
        """Deprecated: get node info."""
        return self.get_node_info()

node_info property #

node_info: Dict[str, Any]

已弃用:获取节点信息。

get_type classmethod #

get_type() -> str

获取对象类型。

Source code in llama_index/core/schema.py
386
387
388
389
@classmethod
def get_type(cls) -> str:
    """Get the object type."""
    return ObjectType.TEXT

get_content #

get_content(
    metadata_mode: MetadataMode = MetadataMode.NONE,
) -> str

获取对象内容。

Source code in llama_index/core/schema.py
391
392
393
394
395
396
397
398
399
def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
    """Return the text, wrapped in ``text_template`` when metadata is rendered."""
    rendered_metadata = self.get_metadata_str(mode=metadata_mode).strip()
    if rendered_metadata:
        combined = self.text_template.format(
            content=self.text, metadata_str=rendered_metadata
        )
        return combined.strip()

    # No metadata to show: return the raw text untouched.
    return self.text

get_metadata_str #

get_metadata_str(
    mode: MetadataMode = MetadataMode.ALL,
) -> str

元数据信息字符串。

Source code in llama_index/core/schema.py
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
    """Render the metadata entries visible in *mode* as one string."""
    if mode == MetadataMode.NONE:
        return ""

    # Pick the exclusion list that applies to the requested mode.
    if mode == MetadataMode.LLM:
        excluded = self.excluded_llm_metadata_keys
    elif mode == MetadataMode.EMBED:
        excluded = self.excluded_embed_metadata_keys
    else:
        excluded = []
    hidden_keys = set(excluded)

    rendered = []
    for key, value in self.metadata.items():
        if key in hidden_keys:
            continue
        rendered.append(self.metadata_template.format(key=key, value=str(value)))
    return self.metadata_seperator.join(rendered)

set_content #

set_content(value: str) -> None

设置节点的内容。

Source code in llama_index/core/schema.py
424
425
426
def set_content(self, value: str) -> None:
    """Set the content of the node by assigning ``value`` to ``text``."""
    self.text = value

get_node_info #

get_node_info() -> Dict[str, Any]

获取节点信息。

Source code in llama_index/core/schema.py
428
429
430
def get_node_info(self) -> Dict[str, Any]:
    """Get node info: the start and end character indices."""
    return {"start": self.start_char_idx, "end": self.end_char_idx}

ImageNode #

Bases: TextNode

带有图像的节点。

Source code in llama_index/core/schema.py
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
class ImageNode(TextNode):
    """Node with an image."""

    # TODO: store reference instead of actual image
    # base64 encoded image str
    image: Optional[str] = None
    image_path: Optional[str] = None
    image_url: Optional[str] = None
    image_mimetype: Optional[str] = None
    text_embedding: Optional[List[float]] = Field(
        default=None,
        description="Text embedding of image node, if text field is filled out",
    )

    @classmethod
    def get_type(cls) -> str:
        return ObjectType.IMAGE

    @classmethod
    def class_name(cls) -> str:
        return "ImageNode"

    def resolve_image(self) -> ImageType:
        """Resolve the image into a form PIL can read.

        Sources are tried in order: the base64 ``image`` string, then
        ``image_path``, then ``image_url``.

        Returns:
            A ``BytesIO`` with the decoded or downloaded bytes, or the path
            string when ``image_path`` is used.

        Raises:
            ValueError: If none of the three image sources is set.
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        if self.image is not None:
            import base64

            return BytesIO(base64.b64decode(self.image))
        elif self.image_path is not None:
            return self.image_path
        elif self.image_url is not None:
            # load image from URL
            import requests

            # Bound the wait so an unresponsive host cannot hang the caller.
            response = requests.get(self.image_url, timeout=60)
            # Do not hand an HTTP error page to the caller as image bytes.
            response.raise_for_status()
            return BytesIO(response.content)
        else:
            raise ValueError("No image found in node.")

resolve_image #

resolve_image() -> ImageType

解析图像,使PIL能够读取它。

Source code in llama_index/core/schema.py
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
def resolve_image(self) -> ImageType:
    """Resolve the image into a form PIL can read.

    Sources are tried in order: the base64 ``image`` string, then
    ``image_path``, then ``image_url``.

    Returns:
        A ``BytesIO`` with the decoded or downloaded bytes, or the path
        string when ``image_path`` is used.

    Raises:
        ValueError: If none of the three image sources is set.
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
    """
    if self.image is not None:
        import base64

        return BytesIO(base64.b64decode(self.image))
    elif self.image_path is not None:
        return self.image_path
    elif self.image_url is not None:
        # load image from URL
        import requests

        # Bound the wait so an unresponsive host cannot hang the caller.
        response = requests.get(self.image_url, timeout=60)
        # Do not hand an HTTP error page to the caller as image bytes.
        response.raise_for_status()
        return BytesIO(response.content)
    else:
        raise ValueError("No image found in node.")

IndexNode #

Bases: TextNode

具有对任何对象的引用的节点。

这可以包括其他索引、查询引擎、检索器。

这也可以包括其他节点(尽管这与节点类上的“关系”重叠)。

Source code in llama_index/core/schema.py
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
class IndexNode(TextNode):
    """Node with a reference to any object.

    This can include other indices, query engines, retrievers.

    This can also include other nodes (though this overlaps with
    `relationships` on the Node class).
    """

    index_id: str
    obj: Any = None

    def dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Return the node as a dict with ``obj`` converted for serialization.

        ``obj`` is stored as None, as ``doc_to_json`` output for a
        ``BaseNode``, as ``obj.dict()`` for another ``BaseModel``, and as a
        ``json.dumps`` string otherwise.

        Raises:
            ValueError: If ``obj`` cannot be converted; the original error
                is attached as the cause.
        """
        from llama_index.core.storage.docstore.utils import doc_to_json

        data = super().dict(**kwargs)

        try:
            if self.obj is None:
                data["obj"] = None
            elif isinstance(self.obj, BaseNode):
                data["obj"] = doc_to_json(self.obj)
            elif isinstance(self.obj, BaseModel):
                data["obj"] = self.obj.dict()
            else:
                data["obj"] = json.dumps(self.obj)
        except Exception as err:
            raise ValueError(
                "IndexNode obj is not serializable: " + str(self.obj)
            ) from err

        return data

    @classmethod
    def from_text_node(
        cls,
        node: TextNode,
        index_id: str,
    ) -> "IndexNode":
        """Create an index node from a text node.

        All attributes of ``node`` are copied. ``index_id`` is set afterwards,
        so it wins (instead of raising a duplicate-keyword ``TypeError``) if
        the node's dump already carries an ``index_id`` entry.
        """
        # copy all attributes from text node, add index id
        node_data = node.dict()
        node_data["index_id"] = index_id
        return cls(**node_data)

    # TODO: return type here not supported by current mypy version
    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self:  # type: ignore
        """Build an index node from a dict and re-parse its ``obj`` entry.

        A string ``obj`` becomes a ``TextNode`` holding that string. A dict
        ``obj`` is passed to ``json_to_doc``; if that fails it becomes a
        ``TextNode`` holding ``str(obj)``. Any other value leaves ``obj`` as
        None.
        """
        output = super().from_dict(data, **kwargs)

        obj = data.get("obj", None)
        parsed_obj = None

        if isinstance(obj, str):
            parsed_obj = TextNode(text=obj)
        elif isinstance(obj, dict):
            from llama_index.core.storage.docstore.utils import json_to_doc

            # check if its a node, else assume stringable
            try:
                parsed_obj = json_to_doc(obj)
            except Exception:
                parsed_obj = TextNode(text=str(obj))

        output.obj = parsed_obj

        return output

    @classmethod
    def get_type(cls) -> str:
        return ObjectType.INDEX

    @classmethod
    def class_name(cls) -> str:
        return "IndexNode"

from_text_node classmethod #

from_text_node(node: TextNode, index_id: str) -> IndexNode

从文本节点创建索引节点。

Source code in llama_index/core/schema.py
514
515
516
517
518
519
520
521
522
523
524
525
@classmethod
def from_text_node(
    cls,
    node: TextNode,
    index_id: str,
) -> "IndexNode":
    """Create an index node from a text node.

    All attributes of ``node`` are copied. ``index_id`` is set afterwards,
    so it wins (instead of raising a duplicate-keyword ``TypeError``) if
    the node's dump already carries an ``index_id`` entry.
    """
    # copy all attributes from text node, add index id
    node_data = node.dict()
    node_data["index_id"] = index_id
    return cls(**node_data)

NodeWithScore #

Bases: BaseComponent

Source code in llama_index/core/schema.py
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
class NodeWithScore(BaseComponent):
    """A node paired with an optional score."""

    node: BaseNode
    score: Optional[float] = None

    def __str__(self) -> str:
        """Return the node's string form followed by a ``Score:`` line."""
        if self.score is None:
            score_str = "None"
        else:
            score_str = f"{self.score: 0.3f}"
        return f"{self.node}\nScore: {score_str}\n"

    def get_score(self, raise_error: bool = False) -> float:
        """Return the score, or 0.0 when unset.

        Raises:
            ValueError: If the score is unset and ``raise_error`` is true.
        """
        if self.score is not None:
            return self.score
        if raise_error:
            raise ValueError("Score not set.")
        return 0.0

    @classmethod
    def class_name(cls) -> str:
        return "NodeWithScore"

    ##### pass through methods to BaseNode #####
    @property
    def node_id(self) -> str:
        return self.node.node_id

    @property
    def id_(self) -> str:
        return self.node.id_

    @property
    def text(self) -> str:
        """Text of the wrapped node; only available for ``TextNode``."""
        if not isinstance(self.node, TextNode):
            raise ValueError("Node must be a TextNode to get text.")
        return self.node.text

    @property
    def metadata(self) -> Dict[str, Any]:
        return self.node.metadata

    @property
    def embedding(self) -> Optional[List[float]]:
        return self.node.embedding

    def get_text(self) -> str:
        """Return ``get_text()`` of the wrapped node; only for ``TextNode``."""
        if not isinstance(self.node, TextNode):
            raise ValueError("Node must be a TextNode to get text.")
        return self.node.get_text()

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
        return self.node.get_content(metadata_mode=metadata_mode)

    def get_embedding(self) -> List[float]:
        return self.node.get_embedding()

get_score #

get_score(raise_error: bool = False) -> float

获取分数。

Source code in llama_index/core/schema.py
567
568
569
570
571
572
573
574
575
def get_score(self, raise_error: bool = False) -> float:
    """Return the score, or 0.0 when unset.

    Raises:
        ValueError: If the score is unset and ``raise_error`` is true.
    """
    if self.score is not None:
        return self.score
    if raise_error:
        raise ValueError("Score not set.")
    return 0.0

Document #

Bases: TextNode

通用的数据文档接口。

这个文档连接到数据源。

Source code in llama_index/core/schema.py
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
class Document(TextNode):
    """Generic interface for a data document.

    This document connects to data sources.
    """

    # TODO: A lot of backwards compatibility logic here, clean up
    id_: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique ID of the node.",
        alias="doc_id",
    )

    # Legacy attribute names mapped to the fields they now write to.
    _compat_fields = {"doc_id": "id_", "extra_info": "metadata"}

    @classmethod
    def get_type(cls) -> str:
        """Get the document type."""
        return ObjectType.DOCUMENT

    @property
    def doc_id(self) -> str:
        """Get the document ID."""
        return self.id_

    def __str__(self) -> str:
        """Return the doc ID followed by the truncated, wrapped content."""
        preview = truncate_text(self.get_content().strip(), TRUNCATE_LENGTH)
        wrapped_preview = textwrap.fill(f"Text: {preview}\n", width=WRAP_WIDTH)
        return f"Doc ID: {self.doc_id}\n{wrapped_preview}"

    def get_doc_id(self) -> str:
        """TODO: Deprecated: get the document ID."""
        return self.id_

    def __setattr__(self, name: str, value: object) -> None:
        """Set an attribute, redirecting legacy names via ``_compat_fields``."""
        target = self._compat_fields.get(name, name)
        super().__setattr__(target, value)

    def to_langchain_format(self) -> "LCDocument":
        """Convert this document to the LangChain document format."""
        from llama_index.core.bridge.langchain import Document as LCDocument

        return LCDocument(page_content=self.text, metadata=self.metadata or {})

    @classmethod
    def from_langchain_format(cls, doc: "LCDocument") -> "Document":
        """Create a document from the LangChain document format."""
        return cls(text=doc.page_content, metadata=doc.metadata)

    def to_haystack_format(self) -> "HaystackDocument":
        """Convert this document to the Haystack document format."""
        from haystack.schema import Document as HaystackDocument

        haystack_doc = HaystackDocument(
            content=self.text, meta=self.metadata, embedding=self.embedding, id=self.id_
        )
        return haystack_doc

    @classmethod
    def from_haystack_format(cls, doc: "HaystackDocument") -> "Document":
        """Create a document from the Haystack document format."""
        return cls(
            text=doc.content, metadata=doc.meta, embedding=doc.embedding, id_=doc.id
        )

    def to_embedchain_format(self) -> Dict[str, Any]:
        """Convert this document to the EmbedChain document format."""
        doc_id = self.id_
        payload = {"content": self.text, "meta_data": self.metadata}
        return {"doc_id": doc_id, "data": payload}

    @classmethod
    def from_embedchain_format(cls, doc: Dict[str, Any]) -> "Document":
        """Create a document from the EmbedChain document format."""
        return cls(
            text=doc["data"]["content"],
            metadata=doc["data"]["meta_data"],
            id_=doc["doc_id"],
        )

    def to_semantic_kernel_format(self) -> "MemoryRecord":
        """Convert this document to the Semantic Kernel document format."""
        import numpy as np
        from semantic_kernel.memory.memory_record import MemoryRecord

        record = MemoryRecord(
            id=self.id_,
            text=self.text,
            additional_metadata=self.get_metadata_str(),
            embedding=np.array(self.embedding) if self.embedding else None,
        )
        return record

    @classmethod
    def from_semantic_kernel_format(cls, doc: "MemoryRecord") -> "Document":
        """Create a document from the Semantic Kernel document format."""
        return cls(
            text=doc._text,
            metadata={"additional_metadata": doc._additional_metadata},
            embedding=doc._embedding.tolist() if doc._embedding is not None else None,
            id_=doc._id,
        )

    def to_vectorflow(self, client: Any) -> None:
        """Send this document's text to vectorflow, which has no document object."""
        # write document to temp file
        import tempfile

        with tempfile.NamedTemporaryFile() as tmp_file:
            tmp_file.write(self.text.encode("utf-8"))
            # Flush so the client sees the full text when given the path.
            tmp_file.flush()
            client.embed(tmp_file.name)

    @classmethod
    def example(cls) -> "Document":
        """Return a sample document built from ``SAMPLE_TEXT``."""
        sample_metadata = {"filename": "README.md", "category": "codebase"}
        return Document(
            text=SAMPLE_TEXT,
            metadata=sample_metadata,
        )

    @classmethod
    def class_name(cls) -> str:
        return "Document"

doc_id property #

doc_id: str

获取文档ID。

get_type classmethod #

get_type() -> str

获取文档类型。

Source code in llama_index/core/schema.py
635
636
637
638
@classmethod
def get_type(cls) -> str:
    """Get the document type."""
    return ObjectType.DOCUMENT

get_doc_id #

get_doc_id() -> str

TODO:已弃用:获取文档ID。

Source code in llama_index/core/schema.py
654
655
656
def get_doc_id(self) -> str:
    """TODO: Deprecated: get the document ID."""
    return self.id_

to_langchain_format #

to_langchain_format() -> Document

将结构转换为LangChain文档格式。

Source code in llama_index/core/schema.py
663
664
665
666
667
668
def to_langchain_format(self) -> "LCDocument":
    """Convert this document to a LangChain ``Document``.

    Falsy metadata (``None`` or an empty mapping) is replaced by a new
    empty dict.
    """
    from llama_index.core.bridge.langchain import Document as LCDocument

    return LCDocument(page_content=self.text, metadata=self.metadata or {})

from_langchain_format classmethod #

from_langchain_format(doc: Document) -> Document

将结构从LangChain文档格式转换。

Source code in llama_index/core/schema.py
670
671
672
673
@classmethod
def from_langchain_format(cls, doc: "LCDocument") -> "Document":
    """Build a document from a LangChain ``Document``'s content and metadata."""
    content, meta = doc.page_content, doc.metadata
    return cls(text=content, metadata=meta)

to_haystack_format #

to_haystack_format() -> Document

将结构转换为Haystack文档格式。

Source code in llama_index/core/schema.py
675
676
677
678
679
680
681
def to_haystack_format(self) -> "HaystackDocument":
    """Convert this document to a Haystack ``Document``.

    Text, metadata, embedding, and id are passed through unchanged.
    """
    from haystack.schema import Document as HaystackDocument

    haystack_fields = {
        "content": self.text,
        "meta": self.metadata,
        "embedding": self.embedding,
        "id": self.id_,
    }
    return HaystackDocument(**haystack_fields)

from_haystack_format classmethod #

from_haystack_format(doc: Document) -> Document

将结构从Haystack文档格式转换。

Source code in llama_index/core/schema.py
683
684
685
686
687
688
@classmethod
def from_haystack_format(cls, doc: "HaystackDocument") -> "Document":
    """Build a document from a Haystack ``Document``.

    Content, metadata, embedding, and id are passed through unchanged.
    """
    document_fields = {
        "text": doc.content,
        "metadata": doc.meta,
        "embedding": doc.embedding,
        "id_": doc.id,
    }
    return cls(**document_fields)

to_embedchain_format #

to_embedchain_format() -> Dict[str, Any]

将结构体转换为EmbedChain文档格式。

Source code in llama_index/core/schema.py
690
691
692
693
694
695
def to_embedchain_format(self) -> Dict[str, Any]:
    """Convert this document to the EmbedChain document format.

    Returns:
        A dict with ``"doc_id"`` and a nested ``"data"`` dict holding
        ``"content"`` and ``"meta_data"``.
    """
    payload = {"content": self.text, "meta_data": self.metadata}
    return {"doc_id": self.id_, "data": payload}

from_embedchain_format classmethod #

from_embedchain_format(doc: Dict[str, Any]) -> Document

将结构从EmbedChain文档格式转换。

Source code in llama_index/core/schema.py
697
698
699
700
701
702
703
704
@classmethod
def from_embedchain_format(cls, doc: Dict[str, Any]) -> "Document":
    """Build a document from the EmbedChain document format.

    Args:
        doc: A dict with ``"doc_id"`` and a nested ``"data"`` dict holding
            ``"content"`` and ``"meta_data"``.
    """
    payload = doc["data"]
    return cls(
        text=payload["content"],
        metadata=payload["meta_data"],
        id_=doc["doc_id"],
    )

to_semantic_kernel_format #

to_semantic_kernel_format() -> MemoryRecord

将结构转换为语义内核文档格式。

Source code in llama_index/core/schema.py
706
707
708
709
710
711
712
713
714
715
716
def to_semantic_kernel_format(self) -> "MemoryRecord":
    """Convert this document to a Semantic Kernel ``MemoryRecord``.

    The record receives this document's id, text, and metadata string.
    A missing or empty embedding is passed through as ``None``;
    otherwise it is wrapped in a NumPy array.
    """
    import numpy as np
    from semantic_kernel.memory.memory_record import MemoryRecord

    # Falsy embeddings (None or an empty list) are not converted.
    embedding_array = None
    if self.embedding:
        embedding_array = np.array(self.embedding)

    return MemoryRecord(
        id=self.id_,
        text=self.text,
        additional_metadata=self.get_metadata_str(),
        embedding=embedding_array,
    )

from_semantic_kernel_format classmethod #

from_semantic_kernel_format(doc: MemoryRecord) -> Document

将结构从语义内核文档格式转换。

Source code in llama_index/core/schema.py
718
719
720
721
722
723
724
725
726
@classmethod
def from_semantic_kernel_format(cls, doc: "MemoryRecord") -> "Document":
    """Build a document from a Semantic Kernel ``MemoryRecord``.

    The record's additional metadata is stored under the
    ``"additional_metadata"`` key, and its embedding (when present) is
    converted with ``tolist()``.
    """
    raw_embedding = doc._embedding
    embedding_values = None if raw_embedding is None else raw_embedding.tolist()
    return cls(
        text=doc._text,
        metadata={"additional_metadata": doc._additional_metadata},
        embedding=embedding_values,
        id_=doc._id,
    )

to_vectorflow #

to_vectorflow(client: Any) -> None

将一个文档发送给vectorflow,因为他们没有文档对象。

Source code in llama_index/core/schema.py
728
729
730
731
732
733
734
735
736
def to_vectorflow(self, client: Any) -> None:
    """Send this document to vectorflow, which has no document object.

    The text is written as UTF-8 to a temporary file and the file's path
    is handed to ``client.embed``.
    """
    import tempfile

    encoded_text = self.text.encode("utf-8")
    with tempfile.NamedTemporaryFile() as tmp_file:
        tmp_file.write(encoded_text)
        # Flush so the bytes are on disk before the client is given the path.
        tmp_file.flush()
        client.embed(tmp_file.name)

ImageDocument #

Bases: Document, ImageNode

包含图像的数据文档。

Source code in llama_index/core/schema.py
750
751
752
753
754
755
class ImageDocument(Document, ImageNode):
    """Data document containing an image."""

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        name = "ImageDocument"
        return name

QueryBundle dataclass #

Bases: DataClassJsonMixin

查询包。

这个数据类包含了原始查询字符串和相关的转换。

Parameters:

Name Type Description Default
query_str str

原始用户指定的查询字符串。 目前被所有非基于嵌入的查询使用。

required
custom_embedding_strs list[str]

用于嵌入查询的字符串列表。 目前被所有基于嵌入的查询使用。

None
embedding list[float]

查询的存储嵌入。

None
Source code in llama_index/core/schema.py
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
@dataclass
class QueryBundle(DataClassJsonMixin):
    """Query bundle.

    This dataclass holds the original query string together with its
    associated transformations.

    Args:
        query_str (str): The original user-specified query string.
            Currently used by all non-embedding-based queries.
        image_path (Optional[str]): Path to a single image used as query
            input.
        custom_embedding_strs (list[str]): List of strings used for
            embedding the query. Currently used by all embedding-based
            queries.
        embedding (list[float]): The stored embedding for the query.
    """

    query_str: str
    # A single image path may be supplied as the query input.
    image_path: Optional[str] = None
    custom_embedding_strs: Optional[List[str]] = None
    embedding: Optional[List[float]] = None

    @property
    def embedding_strs(self) -> List[str]:
        """Return the custom embedding strings if set, else the query string.

        With no custom strings, an empty query string yields an empty list.
        """
        if self.custom_embedding_strs is not None:
            return self.custom_embedding_strs
        if len(self.query_str) == 0:
            return []
        return [self.query_str]

    @property
    def embedding_image(self) -> List[ImageType]:
        """Return the image path in a list for image retrieval, or ``[]`` if unset."""
        return [] if self.image_path is None else [self.image_path]

    def __str__(self) -> str:
        """Return the query string as the string representation."""
        return self.query_str

embedding_strs property #

embedding_strs: List[str]

如果指定了自定义嵌入字符串,则使用自定义嵌入字符串,否则使用查询字符串。

embedding_image property #

embedding_image: List[ImageType]

使用图像路径进行图像检索。