Skip to content

Dynamodb

DynamoDBVectorStore #

Bases: BasePydanticVectorStore

DynamoDB向量存储。

在这个向量存储中,嵌入被存储在DynamoDB表中。 这个类是参考SimpleVectorStore实现的。

Parameters:

Name Type Description Default
dynamodb_kvstore DynamoDBKVStore

数据存储

required
namespace Optional[str]

命名空间

None
示例

pip install llama-index-vector-stores-dynamodb

from llama_index.vector_stores.dynamodb import DynamoDBVectorStore

vector_store = DynamoDBVectorStore.from_table_name(table_name="my_table")
Source code in llama_index/vector_stores/dynamodb/base.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
class DynamoDBVectorStore(BasePydanticVectorStore):
    """Vector store that keeps embeddings in a DynamoDB table.

    Embeddings are persisted through a ``DynamoDBKVStore``; the implementation
    is modelled on ``SimpleVectorStore``. Two collections are used, both
    prefixed with the namespace: one maps node id -> embedding, the other maps
    node id -> ref_doc_id.

    Args:
        dynamodb_kvstore (DynamoDBKVStore): key-value store used for persistence.
        namespace (Optional[str]): prefix of the collection names;
            ``DEFAULT_NAMESPACE`` is used when it is ``None`` or empty.

    Examples:
        `pip install llama-index-vector-stores-dynamodb`

        ```python
        from llama_index.vector_stores.dynamodb import DynamoDBVectorStore

        vector_store = DynamoDBVectorStore.from_table_name(table_name="my_table")
        ```
    """

    stores_text: bool = False

    _kvstore: DynamoDBKVStore = PrivateAttr()
    _collection_embedding: str = PrivateAttr()
    _collection_text_id_to_doc_id: str = PrivateAttr()
    _key_value: str = PrivateAttr()

    def __init__(
        self, dynamodb_kvstore: DynamoDBKVStore, namespace: str | None = None
    ) -> None:
        """Initialize the store and derive the collection names from the namespace."""
        super().__init__()

        self._kvstore = dynamodb_kvstore
        namespace = namespace or DEFAULT_NAMESPACE
        self._collection_embedding = f"{namespace}/embedding"
        self._collection_text_id_to_doc_id = f"{namespace}/text_id_to_doc_id"
        # Every stored item is a one-entry dict; this is the key of that entry.
        self._key_value = "value"

    @classmethod
    def from_table_name(
        cls, table_name: str, namespace: str | None = None
    ) -> DynamoDBVectorStore:
        """Create a vector store on top of the DynamoDB table ``table_name``."""
        dynamodb_kvstore = DynamoDBKVStore.from_table_name(table_name=table_name)
        return cls(dynamodb_kvstore=dynamodb_kvstore, namespace=namespace)

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this vector store class."""
        return "DynamoDBVectorStore"

    @property
    def client(self) -> None:
        """Get the client; this store does not expose one, so it is always ``None``."""
        return

    def get(self, text_id: str) -> List[float]:
        """Return the embedding stored for ``text_id``.

        Raises:
            ValueError: if no embedding is stored under ``text_id``.
        """
        item = self._kvstore.get(key=text_id, collection=self._collection_embedding)
        # NOTE(review): assumes the KV store returns None for a missing key --
        # confirm. Failing here gives a clear message instead of an opaque
        # "'NoneType' object is not subscriptable".
        if item is None:
            raise ValueError(f"Embedding for text_id {text_id} not found.")
        item = cast(Dict[str, List[float]], item)
        return item[self._key_value]

    def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
        """Add nodes to the index and return their node ids.

        For each node both its embedding and its ref_doc_id are written, keyed
        by the node id, into their respective collections.
        """
        response = []
        for node in nodes:
            self._kvstore.put(
                key=node.node_id,
                val={self._key_value: node.get_embedding()},
                collection=self._collection_embedding,
            )
            self._kvstore.put(
                key=node.node_id,
                val={self._key_value: node.ref_doc_id},
                collection=self._collection_text_id_to_doc_id,
            )
            response.append(node.node_id)
        return response

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete all nodes that belong to the document ``ref_doc_id``.

        Args:
            ref_doc_id (str): doc_id of the document whose nodes are deleted.
        """
        # Collect first, delete afterwards: the id -> doc_id collection is the
        # only place where the owning document of a node is recorded.
        text_ids_to_delete = set()
        for text_id, item in self._kvstore.get_all(
            collection=self._collection_text_id_to_doc_id
        ).items():
            if ref_doc_id == item[self._key_value]:
                text_ids_to_delete.add(text_id)

        for text_id in text_ids_to_delete:
            self._kvstore.delete(key=text_id, collection=self._collection_embedding)
            self._kvstore.delete(
                key=text_id, collection=self._collection_text_id_to_doc_id
            )

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Return the ids and similarities of the top-k nodes for ``query``.

        All stored embeddings are loaded from the KV store and ranked against
        ``query.query_embedding``; ``query.node_ids``, when given, restricts
        the candidates.

        Raises:
            ValueError: if metadata filters are supplied (not supported) or the
                query mode is neither a learner mode nor the default mode.
        """
        if query.filters is not None:
            raise ValueError(
                "Metadata filters not implemented for DynamoDBVectorStore yet."
            )

        # TODO: consolidate with get_query_text_embedding_similarities
        candidates = self._kvstore.get_all(collection=self._collection_embedding)

        if query.node_ids:
            available_ids = set(query.node_ids)
            # Filter once so ids and embeddings cannot get out of step.
            candidates = {k: v for k, v in candidates.items() if k in available_ids}

        node_ids = list(candidates)
        embeddings = [v[self._key_value] for v in candidates.values()]

        query_embedding = cast(List[float], query.query_embedding)
        if query.mode in LEARNER_MODES:
            top_similarities, top_ids = get_top_k_embeddings_learner(
                query_embedding=query_embedding,
                embeddings=embeddings,
                similarity_top_k=query.similarity_top_k,
                embedding_ids=node_ids,
            )
        elif query.mode == VectorStoreQueryMode.DEFAULT:
            top_similarities, top_ids = get_top_k_embeddings(
                query_embedding=query_embedding,
                embeddings=embeddings,
                similarity_top_k=query.similarity_top_k,
                embedding_ids=node_ids,
            )
        else:
            raise ValueError(f"Invalid query mode: {query.mode}")

        return VectorStoreQueryResult(similarities=top_similarities, ids=top_ids)

client property #

client: None

获取客户端。该向量存储不暴露底层客户端，此属性始终返回 None。

from_table_name classmethod #

from_table_name(
    table_name: str, namespace: str | None = None
) -> DynamoDBVectorStore

从DynamoDB表中加载。

Source code in llama_index/vector_stores/dynamodb/base.py
71
72
73
74
75
76
77
@classmethod
def from_table_name(
    cls, table_name: str, namespace: str | None = None
) -> DynamoDBVectorStore:
    """Create a vector store on top of the DynamoDB table ``table_name``.

    The table is wrapped in a ``DynamoDBKVStore`` and handed, together with
    the optional ``namespace``, to the regular constructor.
    """
    return cls(
        dynamodb_kvstore=DynamoDBKVStore.from_table_name(table_name=table_name),
        namespace=namespace,
    )

get #

get(text_id: str) -> List[float]

获取嵌入。

Source code in llama_index/vector_stores/dynamodb/base.py
88
89
90
91
92
def get(self, text_id: str) -> List[float]:
    """Return the embedding stored for ``text_id``.

    Raises:
        ValueError: if no embedding is stored under ``text_id``.
    """
    item = self._kvstore.get(key=text_id, collection=self._collection_embedding)
    # NOTE(review): assumes the KV store returns None for a missing key --
    # confirm. Failing here gives a clear message instead of an opaque
    # "'NoneType' object is not subscriptable".
    if item is None:
        raise ValueError(f"Embedding for text_id {text_id} not found.")
    item = cast(Dict[str, List[float]], item)
    return item[self._key_value]

add #

add(nodes: List[BaseNode], **add_kwargs: Any) -> List[str]

将节点添加到索引。

Source code in llama_index/vector_stores/dynamodb/base.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
    """Add nodes to the index and return their node ids.

    For every node two records are written, both keyed by the node id: the
    embedding goes to the embedding collection and the ref_doc_id goes to the
    text-id -> doc-id collection.
    """
    added_ids = []
    for node in nodes:
        # Embedding first, so a node without an embedding fails before
        # anything is written for it.
        embedding_record = {self._key_value: node.get_embedding()}
        self._kvstore.put(
            key=node.node_id,
            val=embedding_record,
            collection=self._collection_embedding,
        )

        doc_id_record = {self._key_value: node.ref_doc_id}
        self._kvstore.put(
            key=node.node_id,
            val=doc_id_record,
            collection=self._collection_text_id_to_doc_id,
        )

        added_ids.append(node.node_id)
    return added_ids

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

使用ref_doc_id删除节点。

Source code in llama_index/vector_stores/dynamodb/base.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete all nodes that belong to the document ``ref_doc_id``.

        Args:
            ref_doc_id (str): doc_id of the document whose nodes are deleted.
        """
        doc_id_records = self._kvstore.get_all(
            collection=self._collection_text_id_to_doc_id
        )
        # Resolve the affected node ids up front; the mapping scanned here is
        # itself modified by the deletions below.
        stale_text_ids = {
            text_id
            for text_id, record in doc_id_records.items()
            if ref_doc_id == record[self._key_value]
        }

        for text_id in stale_text_ids:
            self._kvstore.delete(key=text_id, collection=self._collection_embedding)
            self._kvstore.delete(
                key=text_id, collection=self._collection_text_id_to_doc_id
            )

query #

query(
    query: VectorStoreQuery, **kwargs: Any
) -> VectorStoreQueryResult

查询向量存储，返回与查询嵌入最相似的前 k 个节点的 ID 及其相似度。

Source code in llama_index/vector_stores/dynamodb/base.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """Return the ids and similarities of the top-k nodes for ``query``.

    All stored embeddings are loaded from the KV store and ranked against
    ``query.query_embedding``; ``query.node_ids``, when given, restricts the
    candidates.

    Raises:
        ValueError: if metadata filters are supplied (not supported) or the
            query mode is neither a learner mode nor the default mode.
    """
    if query.filters is not None:
        raise ValueError(
            "Metadata filters not implemented for DynamoDBVectorStore yet."
        )

    # TODO: consolidate with get_query_text_embedding_similarities
    candidates = self._kvstore.get_all(collection=self._collection_embedding)

    if query.node_ids:
        available_ids = set(query.node_ids)
        # Filter once so ids and embeddings cannot get out of step.
        candidates = {k: v for k, v in candidates.items() if k in available_ids}

    node_ids = list(candidates)
    embeddings = [v[self._key_value] for v in candidates.values()]

    query_embedding = cast(List[float], query.query_embedding)
    if query.mode in LEARNER_MODES:
        top_similarities, top_ids = get_top_k_embeddings_learner(
            query_embedding=query_embedding,
            embeddings=embeddings,
            similarity_top_k=query.similarity_top_k,
            embedding_ids=node_ids,
        )
    elif query.mode == VectorStoreQueryMode.DEFAULT:
        top_similarities, top_ids = get_top_k_embeddings(
            query_embedding=query_embedding,
            embeddings=embeddings,
            similarity_top_k=query.similarity_top_k,
            embedding_ids=node_ids,
        )
    else:
        raise ValueError(f"Invalid query mode: {query.mode}")

    return VectorStoreQueryResult(similarities=top_similarities, ids=top_ids)