Skip to content

Vertexaivectorsearch

VertexAIVectorStore #

Bases: BasePydanticVectorStore

Vertex AI矢量搜索矢量存储。

在这个矢量存储中,嵌入式向量被存储在Vertex AI矢量存储中,文档被存储在Cloud Storage存储桶中。

在查询时,索引使用Vertex AI矢量搜索来查询前k个最相似的节点。

Parameters:

Name Type Description Default
project_id str

Google Cloud 项目ID。

None
region str

调用API的默认位置。必须与创建Vector Search索引的位置相同,并且必须是区域性的。

None
index_id str

在Vertex AI矢量搜索中创建的索引的完全限定资源名称。

None
endpoint_id str

在Vertex AI矢量搜索中创建的索引端点的完全限定资源名称。

None
gcs_bucket_name Optional[str]
           用于在批处理模式下创建索引的向量存储位置。
None
credentials_path Optional[str]
           本地文件系统上Google凭据的路径。
None
示例

pip install llama-index-vector-stores-vertexaivectorsearch

from llama_index.vector_stores.vertexaivectorsearch import VertexAIVectorStore

vector_store = VertexAIVectorStore(
    project_id=PROJECT_ID,
    region=REGION,
    index_id="<index_resource_name>",
    endpoint_id="<index_endpoint_resource_name>",
)
Source code in llama_index/vector_stores/vertexaivectorsearch/base.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
class VertexAIVectorStore(BasePydanticVectorStore):
    """Vertex AI Vector Search vector store.

    In this vector store, embedding vectors are stored in Vertex AI
    Vector Search and documents are stored in a Cloud Storage bucket.

    At query time, the index uses Vertex AI Vector Search to retrieve
    the top-k most similar nodes.

    Args:
        project_id (str): Google Cloud project ID.
        region (str): Default location to make API calls. Must be the
            same location where the Vector Search index was created,
            and must be regional.
        index_id (str): Fully qualified resource name of the index
            created in Vertex AI Vector Search.
        endpoint_id (str): Fully qualified resource name of the index
            endpoint created in Vertex AI Vector Search.
        gcs_bucket_name (Optional[str]): Staging location of the vector
            store used when updating the index in batch mode.
        credentials_path (Optional[str]): Path to Google credentials on
            the local file system.

    Examples:
        `pip install llama-index-vector-stores-vertexaivectorsearch`

        ```python
        from llama_index.vector_stores.vertexaivectorsearch import (
            VertexAIVectorStore,
        )

        vector_store = VertexAIVectorStore(
            project_id=PROJECT_ID,
            region=REGION,
            index_id="<index_resource_name>",
            endpoint_id="<index_endpoint_resource_name>",
        )
        ```
    """

    stores_text: bool = True
    remove_text_from_metadata: bool = True
    flat_metadata: bool = False

    # Metadata key under which each node's text is stored.
    text_key: str

    project_id: str
    region: str
    index_id: str
    endpoint_id: str
    gcs_bucket_name: Optional[str] = None
    credentials_path: Optional[str] = None

    _index: MatchingEngineIndex = PrivateAttr()
    _endpoint: MatchingEngineIndexEndpoint = PrivateAttr()
    _index_metadata: dict = PrivateAttr()
    _stream_update: bool = PrivateAttr()
    _staging_bucket: storage.Bucket = PrivateAttr()
    # _document_storage: GCSDocumentStorage = PrivateAttr()

    def __init__(
        self,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        index_id: Optional[str] = None,
        endpoint_id: Optional[str] = None,
        gcs_bucket_name: Optional[str] = None,
        credentials_path: Optional[str] = None,
        text_key: str = DEFAULT_TEXT_KEY,
        remove_text_from_metadata: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        super().__init__(
            project_id=project_id,
            region=region,
            index_id=index_id,
            endpoint_id=endpoint_id,
            gcs_bucket_name=gcs_bucket_name,
            credentials_path=credentials_path,
            text_key=text_key,
            remove_text_from_metadata=remove_text_from_metadata,
        )

        _sdk_manager = VectorSearchSDKManager(
            project_id=project_id, region=region, credentials_path=credentials_path
        )

        # Resolve index and endpoint resources, including index metadata.
        self._index = _sdk_manager.get_index(index_id=index_id)
        self._endpoint = _sdk_manager.get_endpoint(endpoint_id=endpoint_id)
        self._index_metadata = self._index.to_dict()

        # Determine the update method from the index metadata; a missing
        # key falls back to batch updates instead of raising a KeyError.
        self._stream_update = (
            self._index_metadata.get("indexUpdateMethod") == "STREAM_UPDATE"
        )

        # A staging bucket is only needed for batch-mode index updates.
        if self.gcs_bucket_name:
            self._staging_bucket = _sdk_manager.get_gcs_bucket(
                bucket_name=gcs_bucket_name
            )
        else:
            self._staging_bucket = None

    @classmethod
    def from_params(
        cls,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        index_id: Optional[str] = None,
        endpoint_id: Optional[str] = None,
        gcs_bucket_name: Optional[str] = None,
        credentials_path: Optional[str] = None,
        text_key: str = DEFAULT_TEXT_KEY,
        **kwargs: Any,
    ) -> "VertexAIVectorStore":
        """Create VertexAIVectorStore from config."""
        return cls(
            project_id=project_id,
            region=region,
            # Fixed: previously passed as ``index_name``, which
            # ``__init__`` does not accept.
            index_id=index_id,
            endpoint_id=endpoint_id,
            gcs_bucket_name=gcs_bucket_name,
            credentials_path=credentials_path,
            text_key=text_key,
            **kwargs,
        )

    @classmethod
    def class_name(cls) -> str:
        return "VertexAIVectorStore"

    @property
    def client(self) -> Any:
        """Get the underlying Matching Engine index (client object)."""
        return self._index

    @property
    def index(self) -> Any:
        """Get the Matching Engine index."""
        return self._index

    @property
    def endpoint(self) -> Any:
        """Get the Matching Engine index endpoint."""
        return self._endpoint

    @property
    def staging_bucket(self) -> Any:
        """Get the GCS staging bucket (None when not configured)."""
        return self._staging_bucket

    def add(
        self,
        nodes: List[BaseNode],
        is_complete_overwrite: bool = False,
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes with embeddings to the index.

        Uses a streaming update when the index supports it; otherwise
        performs a batch update staged through the GCS bucket.

        Args:
            nodes (List[BaseNode]): list of nodes with embeddings.
            is_complete_overwrite (bool): for batch updates, whether the
                existing index contents are fully overwritten.

        Returns:
            List[str]: IDs of the added nodes.

        Raises:
            ValueError: if a batch update is required but no staging
                bucket was configured.
        """
        ids = []
        embeddings = []
        metadatas = []
        for node in nodes:
            node_id = node.node_id
            metadata = node_to_metadata_dict(
                node, remove_text=False, flat_metadata=False
            )
            embedding = node.get_embedding()

            ids.append(node_id)
            embeddings.append(embedding)
            metadatas.append(metadata)

        data_points = utils.to_data_points(ids, embeddings, metadatas)
        # self._document_storage.add_documents(list(zip(ids, nodes)))

        if self._stream_update:
            utils.stream_update_index(index=self._index, data_points=data_points)
        else:
            if self._staging_bucket is None:
                raise ValueError(
                    "To update a Vector Search index a staging bucket must"
                    " be defined."
                )
            utils.batch_update_index(
                index=self._index,
                data_points=data_points,
                staging_bucket=self._staging_bucket,
                is_complete_overwrite=is_complete_overwrite,
            )
        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using ref_doc_id.

        Args:
            ref_doc_id (str): doc_id of the document whose nodes should
                be deleted.
        """
        # Look up datapoint ids whose metadata matches the ref_doc_id.
        metadata_filter = {"ref_doc_id": ref_doc_id}
        ids = utils.get_datapoints_by_filter(
            index=self.index, endpoint=self.endpoint, metadata=metadata_filter
        )
        # Remove the matching datapoints from the index.
        self._index.remove_datapoints(datapoint_ids=ids)

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query the index for the top-k most similar nodes."""
        query_embedding = None
        if query.mode == VectorStoreQueryMode.DEFAULT:
            query_embedding = [cast(List[float], query.query_embedding)]

        if query.filters is not None:
            # Filters may come from the generic query interface or from
            # kwargs, but never both at the same time.
            if "filter" in kwargs and kwargs["filter"] is not None:
                raise ValueError(
                    "Cannot specify filter via both query and kwargs. "
                    "Use kwargs only for Vertex AI Vector Search specific items that are "
                    "not supported via the generic query interface such as numeric filters."
                )
            match_filter, num_filter = utils.to_vectorsearch_filter(query.filters)
        else:
            match_filter = None
            num_filter = None

        matches = utils.find_neighbors(
            index=self._index,
            endpoint=self._endpoint,
            embeddings=query_embedding,
            top_k=query.similarity_top_k,
            filter=match_filter,
            numeric_filter=num_filter,
        )

        top_k_nodes = []
        top_k_ids = []
        top_k_scores = []

        for match in matches:
            node = utils.to_node(match, self.text_key)
            top_k_ids.append(match.id)
            top_k_scores.append(match.distance)
            top_k_nodes.append(node)

        return VectorStoreQueryResult(
            nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
        )

client property #

client: Any

获取客户端。

index property #

index: Any

获取索引。

endpoint property #

endpoint: Any

获取端点。

staging_bucket property #

staging_bucket: Any

获取暂存存储桶。

from_params classmethod #

from_params(
    project_id: Optional[str] = None,
    region: Optional[str] = None,
    index_id: Optional[str] = None,
    endpoint_id: Optional[str] = None,
    gcs_bucket_name: Optional[str] = None,
    credentials_path: Optional[str] = None,
    text_key: str = DEFAULT_TEXT_KEY,
    **kwargs: Any
) -> VertexAIVectorStore

从配置中创建VertexAIVectorStore。

Source code in llama_index/vector_stores/vertexaivectorsearch/base.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
@classmethod
def from_params(
    cls,
    project_id: Optional[str] = None,
    region: Optional[str] = None,
    index_id: Optional[str] = None,
    endpoint_id: Optional[str] = None,
    gcs_bucket_name: Optional[str] = None,
    credentials_path: Optional[str] = None,
    text_key: str = DEFAULT_TEXT_KEY,
    **kwargs: Any,
) -> "VertexAIVectorStore":
    """Create VertexAIVectorStore from config."""
    return cls(
        project_id=project_id,
        region=region,
        # Fixed: previously passed as ``index_name``, which ``__init__``
        # does not accept; the constructor expects ``index_id``.
        index_id=index_id,
        endpoint_id=endpoint_id,
        gcs_bucket_name=gcs_bucket_name,
        credentials_path=credentials_path,
        text_key=text_key,
        **kwargs,
    )

add #

add(
    nodes: List[BaseNode],
    is_complete_overwrite: bool = False,
    **add_kwargs: Any
) -> List[str]

将节点添加到索引中。

Parameters:

Name Type Description Default
节点

List[BaseNode]: 带有嵌入的节点列表

required
Source code in llama_index/vector_stores/vertexaivectorsearch/base.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
    def add(
        self,
        nodes: List[BaseNode],
        is_complete_overwrite: bool = False,
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes with embeddings to the index.

        Uses a streaming update when the index supports it; otherwise
        performs a batch update staged through the GCS bucket.

        Args:
            nodes (List[BaseNode]): list of nodes with embeddings.
            is_complete_overwrite (bool): for batch updates, whether the
                existing index contents are fully overwritten.

        Returns:
            List[str]: IDs of the added nodes.

        Raises:
            ValueError: if a batch update is required but no staging
                bucket was configured.
        """
        ids = []
        embeddings = []
        metadatas = []
        for node in nodes:
            node_id = node.node_id
            metadata = node_to_metadata_dict(
                node, remove_text=False, flat_metadata=False
            )
            embedding = node.get_embedding()

            ids.append(node_id)
            embeddings.append(embedding)
            metadatas.append(metadata)

        data_points = utils.to_data_points(ids, embeddings, metadatas)
        # self._document_storage.add_documents(list(zip(ids, nodes)))

        if self._stream_update:
            utils.stream_update_index(index=self._index, data_points=data_points)
        else:
            if self._staging_bucket is None:
                raise ValueError(
                    "To update a Vector Search index a staging bucket must"
                    " be defined."
                )
            utils.batch_update_index(
                index=self._index,
                data_points=data_points,
                staging_bucket=self._staging_bucket,
                is_complete_overwrite=is_complete_overwrite,
            )
        return ids

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

使用ref_doc_id删除节点。

Source code in llama_index/vector_stores/vertexaivectorsearch/base.py
221
222
223
224
225
226
227
228
229
230
231
232
233
    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using ref_doc_id.

        Args:
            ref_doc_id (str): doc_id of the document whose nodes should
                be deleted.
        """
        # get datapoint ids by filter (metadata match on ref_doc_id)
        filter = {"ref_doc_id": ref_doc_id}
        ids = utils.get_datapoints_by_filter(
            index=self.index, endpoint=self.endpoint, metadata=filter
        )
        # remove datapoints
        self._index.remove_datapoints(datapoint_ids=ids)

query #

query(
    query: VectorStoreQuery, **kwargs: Any
) -> VectorStoreQueryResult

查询前k个最相似节点的索引。

Source code in llama_index/vector_stores/vertexaivectorsearch/base.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """Query the index for the top-k most similar nodes."""
    query_embedding = None
    if query.mode == VectorStoreQueryMode.DEFAULT:
        query_embedding = [cast(List[float], query.query_embedding)]

    if query.filters is not None:
        # Filters may come from the generic query interface or kwargs,
        # but never both at the same time.
        if "filter" in kwargs and kwargs["filter"] is not None:
            raise ValueError(
                "Cannot specify filter via both query and kwargs. "
                "Use kwargs only for Vertex AI Vector Search specific items that are "
                "not supported via the generic query interface such as numeric filters."
            )
        filter, num_filter = utils.to_vectorsearch_filter(query.filters)
    else:
        filter = None
        num_filter = None

    matches = utils.find_neighbors(
        index=self._index,
        endpoint=self._endpoint,
        embeddings=query_embedding,
        top_k=query.similarity_top_k,
        filter=filter,
        numeric_filter=num_filter,
    )

    top_k_nodes = []
    top_k_ids = []
    top_k_scores = []

    # Convert each match into a node plus its id and distance score.
    for match in matches:
        node = utils.to_node(match, self.text_key)
        top_k_ids.append(match.id)
        top_k_scores.append(match.distance)
        top_k_nodes.append(node)

    return VectorStoreQueryResult(
        nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
    )