Skip to content

Supabase

SupabaseVectorStore #

Bases: BasePydanticVectorStore

Supabase矢量存储。

在这个矢量存储中,使用pgvector将嵌入存储在Postgres表中。

在查询时,索引使用pgvector/Supabase来查询前k个最相似的节点。

Parameters:

Name Type Description Default
postgres_connection_string str

postgres连接字符串

required
collection_name str

存储嵌入的集合名称

required
dimension int

嵌入的维度。默认为1536。

DEFAULT_EMBEDDING_DIM
示例

pip install llama-index-vector-stores-supabase

from llama_index.vector_stores.supabase import SupabaseVectorStore

# 设置SupabaseVectorStore
vector_store = SupabaseVectorStore(
    postgres_connection_string="postgresql://<user>:<password>@<host>:<port>/<db_name>",
    collection_name="base_demo",
)
Source code in llama_index/vector_stores/supabase/base.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
class SupabaseVectorStore(BasePydanticVectorStore):
    """Supabase vector store.

    In this vector store, embeddings are stored within a Postgres table
    using pgvector.

    During query time, the index uses pgvector/Supabase to query for the
    top k most similar nodes.

    Args:
        postgres_connection_string (str):
            postgres connection string
        collection_name (str):
            name of the collection to store the embeddings in
        dimension (int, optional):
            dimension of the embeddings. Defaults to 1536.

    Examples:
        `pip install llama-index-vector-stores-supabase`

        ```python
        from llama_index.vector_stores.supabase import SupabaseVectorStore

        # setup SupabaseVectorStore
        vector_store = SupabaseVectorStore(
            postgres_connection_string="postgresql://<user>:<password>@<host>:<port>/<db_name>",
            collection_name="base_demo",
        )
        ```
    """

    stores_text = True
    flat_metadata = False
    # vecs client and collection handles, populated in __init__.
    _client: Optional[Any] = PrivateAttr()
    _collection: Optional[Collection] = PrivateAttr()

    def __init__(
        self,
        postgres_connection_string: str,
        collection_name: str,
        dimension: int = DEFAULT_EMBEDDING_DIM,
        **kwargs: Any,
    ) -> None:
        super().__init__()
        self._client = vecs.create_client(postgres_connection_string)

        # Reuse an existing collection when present; otherwise create one
        # with the requested embedding dimension.
        try:
            self._collection = self._client.get_collection(name=collection_name)
        except CollectionNotFound:
            logger.info(
                f"Collection {collection_name} does not exist, "
                f"try creating one with dimension={dimension}"
            )
            self._collection = self._client.create_collection(
                name=collection_name, dimension=dimension
            )

    def __del__(self) -> None:
        """Close the client when the object is deleted."""
        try:  # try-catch in case the attribute is not present
            self._client.disconnect()
        except AttributeError:
            pass

    @property
    def client(self) -> None:
        """Get the client (the underlying vecs client is not exposed)."""
        return

    def _to_vecs_filters(self, filters: MetadataFilters) -> Any:
        """Convert llama filters to vecs filters. $eq is the only supported operator."""
        vecs_filter: Any = defaultdict(list)
        filter_cond = f"${filters.condition.value}"

        for f in filters.legacy_filters():
            vecs_filter[filter_cond].append({f.key: {"$eq": f.value}})
        return vecs_filter

    def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
        """Add nodes to the index.

        Args:
            nodes (List[BaseNode]): list of nodes with embeddings

        Returns:
            List[str]: ids of the upserted records.
        """
        if self._collection is None:
            raise ValueError("Collection not initialized")

        data = []
        ids = []

        for node in nodes:
            # NOTE: keep text in metadata dict since there's no special field in
            #       Supabase Vector.
            metadata_dict = node_to_metadata_dict(
                node, remove_text=False, flat_metadata=self.flat_metadata
            )

            # vecs upsert record shape: (id, vector, metadata)
            data.append((node.node_id, node.get_embedding(), metadata_dict))
            ids.append(node.node_id)

        self._collection.upsert(records=data)

        return ids

    def get_by_id(self, doc_id: str, **kwargs: Any) -> list:
        """Get row ids by doc id.

        Args:
            doc_id (str): document id

        Returns:
            list: ids of rows whose metadata ``doc_id`` matches.
        """
        if self._collection is None:
            raise ValueError("Collection not initialized")

        filters = {"doc_id": {"$eq": doc_id}}

        # With include_value/include_metadata disabled, vecs returns a plain
        # list of row ids.
        return self._collection.query(
            data=None,
            filters=filters,
            include_value=False,
            include_metadata=False,
            **kwargs,
        )

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document.

        Args:
            ref_doc_id (str): document id
        """
        row_ids = self.get_by_id(ref_doc_id)

        if len(row_ids) > 0:
            self._collection.delete(row_ids)

    def query(
        self,
        query: VectorStoreQuery,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """Query the index for the top k most similar nodes.

        Args:
            query (VectorStoreQuery): query containing the embedding and
                optional metadata filters.

        Returns:
            VectorStoreQueryResult: matched nodes, similarity scores, and ids.
        """
        if self._collection is None:
            raise ValueError("Collection not initialized")

        filters = None
        if query.filters is not None:
            filters = self._to_vecs_filters(query.filters)

        results = self._collection.query(
            data=query.query_embedding,
            limit=query.similarity_top_k,
            filters=filters,
            include_value=True,
            include_metadata=True,
        )

        similarities = []
        ids = []
        nodes = []
        # each result row is an (id, distance, metadata) tuple
        for id_, distance, metadata in results:
            text = metadata.pop("text", None)

            try:
                node = metadata_dict_to_node(metadata)
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                metadata, node_info, relationships = legacy_metadata_dict_to_node(
                    metadata
                )
                node = TextNode(
                    id_=id_,
                    text=text,
                    metadata=metadata,
                    start_char_idx=node_info.get("start", None),
                    end_char_idx=node_info.get("end", None),
                    relationships=relationships,
                )

            nodes.append(node)
            # map pgvector distance into a (0, 1) similarity score
            similarities.append(1.0 - math.exp(-distance))
            ids.append(id_)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

client property #

client: None

获取客户端。

add #

add(nodes: List[BaseNode], **add_kwargs: Any) -> List[str]

将节点添加到索引中。

Parameters:

Name Type Description Default
节点

List[BaseNode]: 带有嵌入的节点列表

required
Source code in llama_index/vector_stores/supabase/base.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
    def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
        """Add nodes to the index.

        Args:
            nodes (List[BaseNode]): list of nodes with embeddings

        Returns:
            List[str]: ids of the upserted records.
        """
        if self._collection is None:
            raise ValueError("Collection not initialized")

        data = []
        ids = []

        for node in nodes:
            # NOTE: keep text in metadata dict since there's no special field in
            #       Supabase Vector.
            metadata_dict = node_to_metadata_dict(
                node, remove_text=False, flat_metadata=self.flat_metadata
            )

            # vecs upsert record shape: (id, vector, metadata)
            data.append((node.node_id, node.get_embedding(), metadata_dict))
            ids.append(node.node_id)

        self._collection.upsert(records=data)

        return ids

get_by_id #

get_by_id(doc_id: str, **kwargs: Any) -> list

通过文档ID获取行ID。

Source code in llama_index/vector_stores/supabase/base.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
    def get_by_id(self, doc_id: str, **kwargs: Any) -> list:
        """Get row ids by doc id.

        Args:
            doc_id (str): document id

        Returns:
            list: ids of rows whose metadata ``doc_id`` matches.
        """
        filters = {"doc_id": {"$eq": doc_id}}

        # With include_value/include_metadata disabled, vecs returns a plain
        # list of row ids.
        return self._collection.query(
            data=None,
            filters=filters,
            include_value=False,
            include_metadata=False,
            **kwargs,
        )

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

删除文档。

Parameters:

Name Type Description Default

ref_doc_id (str): 文档id

required
Source code in llama_index/vector_stores/supabase/base.py
148
149
150
151
152
153
154
155
156
157
    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document.

        Args:
            ref_doc_id (str): document id
        """
        # Look up every row stored for this document, then remove them all.
        row_ids = self.get_by_id(ref_doc_id)

        if len(row_ids) > 0:
            self._collection.delete(row_ids)

query #

query(
    query: VectorStoreQuery, **kwargs: Any
) -> VectorStoreQueryResult

查询前k个最相似节点的索引。

Source code in llama_index/vector_stores/supabase/base.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
    def query(
        self,
        query: VectorStoreQuery,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """Query the index for the top k most similar nodes.

        Args:
            query (VectorStoreQuery): query containing the embedding and
                optional metadata filters.

        Returns:
            VectorStoreQueryResult: matched nodes, similarity scores, and ids.
        """
        filters = None
        if query.filters is not None:
            filters = self._to_vecs_filters(query.filters)

        results = self._collection.query(
            data=query.query_embedding,
            limit=query.similarity_top_k,
            filters=filters,
            include_value=True,
            include_metadata=True,
        )

        similarities = []
        ids = []
        nodes = []
        for id_, distance, metadata in results:
            """each result row is an (id, distance, metadata) tuple"""
            text = metadata.pop("text", None)

            try:
                node = metadata_dict_to_node(metadata)
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                metadata, node_info, relationships = legacy_metadata_dict_to_node(
                    metadata
                )
                node = TextNode(
                    id_=id_,
                    text=text,
                    metadata=metadata,
                    start_char_idx=node_info.get("start", None),
                    end_char_idx=node_info.get("end", None),
                    relationships=relationships,
                )

            nodes.append(node)
            # map pgvector distance into a (0, 1) similarity score
            similarities.append(1.0 - math.exp(-distance))
            ids.append(id_)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)