Skip to content

ChatGPT Plugin

ChatGPTRetrievalPluginClient #

Bases: BasePydanticVectorStore

ChatGPT Retrieval Plugin client.

In this client, we make use of the endpoints defined by the ChatGPT Retrieval Plugin.

Parameters:

Name Type Description Default
endpoint_url str

URL of the ChatGPT Retrieval Plugin.

required
bearer_token Optional[str]

Bearer token for the ChatGPT Retrieval Plugin.

None
retries Optional[Retry]

Retry object for the ChatGPT Retrieval Plugin.

None
batch_size int

Batch size for the ChatGPT Retrieval Plugin.

100
Source code in llama_index/vector_stores/chatgpt_plugin/base.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
class ChatGPTRetrievalPluginClient(BasePydanticVectorStore):
    """ChatGPT Retrieval Plugin client.

    In this client, we make use of the endpoints defined by the ChatGPT
    Retrieval Plugin.

    Args:
        endpoint_url (str): URL of the ChatGPT Retrieval Plugin service.
        bearer_token (Optional[str]): Bearer token for the service. Falls
            back to the ``BEARER_TOKEN`` environment variable when omitted.
        retries (Optional[Retry]): urllib3 ``Retry`` configuration applied
            to the underlying HTTP session.
        batch_size (int): Number of documents uploaded per ``/upsert``
            request.
    """

    stores_text: bool = True
    is_embedding_query: bool = False

    _endpoint_url: str = PrivateAttr()
    _bearer_token: Optional[str] = PrivateAttr()
    _retries: Optional[Retry] = PrivateAttr()
    _batch_size: int = PrivateAttr()
    _s: requests.Session = PrivateAttr()

    def __init__(
        self,
        endpoint_url: str,
        bearer_token: Optional[str] = None,
        retries: Optional[Retry] = None,
        batch_size: int = 100,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        super().__init__()

        self._endpoint_url = endpoint_url
        # Fall back to the BEARER_TOKEN env var when no token is supplied.
        self._bearer_token = bearer_token or os.getenv("BEARER_TOKEN")
        self._retries = retries
        self._batch_size = batch_size

        self._s = requests.Session()
        # BUG FIX: mount the retry adapter for both schemes. Previously only
        # "http://" was mounted, so retries never applied to HTTPS endpoints.
        adapter = HTTPAdapter(max_retries=self._retries)
        self._s.mount("http://", adapter)
        self._s.mount("https://", adapter)

    @classmethod
    def class_name(cls) -> str:
        return "ChatGPTRetrievalPluginClient"

    @property
    def client(self) -> None:
        """Get client.

        The plugin is accessed purely over HTTP, so there is no underlying
        client object to expose.
        """
        return

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes to index.

        Nodes are converted to the plugin's JSON document format and
        uploaded in batches of ``batch_size`` via the ``/upsert`` endpoint.

        Args:
            nodes (List[BaseNode]): Nodes to upload.

        Returns:
            List[str]: The node ids of the uploaded nodes.
        """
        headers = {"Authorization": f"Bearer {self._bearer_token}"}

        docs_to_upload = convert_docs_to_json(nodes)
        iterable_docs = get_tqdm_iterable(
            range(0, len(docs_to_upload), self._batch_size),
            show_progress=True,
            desc="Uploading documents",
        )
        for i in iterable_docs:
            i_end = min(i + self._batch_size, len(docs_to_upload))
            res = self._s.post(
                f"{self._endpoint_url}/upsert",
                headers=headers,
                json={"documents": docs_to_upload[i:i_end]},
            )
            # Surface failed uploads instead of silently dropping batches.
            res.raise_for_status()

        return [node.node_id for node in nodes]

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.
        """
        headers = {"Authorization": f"Bearer {self._bearer_token}"}
        res = self._s.post(
            f"{self._endpoint_url}/delete",
            headers=headers,
            json={"ids": [ref_doc_id]},
        )
        # Surface failed deletions instead of silently ignoring them.
        res.raise_for_status()

    def query(
        self,
        query: VectorStoreQuery,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """Get nodes for response.

        Args:
            query (VectorStoreQuery): Query carrying ``query_str`` and
                ``similarity_top_k``. Metadata filters are not supported.

        Raises:
            ValueError: If metadata filters are provided, or if
                ``query_str`` is None.
        """
        if query.filters is not None:
            raise ValueError("Metadata filters not implemented for ChatGPT Plugin yet.")

        if query.query_str is None:
            raise ValueError("query_str must be provided")
        headers = {"Authorization": f"Bearer {self._bearer_token}"}
        # TODO: add metadata filter
        queries = [{"query": query.query_str, "top_k": query.similarity_top_k}]
        # CONSISTENCY FIX: use the shared session so the configured retry
        # adapter applies; a bare requests.post bypassed it.
        res = self._s.post(
            f"{self._endpoint_url}/query", headers=headers, json={"queries": queries}
        )
        res.raise_for_status()

        nodes = []
        similarities = []
        ids = []
        for query_result in res.json()["results"]:
            for result in query_result["results"]:
                result_id = result["id"]
                result_txt = result["text"]
                result_score = result["score"]
                result_ref_doc_id = result["source_id"]
                node = TextNode(
                    id_=result_id,
                    text=result_txt,
                    relationships={
                        NodeRelationship.SOURCE: RelatedNodeInfo(
                            node_id=result_ref_doc_id
                        )
                    },
                )
                nodes.append(node)
                similarities.append(result_score)
                ids.append(result_id)

            # NOTE: there should only be one query
            break

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

client property #

client: None

Get client.

add #

add(nodes: List[BaseNode], **add_kwargs: Any) -> List[str]

Add nodes to index.

Source code in llama_index/vector_stores/chatgpt_plugin/base.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def add(
    self,
    nodes: List[BaseNode],
    **add_kwargs: Any,
) -> List[str]:
    """Add nodes to index.

    Converts nodes to the plugin's JSON document format and uploads them
    in batches of ``batch_size`` via the ``/upsert`` endpoint, returning
    the node ids of all input nodes.
    """
    headers = {"Authorization": f"Bearer {self._bearer_token}"}

    docs_to_upload = convert_docs_to_json(nodes)
    # Iterate batch start offsets with a progress bar.
    iterable_docs = get_tqdm_iterable(
        range(0, len(docs_to_upload), self._batch_size),
        show_progress=True,
        desc="Uploading documents",
    )
    for i in iterable_docs:
        # Clamp the final batch to the end of the document list.
        i_end = min(i + self._batch_size, len(docs_to_upload))
        self._s.post(
            f"{self._endpoint_url}/upsert",
            headers=headers,
            json={"documents": docs_to_upload[i:i_end]},
        )

    return [result.node_id for result in nodes]

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

Delete nodes using ref_doc_id.

Source code in llama_index/vector_stores/chatgpt_plugin/base.py
130
131
132
133
134
135
136
137
138
139
140
141
    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.
        """
        headers = {"Authorization": f"Bearer {self._bearer_token}"}
        # The plugin's /delete endpoint accepts a list of document ids.
        self._s.post(
            f"{self._endpoint_url}/delete",
            headers=headers,
            json={"ids": [ref_doc_id]},
        )

query #

query(
    query: VectorStoreQuery, **kwargs: Any
) -> VectorStoreQueryResult

Get nodes for response.

Source code in llama_index/vector_stores/chatgpt_plugin/base.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def query(
    self,
    query: VectorStoreQuery,
    **kwargs: Any,
) -> VectorStoreQueryResult:
    """Get nodes for response.

    Args:
        query (VectorStoreQuery): Query carrying ``query_str`` and
            ``similarity_top_k``. Metadata filters are not supported.

    Raises:
        ValueError: If metadata filters are provided, or if ``query_str``
            is None.
    """
    if query.filters is not None:
        raise ValueError("Metadata filters not implemented for ChatGPT Plugin yet.")

    if query.query_str is None:
        raise ValueError("query_str must be provided")
    headers = {"Authorization": f"Bearer {self._bearer_token}"}
    # TODO: add metadata filter
    queries = [{"query": query.query_str, "top_k": query.similarity_top_k}]
    # CONSISTENCY FIX: use the shared session so the configured retry
    # adapter applies; a bare requests.post bypassed it.
    res = self._s.post(
        f"{self._endpoint_url}/query", headers=headers, json={"queries": queries}
    )

    nodes = []
    similarities = []
    ids = []
    for query_result in res.json()["results"]:
        for result in query_result["results"]:
            result_id = result["id"]
            result_txt = result["text"]
            result_score = result["score"]
            result_ref_doc_id = result["source_id"]
            node = TextNode(
                id_=result_id,
                text=result_txt,
                relationships={
                    NodeRelationship.SOURCE: RelatedNodeInfo(
                        node_id=result_ref_doc_id
                    )
                },
            )
            nodes.append(node)
            similarities.append(result_score)
            ids.append(result_id)

        # NOTE: there should only be one query
        break

    return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)