Google

GoogleIndex #

Bases: BaseManagedIndex

Google's Generative AI Semantic Vector Store with AQA.

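A minimal end-to-end sketch of typical usage, assuming the `llama-index-indices-managed-google` integration package is installed and Google Generative AI credentials are configured; the corpus ID and question below are placeholders:

from llama_index.indices.managed.google import GoogleIndex

# Attach to an existing corpus on Google's semantic retriever service.
index = GoogleIndex.from_corpus(corpus_id="my-corpus-id")  # hypothetical corpus ID

# Ask questions with Google's AQA (Attributed Question Answering) model.
query_engine = index.as_query_engine(temperature=0.7)
response = query_engine.query("What does the corpus say about pricing?")
print(response)
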
Source code in llama_index/indices/managed/google/base.py
class GoogleIndex(BaseManagedIndex):
    """谷歌的生成式AI语义向量存储与AQA。"""

    _store: GoogleVectorStore
    _index: VectorStoreIndex

    def __init__(
        self,
        vector_store: GoogleVectorStore,
        embed_model: Optional[BaseEmbedding] = None,
        # deprecated
        service_context: Optional[ServiceContext] = None,
        **kwargs: Any,
    ) -> None:
        """创建一个GoogleIndex的实例。

最好使用工厂函数`from_corpus`或`create_corpus`来代替。
"""
        embed_model = embed_model or MockEmbedding(embed_dim=3)

        self._store = vector_store
        self._index = VectorStoreIndex.from_vector_store(
            vector_store, embed_model=embed_model, **kwargs
        )

        super().__init__(
            index_struct=self._index.index_struct,
            service_context=service_context,
            **kwargs,
        )

    @classmethod
    def from_corpus(
        cls: Type[IndexType], *, corpus_id: str, **kwargs: Any
    ) -> IndexType:
        """从现有语料库创建一个GoogleIndex。

Args:
    corpus_id:Google服务器上现有语料库的ID。

Returns:
    指向指定语料库的GoogleIndex实例。
"""
        _logger.debug(f"\n\nGoogleIndex.from_corpus(corpus_id={corpus_id})")
        return cls(
            vector_store=GoogleVectorStore.from_corpus(corpus_id=corpus_id), **kwargs
        )

    @classmethod
    def create_corpus(
        cls: Type[IndexType],
        *,
        corpus_id: Optional[str] = None,
        display_name: Optional[str] = None,
        **kwargs: Any,
    ) -> IndexType:
        """从新语料库创建一个GoogleIndex。

Args:
    corpus_id:要创建的新语料库的ID。如果未提供,Google服务器将提供一个。
    display_name:新语料库的标题。如果未提供,Google服务器将提供一个。

Returns:
    指向指定语料库的GoogleIndex实例。
"""
        _logger.debug(
            f"\n\nGoogleIndex.from_new_corpus(new_corpus_id={corpus_id}, new_display_name={display_name})"
        )
        return cls(
            vector_store=GoogleVectorStore.create_corpus(
                corpus_id=corpus_id, display_name=display_name
            ),
            **kwargs,
        )

    @classmethod
    def from_documents(
        cls: Type[IndexType],
        documents: Sequence[Document],
        storage_context: Optional[StorageContext] = None,
        show_progress: bool = False,
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        # deprecated
        service_context: Optional[ServiceContext] = None,
        embed_model: Optional[BaseEmbedding] = None,
        **kwargs: Any,
    ) -> IndexType:
        """从一系列文档中构建索引。"""
        _logger.debug("\n\nGoogleIndex.from_documents(...)")

        new_display_name = f"Corpus created on {datetime.datetime.now()}"
        instance = cls(
            vector_store=GoogleVectorStore.create_corpus(display_name=new_display_name),
            embed_model=embed_model,
            service_context=service_context,
            storage_context=storage_context,
            show_progress=show_progress,
            callback_manager=callback_manager,
            transformations=transformations,
            **kwargs,
        )

        index = cast(GoogleIndex, instance)
        index.insert_documents(
            documents=documents,
            service_context=service_context,
        )

        return instance

    @property
    def corpus_id(self) -> str:
        """返回此GoogleIndex正在使用的语料库ID。"""
        return self._store.corpus_id

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """插入一组节点。"""
        self._index.insert_nodes(nodes=nodes, **insert_kwargs)

    def insert_documents(self, documents: Sequence[Document], **kwargs: Any) -> None:
        """插入一组文档。"""
        for document in documents:
            self.insert(document=document, **kwargs)

    def delete_ref_doc(
        self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any
    ) -> None:
        """使用ref_doc_id删除文档及其节点。"""
        self._index.delete_ref_doc(ref_doc_id=ref_doc_id, **delete_kwargs)

    def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None:
        """更新文档及其对应的节点。"""
        self._index.update(document=document, **update_kwargs)

    def as_retriever(self, **kwargs: Any) -> BaseRetriever:
        """返回此托管索引的检索器。"""
        return self._index.as_retriever(**kwargs)

    def as_query_engine(
        self,
        llm: Optional[LLMType] = None,
        temperature: float = 0.7,
        answer_style: Any = 1,
        safety_setting: List[Any] = [],
        **kwargs: Any,
    ) -> BaseQueryEngine:
        """返回此索引的AQA引擎。

示例:
query_engine = index.as_query_engine(
    temperature=0.7,
    answer_style=AnswerStyle.ABSTRACTIVE,
    safety_setting=[
        SafetySetting(
            category=HARM_CATEGORY_SEXUALLY_EXPLICIT,
            threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        ),
    ]
)

Args:
    temperature:0.0 到 1.0。
    answer_style:参见 `google.ai.generativelanguage.GenerateAnswerRequest.AnswerStyle`
    safety_setting:参见 `google.ai.generativelanguage.SafetySetting`。

Returns:
    使用Google的AQA模型的查询引擎。查询引擎将返回一个 `Response` 对象。

    `Response` 的 `source_nodes` 将以属性段落列表开头。这些段落是用于构建基础响应的段落。这些段落始终没有分数,这是标记它们为属性段落的唯一方式。然后,列表将跟随最初提供的段落,这些段落将具有来自检索的分数。

    `Response` 的 `metadata` 也可能有一个键为 `answerable_probability` 的条目,该条目是基础答案可能正确的概率。
"""
        # NOTE: lazy import
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        # Don't overwrite the caller's kwargs, which may surprise them.
        local_kwargs = kwargs.copy()

        if "retriever" in kwargs:
            _logger.warning(
                "Ignoring user's retriever to GoogleIndex.as_query_engine, "
                "which uses its own retriever."
            )
            del local_kwargs["retriever"]

        if "response_synthesizer" in kwargs:
            _logger.warning(
                "Ignoring user's response synthesizer to "
                "GoogleIndex.as_query_engine, which uses its own retriever."
            )
            del local_kwargs["response_synthesizer"]

        local_kwargs["retriever"] = self.as_retriever(**local_kwargs)
        local_kwargs["response_synthesizer"] = GoogleTextSynthesizer.from_defaults(
            temperature=temperature,
            answer_style=answer_style,
            safety_setting=safety_setting,
        )
        if "service_context" not in local_kwargs:
            local_kwargs["service_context"] = self._service_context

        return RetrieverQueryEngine.from_args(**local_kwargs)

    def _build_index_from_nodes(self, nodes: Sequence[BaseNode]) -> IndexDict:
        """从节点构建索引。"""
        return self._index._build_index_from_nodes(nodes)

corpus_id property #

corpus_id: str

Returns the corpus ID used by this GoogleIndex.

from_corpus classmethod #

from_corpus(*, corpus_id: str, **kwargs: Any) -> IndexType

Creates a GoogleIndex from an existing corpus.

Returns:

IndexType: An instance of GoogleIndex pointing to the specified corpus.

Source code in llama_index/indices/managed/google/base.py
    @classmethod
    def from_corpus(
        cls: Type[IndexType], *, corpus_id: str, **kwargs: Any
    ) -> IndexType:
        """从现有语料库创建一个GoogleIndex。

Args:
    corpus_id:Google服务器上现有语料库的ID。

Returns:
    指向指定语料库的GoogleIndex实例。
"""
        _logger.debug(f"\n\nGoogleIndex.from_corpus(corpus_id={corpus_id})")
        return cls(
            vector_store=GoogleVectorStore.from_corpus(corpus_id=corpus_id), **kwargs
        )

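A short usage sketch, assuming credentials are configured; the corpus ID below is a placeholder:

# Attach to a corpus that already exists on Google's server.
index = GoogleIndex.from_corpus(corpus_id="my-corpus-id")  # hypothetical ID
print(index.corpus_id)
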
create_corpus classmethod #

create_corpus(
    *,
    corpus_id: Optional[str] = None,
    display_name: Optional[str] = None,
    **kwargs: Any
) -> IndexType

Creates a GoogleIndex from a new corpus.

Returns:

IndexType: An instance of GoogleIndex pointing to the specified corpus.

Source code in llama_index/indices/managed/google/base.py
    @classmethod
    def create_corpus(
        cls: Type[IndexType],
        *,
        corpus_id: Optional[str] = None,
        display_name: Optional[str] = None,
        **kwargs: Any,
    ) -> IndexType:
        """从新语料库创建一个GoogleIndex。

Args:
    corpus_id:要创建的新语料库的ID。如果未提供,Google服务器将提供一个。
    display_name:新语料库的标题。如果未提供,Google服务器将提供一个。

Returns:
    指向指定语料库的GoogleIndex实例。
"""
        _logger.debug(
            f"\n\nGoogleIndex.from_new_corpus(new_corpus_id={corpus_id}, new_display_name={display_name})"
        )
        return cls(
            vector_store=GoogleVectorStore.create_corpus(
                corpus_id=corpus_id, display_name=display_name
            ),
            **kwargs,
        )

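A short sketch; both arguments are optional, and the Google server fills in whatever is omitted:

# Let the server assign the corpus ID; only supply a display name.
index = GoogleIndex.create_corpus(display_name="My research notes")
print(index.corpus_id)  # server-assigned ID
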
from_documents classmethod #

from_documents(
    documents: Sequence[Document],
    storage_context: Optional[StorageContext] = None,
    show_progress: bool = False,
    callback_manager: Optional[CallbackManager] = None,
    transformations: Optional[
        List[TransformComponent]
    ] = None,
    service_context: Optional[ServiceContext] = None,
    embed_model: Optional[BaseEmbedding] = None,
    **kwargs: Any
) -> IndexType

Build an index from a sequence of documents.

Source code in llama_index/indices/managed/google/base.py
@classmethod
def from_documents(
    cls: Type[IndexType],
    documents: Sequence[Document],
    storage_context: Optional[StorageContext] = None,
    show_progress: bool = False,
    callback_manager: Optional[CallbackManager] = None,
    transformations: Optional[List[TransformComponent]] = None,
    # deprecated
    service_context: Optional[ServiceContext] = None,
    embed_model: Optional[BaseEmbedding] = None,
    **kwargs: Any,
) -> IndexType:
    """从一系列文档中构建索引。"""
    _logger.debug("\n\nGoogleIndex.from_documents(...)")

    new_display_name = f"Corpus created on {datetime.datetime.now()}"
    instance = cls(
        vector_store=GoogleVectorStore.create_corpus(display_name=new_display_name),
        embed_model=embed_model,
        service_context=service_context,
        storage_context=storage_context,
        show_progress=show_progress,
        callback_manager=callback_manager,
        transformations=transformations,
        **kwargs,
    )

    index = cast(GoogleIndex, instance)
    index.insert_documents(
        documents=documents,
        service_context=service_context,
    )

    return instance

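A short sketch, assuming the `Document` class from `llama-index-core`; a new corpus is created with an auto-generated display name and the documents are inserted into it:

from llama_index.core import Document

index = GoogleIndex.from_documents(
    [
        Document(text="Paris is the capital of France."),
        Document(text="Berlin is the capital of Germany."),
    ]
)
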
insert_documents #

insert_documents(
    documents: Sequence[Document], **kwargs: Any
) -> None

Inserts a set of documents.

Source code in llama_index/indices/managed/google/base.py
def insert_documents(self, documents: Sequence[Document], **kwargs: Any) -> None:
    """插入一组文档。"""
    for document in documents:
        self.insert(document=document, **kwargs)

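A short sketch, assuming `index` is a GoogleIndex created as above; each document is passed to the inherited single-document `insert`:

from llama_index.core import Document

index.insert_documents(
    [
        Document(text="Q3 revenue grew 12% year over year."),
        Document(text="Headcount was flat in Q3."),
    ]
)
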
delete_ref_doc #

delete_ref_doc(
    ref_doc_id: str,
    delete_from_docstore: bool = False,
    **delete_kwargs: Any
) -> None

Deletes a document and its nodes by ref_doc_id.

Source code in llama_index/indices/managed/google/base.py
def delete_ref_doc(
    self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any
) -> None:
    """使用ref_doc_id删除文档及其节点。"""
    self._index.delete_ref_doc(ref_doc_id=ref_doc_id, **delete_kwargs)

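A short sketch; `ref_doc_id` is the `doc_id` of a previously inserted Document (the value below is hypothetical):

index.delete_ref_doc("my-doc-id")
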
update_ref_doc #

update_ref_doc(
    document: Document, **update_kwargs: Any
) -> None

Updates a document and its corresponding nodes.

Source code in llama_index/indices/managed/google/base.py
def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None:
    """更新文档及其对应的节点。"""
    self._index.update(document=document, **update_kwargs)

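A short sketch, assuming `doc` is a Document already present in the index; re-submitting it with new text replaces its nodes:

doc.text = "Updated: Q3 revenue grew 15% year over year."
index.update_ref_doc(doc)
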
as_retriever #

as_retriever(**kwargs: Any) -> BaseRetriever

Returns a Retriever for this managed index.

Source code in llama_index/indices/managed/google/base.py
def as_retriever(self, **kwargs: Any) -> BaseRetriever:
    """返回此托管索引的检索器。"""
    return self._index.as_retriever(**kwargs)

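A short sketch; keyword arguments are forwarded to the underlying VectorStoreIndex retriever, so standard retriever options such as `similarity_top_k` (an assumption about the forwarded kwargs) can be passed through:

retriever = index.as_retriever(similarity_top_k=5)
nodes = retriever.retrieve("What changed in Q3?")
for node_with_score in nodes:
    print(node_with_score.score, node_with_score.node.get_content())
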
as_query_engine #

as_query_engine(
    llm: Optional[LLMType] = None,
    temperature: float = 0.7,
    answer_style: Any = 1,
    safety_setting: List[Any] = [],
    **kwargs: Any
) -> BaseQueryEngine

Returns an AQA engine for this index.

Example:

query_engine = index.as_query_engine(
    temperature=0.7,
    answer_style=AnswerStyle.ABSTRACTIVE,
    safety_setting=[
        SafetySetting(
            category=HARM_CATEGORY_SEXUALLY_EXPLICIT,
            threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        ),
    ]
)

Returns:

BaseQueryEngine: A query engine that uses Google's AQA model. The query engine returns a `Response` object.

The `Response`'s `source_nodes` will begin with a list of attributed passages. These are the passages used to construct the grounded response, and they always carry no score, which is the only way to mark them as attributed passages. They are followed by the originally retrieved passages, which do carry scores from retrieval.

The `Response`'s `metadata` may also contain an entry with key `answerable_probability`, the probability that the grounded answer is likely correct.

Source code in llama_index/indices/managed/google/base.py
    def as_query_engine(
        self,
        llm: Optional[LLMType] = None,
        temperature: float = 0.7,
        answer_style: Any = 1,
        safety_setting: List[Any] = [],
        **kwargs: Any,
    ) -> BaseQueryEngine:
        """返回此索引的AQA引擎。

示例:
query_engine = index.as_query_engine(
    temperature=0.7,
    answer_style=AnswerStyle.ABSTRACTIVE,
    safety_setting=[
        SafetySetting(
            category=HARM_CATEGORY_SEXUALLY_EXPLICIT,
            threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        ),
    ]
)

Args:
    temperature:0.0 到 1.0。
    answer_style:参见 `google.ai.generativelanguage.GenerateAnswerRequest.AnswerStyle`
    safety_setting:参见 `google.ai.generativelanguage.SafetySetting`。

Returns:
    使用Google的AQA模型的查询引擎。查询引擎将返回一个 `Response` 对象。

    `Response` 的 `source_nodes` 将以属性段落列表开头。这些段落是用于构建基础响应的段落。这些段落始终没有分数,这是标记它们为属性段落的唯一方式。然后,列表将跟随最初提供的段落,这些段落将具有来自检索的分数。

    `Response` 的 `metadata` 也可能有一个键为 `answerable_probability` 的条目,该条目是基础答案可能正确的概率。
"""
        # NOTE: lazy import
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        # Don't overwrite the caller's kwargs, which may surprise them.
        local_kwargs = kwargs.copy()

        if "retriever" in kwargs:
            _logger.warning(
                "Ignoring user's retriever to GoogleIndex.as_query_engine, "
                "which uses its own retriever."
            )
            del local_kwargs["retriever"]

        if "response_synthesizer" in kwargs:
            _logger.warning(
                "Ignoring user's response synthesizer to "
                "GoogleIndex.as_query_engine, which uses its own retriever."
            )
            del local_kwargs["response_synthesizer"]

        local_kwargs["retriever"] = self.as_retriever(**local_kwargs)
        local_kwargs["response_synthesizer"] = GoogleTextSynthesizer.from_defaults(
            temperature=temperature,
            answer_style=answer_style,
            safety_setting=safety_setting,
        )
        if "service_context" not in local_kwargs:
            local_kwargs["service_context"] = self._service_context

        return RetrieverQueryEngine.from_args(**local_kwargs)
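
A sketch of building the engine and inspecting the AQA-specific parts of the result, assuming the `google.ai.generativelanguage` client is installed (the enum paths below are assumptions based on that package, and `index` is a GoogleIndex as above):

import google.ai.generativelanguage as genai

query_engine = index.as_query_engine(
    temperature=0.7,
    answer_style=genai.GenerateAnswerRequest.AnswerStyle.ABSTRACTIVE,
    safety_setting=[
        genai.SafetySetting(
            category=genai.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            threshold=genai.SafetySetting.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        ),
    ],
)

response = query_engine.query("What is the capital of France?")
print(response.response)
print(response.metadata.get("answerable_probability"))

for node_with_score in response.source_nodes:
    # Attributed passages come first with no score; retrieved passages follow with scores.
    print(node_with_score.score, node_with_score.node.get_content())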