Source code for langchain_community.document_transformers.google_translate

from typing import Any, Optional, Sequence

from langchain_core._api.deprecation import deprecated
from langchain_core.documents import BaseDocumentTransformer, Document

from langchain_community.utilities.vertexai import get_client_info


[docs]@deprecated( since="0.0.32", removal="0.3.0", alternative_import="langchain_google_community.DocAIParser", ) class GoogleTranslateTransformer(BaseDocumentTransformer): """使用Google Cloud翻译文本文档。"""
[docs] def __init__( self, project_id: str, *, location: str = "global", model_id: Optional[str] = None, glossary_id: Optional[str] = None, api_endpoint: Optional[str] = None, ) -> None: """参数: project_id: Google Cloud 项目 ID。 location: (可选)翻译模型位置。 model_id: (可选)要使用的翻译模型 ID。 glossary_id: (可选)要使用的翻译词汇表 ID。 api_endpoint: (可选)要使用的区域端点。 """ try: from google.api_core.client_options import ClientOptions from google.cloud import translate except ImportError as exc: raise ImportError( "Install Google Cloud Translate to use this parser." "(pip install google-cloud-translate)" ) from exc self.project_id = project_id self.location = location self.model_id = model_id self.glossary_id = glossary_id self._client = translate.TranslationServiceClient( client_info=get_client_info("translate"), client_options=( ClientOptions(api_endpoint=api_endpoint) if api_endpoint else None ), ) self._parent_path = self._client.common_location_path(project_id, location) # For some reason, there's no `model_path()` method for the client. self._model_path = ( f"{self._parent_path}/models/{model_id}" if model_id else None ) self._glossary_path = ( self._client.glossary_path(project_id, location, glossary_id) if glossary_id else None )
[docs] def transform_documents( self, documents: Sequence[Document], **kwargs: Any ) -> Sequence[Document]: """使用Google翻译来翻译文本文档。 参数: source_language_code: 输入文档的ISO 639语言代码。 target_language_code: 输出文档的ISO 639语言代码。 支持的语言,请参考: https://cloud.google.com/translate/docs/languages mime_type: (可选)输入文本的媒体类型。 选项:`text/plain`,`text/html` """ try: from google.cloud import translate except ImportError as exc: raise ImportError( "Install Google Cloud Translate to use this parser." "(pip install google-cloud-translate)" ) from exc response = self._client.translate_text( request=translate.TranslateTextRequest( contents=[doc.page_content for doc in documents], parent=self._parent_path, model=self._model_path, glossary_config=translate.TranslateTextGlossaryConfig( glossary=self._glossary_path ), source_language_code=kwargs.get("source_language_code", None), target_language_code=kwargs.get("target_language_code"), mime_type=kwargs.get("mime_type", "text/plain"), ) ) # If using a glossary, the translations will be in `glossary_translations`. translations = response.glossary_translations or response.translations return [ Document( page_content=translation.translated_text, metadata={ **doc.metadata, "model": translation.model, "detected_language_code": translation.detected_language_code, }, ) for doc, translation in zip(documents, translations) ]