Source code for langchain_community.document_transformers.doctran_text_translate

from typing import Any, Optional, Sequence

from langchain_core.documents import BaseDocumentTransformer, Document
from langchain_core.utils import get_from_env


class DoctranTextTranslator(BaseDocumentTransformer):
    """Translate text documents using doctran.

    Arguments:
        openai_api_key: OpenAI API key. Can also be specified via the
            environment variable ``OPENAI_API_KEY``.
        language: The language to translate *to*.
        openai_api_model: OpenAI model name handed to doctran. Can also be
            specified via the environment variable ``OPENAI_API_MODEL``.

    Example:
        .. code-block:: python

            from langchain_community.document_transformers import DoctranTextTranslator

            # Pass in openai_api_key or set env var OPENAI_API_KEY
            qa_translator = DoctranTextTranslator(language="spanish")
            translated_document = qa_translator.transform_documents(documents)
    """

    def __init__(
        self,
        openai_api_key: Optional[str] = None,
        language: str = "english",
        openai_api_model: Optional[str] = None,
    ) -> None:
        """Store the credentials, model name and target language.

        Explicit arguments win; a falsy ``openai_api_key`` or
        ``openai_api_model`` is looked up through ``get_from_env`` instead.
        """
        # NOTE(review): get_from_env is called without a default, so it
        # presumably raises when the variable is unset -- confirm.
        self.openai_api_key = openai_api_key or get_from_env(
            "openai_api_key", "OPENAI_API_KEY"
        )
        self.openai_api_model = openai_api_model or get_from_env(
            "openai_api_model", "OPENAI_API_MODEL"
        )
        self.language = language

    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Asynchronous translation is not implemented.

        Raises:
            NotImplementedError: always. Use ``transform_documents`` instead.
        """
        raise NotImplementedError

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Translate text documents using doctran.

        Arguments:
            documents: Documents whose ``page_content`` should be translated
                into ``self.language``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A new list of ``Document`` objects built from each doctran
            result's ``transformed_content`` and ``metadata``.

        Raises:
            ImportError: if the optional ``doctran`` package is not installed.
        """
        # Keep the try body to the import alone so that only a missing
        # package produces the "install doctran" hint.
        try:
            from doctran import Doctran
        except ImportError as e:
            raise ImportError(
                "Install doctran to use this parser. (pip install doctran)"
            ) from e

        doctran = Doctran(
            openai_api_key=self.openai_api_key, openai_model=self.openai_api_model
        )

        # Parse every input first, then translate, preserving the original
        # two-phase order of calls into doctran.
        parsed_docs = [
            doctran.parse(content=doc.page_content, metadata=doc.metadata)
            for doc in documents
        ]
        translated_docs = [
            parsed.translate(language=self.language).execute()
            for parsed in parsed_docs
        ]
        return [
            Document(page_content=doc.transformed_content, metadata=doc.metadata)
            for doc in translated_docs
        ]