Source code for langchain_community.document_transformers.doctran_text_qa

from typing import Any, Optional, Sequence

from langchain_core.documents import BaseDocumentTransformer, Document
from langchain_core.utils import get_from_env


class DoctranQATransformer(BaseDocumentTransformer):
    """Extract question/answer pairs from text documents using doctran.

    Args:
        openai_api_key: OpenAI API key. Can also be specified via the
            environment variable ``OPENAI_API_KEY``.
        openai_api_model: OpenAI model name. Can also be specified via the
            environment variable ``OPENAI_API_MODEL``.

    Example:
        .. code-block:: python

            from langchain_community.document_transformers import DoctranQATransformer

            # Pass in openai_api_key or set env var OPENAI_API_KEY
            qa_transformer = DoctranQATransformer()
            transformed_documents = qa_transformer.transform_documents(documents)

    Note:
        Only the synchronous ``transform_documents`` is implemented;
        ``atransform_documents`` raises ``NotImplementedError``.
    """

    def __init__(
        self,
        openai_api_key: Optional[str] = None,
        openai_api_model: Optional[str] = None,
    ) -> None:
        """Store the OpenAI credentials and model used by doctran.

        Each value falls back to its environment variable when the argument
        is ``None`` or empty.

        Args:
            openai_api_key: OpenAI API key; falls back to ``OPENAI_API_KEY``.
            openai_api_model: OpenAI model name; falls back to
                ``OPENAI_API_MODEL``.
        """
        # NOTE(review): get_from_env is called without a default, so it
        # presumably raises when the variable is unset -- confirm against
        # langchain_core.utils.get_from_env.
        self.openai_api_key = openai_api_key or get_from_env(
            "openai_api_key", "OPENAI_API_KEY"
        )
        self.openai_api_model = openai_api_model or get_from_env(
            "openai_api_model", "OPENAI_API_MODEL"
        )

    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Asynchronous variant; not implemented.

        Raises:
            NotImplementedError: Always. Use ``transform_documents`` instead.
        """
        raise NotImplementedError

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Extract questions and answers from text documents using doctran.

        Each document's ``page_content`` is run through doctran's
        ``interrogate`` step and the resulting ``questions_and_answers``
        property is stored in that document's metadata under the key
        ``"questions_and_answers"``.

        Args:
            documents: Documents to process. They are updated in place.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The same ``documents`` sequence that was passed in.

        Raises:
            ImportError: If the ``doctran`` package is not installed.
        """
        # Keep the try body limited to the import so that only a missing
        # package is reported as "install doctran".
        try:
            from doctran import Doctran
        except ImportError as e:
            raise ImportError(
                "Install doctran to use this parser. (pip install doctran)"
            ) from e

        doctran = Doctran(
            openai_api_key=self.openai_api_key, openai_model=self.openai_api_model
        )
        for d in documents:
            doctran_doc = doctran.parse(content=d.page_content).interrogate().execute()
            # .get() yields None when doctran produced no Q&A property.
            questions_and_answers = doctran_doc.extracted_properties.get(
                "questions_and_answers"
            )
            d.metadata["questions_and_answers"] = questions_and_answers
        return documents