Source code for langchain_community.document_loaders.google_speech_to_text

from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain_core._api.deprecation import deprecated
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader
from langchain_community.utilities.vertexai import get_client_info

if TYPE_CHECKING:
    from google.cloud.speech_v2 import RecognitionConfig
    from google.protobuf.field_mask_pb2 import FieldMask


@deprecated(
    since="0.0.32",
    removal="0.3.0",
    alternative_import="langchain_google_community.SpeechToTextLoader",
)
class GoogleSpeechToTextLoader(BaseLoader):
    """Loader for Google Cloud Speech-to-Text audio transcripts.

    It uses the Google Cloud Speech-to-Text API to transcribe audio files
    and loads the transcribed text into one or more Documents,
    depending on the specified format.

    To use, you should have the ``google-cloud-speech`` python package
    installed.

    Audio files can be specified via a Google Cloud Storage uri or a local
    file path.

    For a detailed explanation of Google Cloud Speech-to-Text, refer to the
    product documentation.
    https://cloud.google.com/speech-to-text
    """

    def __init__(
        self,
        project_id: str,
        file_path: str,
        location: str = "us-central1",
        recognizer_id: str = "_",
        config: Optional[RecognitionConfig] = None,
        config_mask: Optional[FieldMask] = None,
    ):
        """Initialize the GoogleSpeechToTextLoader.

        Args:
            project_id: Google Cloud Project ID.
            file_path: A Google Cloud Storage URI or a local file path.
            location: Speech-to-Text recognizer location.
            recognizer_id: Speech-to-Text recognizer id.
            config: Recognition options and features.
                For more information:
                https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognitionConfig
            config_mask: The list of fields in config that override the
                values in the ``default_recognition_config`` of the
                recognizer during this recognition request.
                For more information:
                https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognizeRequest

        Raises:
            ImportError: If the ``google-cloud-speech`` package is not
                installed.
        """
        try:
            from google.api_core.client_options import ClientOptions
            from google.cloud.speech_v2 import (
                AutoDetectDecodingConfig,
                RecognitionConfig,
                RecognitionFeatures,
                SpeechClient,
            )
        except ImportError as exc:
            raise ImportError(
                "Could not import google-cloud-speech python package. "
                "Please install it with `pip install google-cloud-speech`."
            ) from exc

        self.project_id = project_id
        self.file_path = file_path
        self.location = location
        self.recognizer_id = recognizer_id
        # Config must be set in speech recognition request.
        self.config = config or RecognitionConfig(
            auto_decoding_config=AutoDetectDecodingConfig(),
            language_codes=["en-US"],
            model="chirp",
            features=RecognitionFeatures(
                # Automatic punctuation could be useful for language applications
                enable_automatic_punctuation=True,
            ),
        )
        self.config_mask = config_mask

        # Every location except "global" is addressed through its own
        # regional endpoint; for "global" the client's default is used.
        self._client = SpeechClient(
            client_info=get_client_info(module="speech-to-text"),
            client_options=(
                ClientOptions(api_endpoint=f"{location}-speech.googleapis.com")
                if location != "global"
                else None
            ),
        )
        self._recognizer_path = self._client.recognizer_path(
            project_id, location, recognizer_id
        )

    def load(self) -> List[Document]:
        """Transcribe the audio file and load the transcript into documents.

        It uses the Google Cloud Speech-to-Text API to transcribe the audio
        file and blocks until the transcription is finished.

        Returns:
            One Document per recognition result that carries at least one
            alternative. ``page_content`` is the transcript of the first
            alternative; ``metadata`` holds the result's ``language_code``
            and ``result_end_offset``.

        Raises:
            ImportError: If the ``google-cloud-speech`` package is not
                installed.
        """
        try:
            from google.cloud.speech_v2 import RecognizeRequest
        except ImportError as exc:
            raise ImportError(
                "Could not import google-cloud-speech python package. "
                "Please install it with `pip install google-cloud-speech`."
            ) from exc

        request = RecognizeRequest(
            recognizer=self._recognizer_path,
            config=self.config,
            config_mask=self.config_mask,
        )

        # Only a path that *starts* with the scheme is a Cloud Storage URI.
        # A substring test would misroute a local path that merely contains
        # "gs://" somewhere inside it.
        if self.file_path.startswith("gs://"):
            request.uri = self.file_path
        else:
            with open(self.file_path, "rb") as f:
                request.content = f.read()

        response = self._client.recognize(request=request)

        return [
            Document(
                page_content=result.alternatives[0].transcript,
                metadata={
                    "language_code": result.language_code,
                    "result_end_offset": result.result_end_offset,
                },
            )
            for result in response.results
            # A result without alternatives has no transcript to load;
            # skip it instead of failing with IndexError on [0].
            if result.alternatives
        ]