Source code for langchain_community.document_loaders.google_speech_to_text

from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain_core._api.deprecation import deprecated
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader
from langchain_community.utilities.vertexai import get_client_info

if TYPE_CHECKING:
    from google.cloud.speech_v2 import RecognitionConfig
    from google.protobuf.field_mask_pb2 import FieldMask


@deprecated(
    since="0.0.32",
    removal="0.3.0",
    alternative_import="langchain_google_community.SpeechToTextLoader",
)
class GoogleSpeechToTextLoader(BaseLoader):
    """Loader for Google Cloud Speech-to-Text audio transcripts.

    It uses the Google Cloud Speech-to-Text API to transcribe an audio file
    and loads the transcribed text into one or more Documents, depending on
    the specified format.

    To use, you should have the ``google-cloud-speech`` python package
    installed.

    The audio file can be specified via a Google Cloud Storage URI or a
    local file path.

    For a detailed explanation of Google Cloud Speech-to-Text, refer to the
    product documentation:
    https://cloud.google.com/speech-to-text
    """

    def __init__(
        self,
        project_id: str,
        file_path: str,
        location: str = "us-central1",
        recognizer_id: str = "_",
        config: Optional[RecognitionConfig] = None,
        config_mask: Optional[FieldMask] = None,
    ):
        """Initialize the GoogleSpeechToTextLoader.

        Args:
            project_id: Google Cloud project ID.
            file_path: A Google Cloud Storage URI (``gs://...``) or a local
                file path.
            location: Speech-to-Text recognizer location.
            recognizer_id: Speech-to-Text recognizer ID.
            config: Recognition options and features. When omitted, a default
                config is built: auto-detected decoding, ``en-US``, the
                ``chirp`` model and automatic punctuation.
                For more information:
                https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognitionConfig
            config_mask: The list of fields in ``config`` that override the
                values in the recognizer's ``default_recognition_config``
                during this recognition request.
                For more information:
                https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognizeRequest

        Raises:
            ImportError: If the ``google-cloud-speech`` package is not
                installed.
        """
        # Imported lazily so that the module can be imported without the
        # optional google-cloud-speech dependency installed.
        try:
            from google.api_core.client_options import ClientOptions
            from google.cloud.speech_v2 import (
                AutoDetectDecodingConfig,
                RecognitionConfig,
                RecognitionFeatures,
                SpeechClient,
            )
        except ImportError as exc:
            raise ImportError(
                "Could not import google-cloud-speech python package. "
                "Please install it with `pip install google-cloud-speech`."
            ) from exc

        self.project_id = project_id
        self.file_path = file_path
        self.location = location
        self.recognizer_id = recognizer_id
        # Config must be set in speech recognition request.
        self.config = config or RecognitionConfig(
            auto_decoding_config=AutoDetectDecodingConfig(),
            language_codes=["en-US"],
            model="chirp",
            features=RecognitionFeatures(
                # Automatic punctuation could be useful for language applications
                enable_automatic_punctuation=True,
            ),
        )
        self.config_mask = config_mask

        # Any location other than "global" is addressed through its own
        # regional endpoint; for "global" no client options are passed and the
        # client library picks its default endpoint.
        self._client = SpeechClient(
            client_info=get_client_info(module="speech-to-text"),
            client_options=(
                ClientOptions(api_endpoint=f"{location}-speech.googleapis.com")
                if location != "global"
                else None
            ),
        )
        self._recognizer_path = self._client.recognizer_path(
            project_id, location, recognizer_id
        )

    def load(self) -> List[Document]:
        """Transcribe the audio file and load the transcript into documents.

        It uses the Google Cloud Speech-to-Text API to transcribe the audio
        file and blocks until the transcription is finished.

        Returns:
            One ``Document`` per recognition result that has at least one
            alternative. ``page_content`` is the transcript of the result's
            first alternative; ``metadata`` holds the result's
            ``language_code`` and ``result_end_offset``.

        Raises:
            ImportError: If the ``google-cloud-speech`` package is not
                installed.
        """
        try:
            from google.cloud.speech_v2 import RecognizeRequest
        except ImportError as exc:
            raise ImportError(
                "Could not import google-cloud-speech python package. "
                "Please install it with `pip install google-cloud-speech`."
            ) from exc

        request = RecognizeRequest(
            recognizer=self._recognizer_path,
            config=self.config,
            config_mask=self.config_mask,
        )

        # Only a path that *starts* with the scheme is a Cloud Storage URI; a
        # substring test would misroute local paths that merely contain
        # "gs://" somewhere in the middle.
        if self.file_path.startswith("gs://"):
            request.uri = self.file_path
        else:
            with open(self.file_path, "rb") as f:
                request.content = f.read()

        response = self._client.recognize(request=request)

        return [
            Document(
                page_content=result.alternatives[0].transcript,
                metadata={
                    "language_code": result.language_code,
                    "result_end_offset": result.result_end_offset,
                },
            )
            for result in response.results
            # Skip results that carry no alternatives instead of failing with
            # an IndexError on ``alternatives[0]``.
            if result.alternatives
        ]