Skip to content

Azure speech

AzureSpeechToolSpec #

Bases: BaseToolSpec

Azure语音工具规范。

Source code in llama_index/tools/azure_speech/base.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class AzureSpeechToolSpec(BaseToolSpec):
    """Azure Speech tool spec.

    Exposes the two functions named in ``spec_functions``, both of which go
    through the Azure Cognitive Services Speech SDK using the shared
    ``self.config`` built in ``__init__``.
    """

    spec_functions = ["speech_to_text", "text_to_speech"]

    def __init__(
        self, region: str, speech_key: str, language: Optional[str] = "en-US"
    ) -> None:
        """Initialize with parameters.

        Args:
            region (str): Region passed to ``speechsdk.SpeechConfig``.
            speech_key (str): Subscription key passed to ``speechsdk.SpeechConfig``.
            language (Optional[str]): Speech recognition language. When
                ``None``, the recognition language is left unset on the config.
        """
        # Imported lazily so the SDK is only required when the tool is used.
        import azure.cognitiveservices.speech as speechsdk

        self.config = speechsdk.SpeechConfig(subscription=speech_key, region=region)
        # ``language`` is declared Optional; do not push ``None`` into the SDK.
        if language is not None:
            self.config.speech_recognition_language = language

    def text_to_speech(self, text: str) -> Optional[str]:
        """Create an audio version of a natural-language string and play it.

        This tool accepts a natural language string and will use Azure speech
        services to create an audio version of the text, and play it on the
        user's computer.

        Args:
            text (str): The text to play.

        Returns:
            Optional[str]: ``"Audio playback complete."`` when the synthesis
            result reports ``SynthesizingAudioCompleted``; ``None`` otherwise
            (cancellation details are printed when the result was canceled).
        """
        import azure.cognitiveservices.speech as speechsdk

        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.config)
        result = speech_synthesizer.speak_text(text)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            # NOTE(review): the stream is constructed and immediately
            # discarded; kept as-is to preserve the original call sequence.
            speechsdk.AudioDataStream(result)
            return "Audio playback complete."

        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print(f"Speech synthesis canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {cancellation_details.error_details}")

        # Every non-success path reports failure the same way.
        return None

    def _transcribe(self, speech_recognizer) -> List[str]:
        """Run continuous recognition until the session stops or is canceled.

        Args:
            speech_recognizer: Recognizer object exposing the ``recognized``,
                ``session_stopped`` and ``canceled`` signals and the
                ``start/stop_continuous_recognition_async`` methods.

        Returns:
            List[str]: The ``evt.result.text`` of every ``recognized`` event,
            in the order the events were received.
        """
        done = False
        results = []

        def stop_cb(evt) -> None:
            """Callback that stops continuous recognition."""
            nonlocal done
            speech_recognizer.stop_continuous_recognition_async()
            done = True

        # Collect the text of each recognized segment as it arrives.
        speech_recognizer.recognized.connect(
            lambda evt: results.append(evt.result.text)
        )
        # Either signal marks the end of the recognition session.
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        # Start continuous speech recognition
        speech_recognizer.start_continuous_recognition_async()
        # Poll until one of the stop callbacks has flipped the flag.
        while not done:
            time.sleep(0.5)

        return results

    def speech_to_text(self, filename: str) -> List[str]:
        """Transcribe a speech audio file to text using Azure.

        This tool accepts a filename for a speech audio file and uses Azure to
        transcribe it into text.

        Args:
            filename (str): The name of the file to transcribe.

        Returns:
            List[str]: The recognized text segments collected by
            ``_transcribe``.
        """
        import azure.cognitiveservices.speech as speechsdk

        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.config,
            audio_config=speechsdk.audio.AudioConfig(filename=filename),
        )
        return self._transcribe(speech_recognizer)

text_to_speech #

text_to_speech(text: str) -> Optional[str]

这个工具接受自然语言字符串,并将使用Azure语音服务来创建文本的音频版本,并在用户的计算机上播放它。

Parameters:

Name Type Description Default
text str

要播放的文本

required
Source code in llama_index/tools/azure_speech/base.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
    def text_to_speech(self, text: str) -> Optional[str]:
        """Create an audio version of a natural-language string and play it.

        This tool accepts a natural language string and will use Azure speech
        services to create an audio version of the text, and play it on the
        user's computer.

        Args:
            text (str): The text to play.

        Returns:
            Optional[str]: ``"Audio playback complete."`` when the synthesis
            result reports ``SynthesizingAudioCompleted``; ``None`` otherwise
            (cancellation details are printed when the result was canceled).
        """
        import azure.cognitiveservices.speech as speechsdk

        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.config)
        result = speech_synthesizer.speak_text(text)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            # NOTE(review): the stream is constructed and immediately
            # discarded; kept as-is to preserve the original call sequence.
            speechsdk.AudioDataStream(result)
            return "Audio playback complete."

        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print(f"Speech synthesis canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {cancellation_details.error_details}")

        # Every non-success path reports failure the same way.
        return None

speech_to_text #

speech_to_text(filename: str) -> List[str]

这个工具接受一个演讲音频文件的文件名,并使用Azure将其转录为文本。

Parameters:

Name Type Description Default
filename str

要转录的文件名

required
Source code in llama_index/tools/azure_speech/base.py
69
70
71
72
73
74
75
76
77
78
79
80
81
    def speech_to_text(self, filename: str) -> List[str]:
        """Transcribe a speech audio file to text using Azure.

        This tool accepts a filename for a speech audio file and uses Azure to
        transcribe it into text.

        Args:
            filename (str): The name of the file to transcribe.

        Returns:
            List[str]: Whatever ``self._transcribe`` returns for the
            recognizer built from ``self.config`` and the given file.
        """
        import azure.cognitiveservices.speech as speechsdk

        # Describe the audio input first, then bind it to a recognizer.
        audio_source = speechsdk.audio.AudioConfig(filename=filename)
        recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.config, audio_config=audio_source
        )
        return self._transcribe(recognizer)