Azure AI Speech
An Azure service that integrates speech processing into apps and services.
I am streaming from a *.wav file. When I run the code below, I sometimes (but not always) get the following exception. Why is this happening?
FATAL: exception not rethrown
Aborted (core dumped)
My code:
import os
import time
import wave
import threading

import azure.cognitiveservices.speech as speechsdk

# use key here
SPEECH_KEY = "mykey"
SPEECH_REGION = "region"


def speech_recognition_with_pull_stream():
    """gives an example how to use a pull audio stream to recognize speech from a custom audio
    source"""
    class WavFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
        """Example class that implements the Pull Audio Stream interface to recognize speech from
        an audio file"""
        def __init__(self, filename: str):
            super().__init__()
            self._file_h = wave.open(filename, mode=None)
            self.sample_width = self._file_h.getsampwidth()

            assert self._file_h.getnchannels() == 1
            assert self._file_h.getsampwidth() == 2
            assert self._file_h.getframerate() == 8000
            assert self._file_h.getcomptype() == 'NONE'

        def read(self, buffer: memoryview) -> int:
            """read callback function"""
            size = buffer.nbytes
            frames = self._file_h.readframes(size // self.sample_width)
            buffer[:len(frames)] = frames
            return len(frames)

        def close(self):
            """close callback function"""
            self._file_h.close()

    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
    speech_config.set_property(speechsdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, "500")
    speech_config.set_property(speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "8000")

    # specify the audio format
    wave_format = speechsdk.audio.AudioStreamFormat(samples_per_second=8000, bits_per_sample=16,
                                                    channels=1)

    # setup the audio stream
    callback = WavFileReaderCallback("filename.wav")
    stream = speechsdk.audio.PullAudioInputStream(callback, wave_format)
    audio_config = speechsdk.audio.AudioConfig(stream=stream)

    # instantiate the speech recognizer with pull stream input
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    done = False

    def recognizing_cb(evt: speechsdk.SpeechRecognitionEventArgs):
        print(evt.result.text)

    all_results = []

    def recognized_cb(evt: speechsdk.SpeechRecognitionEventArgs):
        print('RECOGNIZED: {}'.format(evt))
        all_results.append(evt.result.text)
        speech_recognizer.stop_continuous_recognition_async()

    def stop_cb(evt: speechsdk.SessionEventArgs):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        nonlocal done
        done = True

    speech_recognizer.recognizing.connect(recognizing_cb)
    speech_recognizer.recognized.connect(recognized_cb)
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    # stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition_async()
    while not done:
        pass
    # speech_recognizer.stop_continuous_recognition()

    all_results[:] = [''.join(all_results[:])]
    return all_results, all_results[0]


transcript, transcript_0 = speech_recognition_with_pull_stream()
print(transcript)
print(transcript_0)
@SOGEDES DIGITAL Thanks for the question. We were able to run the code given in the sample without any issues.
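For reference, the published continuous-recognition samples stop the recognizer only once, from the main thread, after the wait loop, rather than calling stop_continuous_recognition_async() from inside the recognized callback, and they block on an event instead of spinning in a "while not done: pass" loop. Below is a minimal sketch of that shutdown pattern, not a definitive fix: it reads the .wav file through the SDK's built-in file input instead of a pull stream to keep the example short, and SPEECH_KEY, SPEECH_REGION and "filename.wav" are placeholders just as in your question.

import threading

import azure.cognitiveservices.speech as speechsdk

# Placeholders, as in the question.
SPEECH_KEY = "mykey"
SPEECH_REGION = "region"


def recognize_from_wav(filename: str) -> str:
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
    # Built-in WAV file input; a pull stream is only required for truly custom audio sources.
    audio_config = speechsdk.audio.AudioConfig(filename=filename)
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    all_results = []
    done = threading.Event()

    def recognized_cb(evt: speechsdk.SpeechRecognitionEventArgs):
        # Only collect text here; do not stop the recognizer from inside an event callback.
        all_results.append(evt.result.text)

    def stop_cb(evt: speechsdk.SessionEventArgs):
        # Signal the waiting thread; the actual stop happens on the main thread below.
        done.set()

    recognizer.recognized.connect(recognized_cb)
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)

    recognizer.start_continuous_recognition_async().get()
    done.wait()  # block without busy-waiting
    recognizer.stop_continuous_recognition_async().get()  # stop once and wait for it to complete

    return ''.join(all_results)


if __name__ == "__main__":
    print(recognize_from_wav("filename.wav"))

Whether this resolves the intermittent abort in your pull-stream setup would need to be verified on your side, but it keeps all recognizer lifetime changes off the SDK's callback threads and waits for the stop to complete before the recognizer goes out of scope.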