Change Language on TranslationRecognizer for Synthesizer

Thomas Bauer 20 Reputation points
2025-01-27T18:16:30.33+00:00

Hi All,

I'm using the Speech SDK TranslationRecognizer for transcription and translation, and the translation is also synthesized to audio. I need to change the target language at runtime. This works fine for the text recognition, but as soon as I add a new target language and remove the previous one, the synthesizer stops producing any output.

import os
import azure.cognitiveservices.speech as speechsdk

class TranslationInterface:  # class name taken from the log messages further down

    def create_translation_recognizer(self):
        speech_translation_config = speechsdk.translation.SpeechTranslationConfig(subscription=os.getenv('AZURE_SPEECH_KEY'), region=os.getenv('AZURE_SPEECH_REGION'))
        speech_translation_config.set_property(speechsdk.PropertyId.Speech_SegmentationStrategy, "Semantic")
        speech_translation_config.speech_recognition_language=self.source_language
        speech_translation_config.add_target_language(self.target_language)
        # See voices in azure speech studio: https://speech.microsoft.com/portal/voicegallery
        speech_translation_config.voice_name = "en-US-AdamMultilingualNeural"
        speech_translation_config.request_word_level_timestamps()
        speech_translation_config.output_format = speechsdk.OutputFormat.Detailed

        self.push_stream = speechsdk.audio.PushAudioInputStream()            
        audio_config = speechsdk.audio.AudioConfig(stream=self.push_stream)
        self.translation_recognizer = speechsdk.translation.TranslationRecognizer(translation_config=speech_translation_config, audio_config=audio_config)
        self.translation_recognizer.recognizing.connect(self._recognizing)
        self.translation_recognizer.recognized.connect(self._recognized)
        self.translation_recognizer.canceled.connect(self._canceled)
        self.translation_recognizer.session_stopped.connect(self._session_stopped)
        self.translation_recognizer.synthesizing.connect(self._synthesis_callback)
        self.translation_recognizer.start_continuous_recognition_async()
    
    def _synthesis_callback(self, evt):
        print("synth")
        #print("synthoffset",evt.result.offset)
        if evt.result.reason == speechsdk.ResultReason.SynthesizingAudio:
            size = len(evt.result.audio)
            print(f'Audio synthesized: {size} byte(s) {"(COMPLETED)" if size == 0 else ""}')
            if size > 0:
                self.audio_out(evt.result.audio)

    def _recognizing(self, evt):
        if evt.result.reason == speechsdk.ResultReason.TranslatingSpeech:
            #print(f'Recognizing: "{evt.result.translations[self.target_language]}"')
            self.activity_out(evt.result.translations[self.target_language])  # for future use

    def _recognized(self, evt):
        print("offset",evt.result.offset/10**7)
        if evt.result.reason == speechsdk.ResultReason.TranslatedSpeech:
            if evt.result.text == "":
                return
            print("\nIncoming Translation:\n",evt.result.translations[self.target_language])
            self.queue.append({"text": evt.result.text, "translation" : evt.result.translations[self.target_language]})
        elif evt.result.reason == speechsdk.ResultReason.NoMatch:
            print('No speech could be translated.')
        self.check_language()

    def check_language(self):
        with open("language_config.txt", "r") as f:
            target_lang = f.read().strip().splitlines()[1][:2]
            print("targetlang", target_lang)
            if target_lang != self.target_language:
                print("trying to change")
                self.translation_recognizer.add_target_language(target_lang)
                self.translation_recognizer.remove_target_language(self.target_language)
                self.target_language = target_lang  # keep local state in sync for the callbacks

    def _canceled(self, evt):
        print(f'TranslationInterface - canceled - reason={evt.reason}')
        if evt.reason == speechsdk.CancellationReason.Error:
            print(f'  errorCode={evt.error_code}')
            print(f'  errorDetails={evt.error_details}')
        self.translation_recognizer.stop_continuous_recognition_async()
        self.resolve = evt

    def _session_stopped(self, evt):
        print('TranslationInterface - sessionStopped -- resolving')
        self.translation_recognizer.stop_continuous_recognition_async()
        self.resolve = evt

Do I have to do anything besides adding the new target language and removing the old one? Does the synthesizer need to be re-initialized? I could tear down the whole TranslationRecognizer and create it again with the new language, but that would mean any audio still buffered in it is lost.
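
For reference, the fallback I'm describing would look roughly like the sketch below: keep a short local buffer of the chunks written to the PushAudioInputStream and replay them into a fresh stream after the recognizer is rebuilt with the new target language. The push_audio and rebuild_recognizer helpers and the _buffered_chunks attribute are just illustrative names on the same class, not part of the SDK, and I haven't verified that this works around the synthesis issue.

    # Hypothetical helpers on the same class. Only PushAudioInputStream.write()/close()
    # and stop_continuous_recognition_async() are real SDK calls; the rest is sketch.
    def push_audio(self, chunk: bytes):
        # self._buffered_chunks would be e.g. a collections.deque(maxlen=N) created in
        # __init__, holding roughly the last few seconds of audio for replay.
        self._buffered_chunks.append(chunk)
        self.push_stream.write(chunk)

    def rebuild_recognizer(self, new_target_lang: str):
        # Stop the old recognizer and close its input stream...
        self.translation_recognizer.stop_continuous_recognition_async().get()
        self.push_stream.close()
        # ...recreate everything with the new target language...
        self.target_language = new_target_lang
        self.create_translation_recognizer()
        # ...and replay the buffered audio into the new push stream so it isn't lost.
        for chunk in list(self._buffered_chunks):
            self.push_stream.write(chunk)
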
