@navba-MSFT
Here's a snippet showing the relevant initialization of the recognizer, along with the function that builds its configuration:
/**
 * Sets up the speech recognizer for a session.
 *
 * Collects the answers of all supplied facts, fetches a speech token, creates
 * the recognizer, wires its event handlers, registers the answers as a phrase
 * list, opens the service connection and forwards SDK diagnostics to the
 * server log endpoint.
 *
 * @param languageTag Recognition language passed to the speech configuration.
 * @param facts Facts whose answers are used as phrase hints and to choose
 *   between the single-word and multiple-word silence timeouts.
 */
async init(languageTag: string = defaultLanguageTag, facts: Fact[] = []): Promise<void> {
  const logFileName = `speech-to-text-${Date.now()}`;
  this.answers = facts.flatMap(fact => fact.answers);
  // An answer counts as a single word when, ignoring surrounding whitespace,
  // it contains no whitespace at all. Splitting on a literal ' ' would treat
  // "yes " (trailing space) or tab-separated words incorrectly.
  const singleWordAnswers = this.answers.some(answer => !/\s/.test(answer.trim()));
  this.recognizedSpeech = new EventEmitter();
  // The token must be available before getSpeechConfig() reads this.speechToken.
  await this.getAzureSpeechToken();
  this.recognizer = new sdk.SpeechRecognizer(
    this.getSpeechConfig(languageTag, singleWordAnswers),
    this.getAudioConfig(),
  );
  // Intermediate hypotheses are only logged; final results are processed.
  this.recognizer.recognizing = (_sender: sdk.SpeechRecognizer, e: sdk.SpeechRecognitionEventArgs) => {
    console.log('Recognizing:', e.result);
  };
  this.recognizer.recognized = (_sender: sdk.SpeechRecognizer, e: sdk.SpeechRecognitionEventArgs) => {
    this.processSpeechResult(e.result);
  };
  // Register the expected answers as phrase hints on this recognizer.
  const phraseListGrammar = sdk.PhraseListGrammar.fromRecognizer(this.recognizer);
  phraseListGrammar.addPhrases(this.answers);
  this.recognizer.speechStartDetected = () => {
    this.isListening = true;
    this.startSpeechTime = performance.now();
  };
  this.recognizer.speechEndDetected = () => {
    this.isListening = false;
  };
  // Set to true by the diagnostics handler below once a log line has arrived.
  let logReceived = false;
  const connection: sdk.Connection = sdk.Connection.fromRecognizer(this.recognizer);
  connection.openConnection();
  // Presumably WaitForCondition polls the predicate and runs the callback once
  // it holds, i.e. the connection is closed after the first log line — verify
  // against the helper's definition.
  WaitForCondition((): boolean => logReceived, (): void => {
    connection.closeConnection();
  });
  // NOTE(review): the handler is registered after openConnection(), so log
  // lines emitted before this point are not forwarded — confirm this is intended.
  sdk.Diagnostics.onLogOutput = (s: string) => {
    this.server.post('/api/speech/log', {}, {fileName: logFileName, log: s});
    logReceived = true;
  };
}
/**
 * Builds the speech configuration used to create the recognizer.
 *
 * Uses the stored authorization token and region, requests detailed output
 * with word-level timestamps, applies the silence timeouts matching the kind
 * of answers expected, and enables audio logging.
 *
 * @param languageTag Language assigned to `speechRecognitionLanguage`.
 * @param singleWordAnswers When true the single-word timeout constants are
 *   used, otherwise the multiple-words constants.
 * @returns The configured `sdk.SpeechConfig`.
 */
private getSpeechConfig(languageTag: string, singleWordAnswers: boolean): sdk.SpeechConfig {
  const { token, region } = this.speechToken;
  const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);

  config.speechRecognitionLanguage = languageTag;
  config.requestWordLevelTimestamps();
  config.outputFormat = sdk.OutputFormat.Detailed;

  // Silence handling, see:
  // https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-csharp#change-how-silence-is-handled
  let segmentationSilenceTimeout;
  let endSilenceTimeout;
  if (singleWordAnswers) {
    segmentationSilenceTimeout = this.SEGMENTATION_SILENCE_TIMEOUT_MS_SINGLE_WORD;
    endSilenceTimeout = this.END_SILENCE_TIMEOUT_MS_SINGLE_WORD;
  } else {
    segmentationSilenceTimeout = this.SEGMENTATION_SILENCE_TIMEOUT_MS_MULTIPLE_WORDS;
    endSilenceTimeout = this.END_SILENCE_TIMEOUT_MS_MULTIPLE_WORDS;
  }
  config.setProperty(sdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, segmentationSilenceTimeout);
  config.setProperty(sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, endSilenceTimeout);

  config.enableAudioLogging();
  return config;
}