Share via


Note

Please see Azure Cognitive Services for Speech documentation for the latest supported speech solutions.

Microsoft Speech Platform

Initialize and Configure a Speech Recognition Engine

Using the ISpRecognizer interface, applications can initialize an instance of a speech recognition engine, configure its audio input, create a recognition context, and start and stop recognition.

Example

The following code excerpt queries the registry for a recognizer token that supports the US English language and uses the token to create an ISpRecognizer instance. Because the state of a speech recognition engine is active by default, the example sets the state of the recognizer object to inactive to prevent recognitions from being processed before grammars are loaded and event notifications are registered.

The example creates a recognition context for the recognizer and uses it to subscribe to recognition events and load a grammar from a file. For input, the example configures the recognizer to process audio from a WAV-format file. When the application subsequently sets the recognizer state to active, the ISpRecognizer instance begins processing audio input.

```cpp

// Find the best matching installed en-US recognizer.
CComPtr<ISpObjectToken> cpRecognizerToken;

if (SUCCEEDED(hr)) { hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken;); }

// Create the in-process recognizer and immediately set its state to inactive. CComPtr<ISpRecognizer> cpRecognizer;

if (SUCCEEDED(hr)) { hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer); }

if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecognizer(cpRecognizerToken); }

if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecoState(SPRST_INACTIVE); }

// Create a new recognition context from the recognizer. CComPtr<ISpRecoContext> cpContext;

if (SUCCEEDED(hr)) { hr = cpRecognizer->CreateRecoContext(&cpContext;); }

// Subscribe to the speech recognition event and end stream event. if (SUCCEEDED(hr)) { ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM); hr = cpContext->SetInterest(ullEventInterest, ullEventInterest); }

// Establish a Win32 event to signal when speech events are available. HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

if (SUCCEEDED(hr)) { hr = cpContext->SetNotifyWin32Event(); }

if (SUCCEEDED(hr)) { hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent) { // Notification handle unsupported hr = SPERR_UNITIALIZED; } }

// Set up an audio input stream using a .wav file and set the recognizer's input. CComPtr<ISpStream> cpInputStream;

if (SUCCEEDED(hr)) { hr = SPBindToFile(L"Test.wav", SPFM_OPEN_READONLY, &cpInputStream;); }

if (SUCCEEDED(hr)) { hr = cpRecognizer->SetInput(cpInputStream, TRUE); }

// Create a new grammar and load an SRGS grammar from file. CComPtr<ISpRecoGrammar> cpGrammar;

if (SUCCEEDED(hr)) { hr = cpContext->CreateGrammar(0, &cpGrammar;); }

if (SUCCEEDED(hr)) { hr = cpGrammar->LoadCmdFromFile(L"SRGS_Grammar.grxml", SPLO_STATIC); }

// Set all top-level rules in the new grammar to the active state. if (SUCCEEDED(hr)) { hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE); }

// Finally, set the recognizer state to active to begin recognition. if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS); }

```

See Also