Share via

How to get sentence- and word-level timestamp results for real-time speech recognition?

莓 草 0 Reputation points
2024-07-05T06:25:10.16+00:00

I am using the Go SDK.

This is my Go code:

// Do runs single-shot speech recognition on the WAV file at path and returns
// the recognized text.
//
// The file at path is removed when Do returns, whether or not recognition
// succeeded. A subscription key/region pair is picked at random from
// AccessKeyList on every call, and word-level timestamps are requested on the
// speech config. Do stops waiting after 120 seconds, or earlier if ctx is done.
//
// It returns an error when AccessKeyList is empty, when one of the SDK objects
// cannot be created, when ctx is done before an outcome arrives, when the
// 120-second limit elapses, or when the recognition outcome carries an error.
func (m *microsoft) Do(ctx context.Context, path string) (string, error) {
	// Registered first so it runs last, after the deferred Close calls below
	// have released the SDK objects that were built on top of this file.
	defer os.Remove(path)

	// rand.Intn panics on a non-positive argument, so reject an empty key
	// list with an ordinary error instead.
	if len(AccessKeyList) == 0 {
		return "", errors.New("no access keys configured")
	}
	accessKeyConfig := AccessKeyList[rand.Intn(len(AccessKeyList))]
	subscription := accessKeyConfig.Key
	region := accessKeyConfig.Region

	audioConfig, err := audio.NewAudioConfigFromWavFileInput(path)
	if err != nil {
		fwlog.New(ctx).Info("", "audioConfigErr")
		return "", err
	}
	defer audioConfig.Close()

	config, err := speech.NewSpeechConfigFromSubscription(subscription, region)
	if err != nil {
		fwlog.New(ctx).Info("", "configErr")
		return "", err
	}
	defer config.Close()
	config.RequestWordLevelTimestamps()

	speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig)
	if err != nil {
		fwlog.New(ctx).Info("", "speechRecognizerErr")
		return "", err
	}
	defer speechRecognizer.Close()

	speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) {
		defer event.Close()
		fmt.Println("Session Started (ID=", event.SessionID, ")")
	})
	speechRecognizer.Recognizing(recognizingHandler)
	speechRecognizer.Recognized(recognizedHandler)
	speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) {
		defer event.Close()
		fmt.Println("Session Stopped (ID=", event.SessionID, ")")
	})

	task := speechRecognizer.RecognizeOnceAsync()

	// A stoppable timer is released as soon as Do returns instead of
	// lingering for the full 120 seconds.
	timer := time.NewTimer(120 * time.Second)
	defer timer.Stop()

	// NOTE(review): on the ctx-done and timeout paths the pending outcome is
	// never received or closed here — confirm how the SDK handles that.
	var outcome speech.SpeechRecognitionOutcome
	select {
	case outcome = <-task:
	case <-ctx.Done():
		// Honor caller cancellation/deadline rather than always waiting
		// for the outcome or the fixed timeout.
		return "", ctx.Err()
	case <-timer.C:
		fmt.Println("Timed out")
		return "", errors.New("Timed out")
	}
	defer outcome.Close()

	if outcome.Error != nil {
		fwlog.New(ctx).Info("", "outcomeErr")
		return "", outcome.Error
	}
	return outcome.Result.Text, nil
}

Azure Speech in Foundry Tools

1 answer

Sort by: Most helpful
  1. navba-MSFT 27,625 Reputation points Microsoft Employee Moderator
    2024-07-05T10:47:04.7+00:00

    @莓 草 Welcome to the Microsoft Q&A forum, and thank you for posting your query here!

    .

    I did some research on this and found how it is done in the Java SDK.

    .

    .

    Following the same approach in the Go SDK, could you please check whether the snippet below helps?

    jsonText := outcome.Result.Properties.GetProperty(common.SpeechServiceResponseJSONResult, "")
    

    .

    If you have any follow-up questions, please let me know. I would be happy to help.

    Was this answer helpful?


Your answer

Answers can be marked as 'Accepted' by the question author and 'Recommended' by moderators, which helps users know the answer solved the author's problem.