Tag not monitored by Microsoft.
How to use Azure Speech-to-Text continuous recognition?
Development Environment
HoloLens2
Unity 2020.3.11f
Visual Studio 2019
Microsoft Mixed Reality Toolkit v2.7.3
Microsoft.CognitiveServices.Speech package
I would like to perform speech-to-text conversion on non-user (ambient) audio using HoloLens 2.
I created a readBytes (byte[]) field that holds audio data captured with MicStream.StreamCategory.ROOM_CAPTURE from the HoloToolkit.Unity.InputModule.MicStream class (MicStream.cs).
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using HoloToolkit.Unity.InputModule;
using UnityEngine;
using UnityEngine.UI;
using System.Linq;
/// <summary>
/// Captures room audio via MicStream and exposes it as 16-bit little-endian
/// PCM bytes (<see cref="readBytes"/>) suitable for pushing into a speech
/// recognizer's audio input stream.
/// </summary>
public class Test : MonoBehaviour
{
    // Latest accumulated capture as 16-bit LE PCM bytes.
    public static byte[] readBytes;

    public MicStream.StreamCategory StreamType = MicStream.StreamCategory.ROOM_CAPTURE;
    public bool KeepAllData = false;
    public float InputGain = 1;

    // All 16-bit samples accumulated since the last OnClickButton1() call.
    public readonly List<short> samplingData = new List<short>();

    private bool _isStart = false;

    // Dedicated lock object: locking `this` (as the original did) risks deadlock
    // if external code also locks the instance (CA2002).
    private readonly object _gate = new object();

    private void Awake()
    {
        // Initialize the mic at the same sample rate Unity's audio engine runs at,
        // so OnAudioFilterRead buffers line up with MicGetFrame.
        CheckForErrorOnCall(MicStream.MicInitializeCustomRate((int)StreamType, AudioSettings.outputSampleRate));
    }

    /// <summary>Starts capture: clears previous samples and opens the mic stream.</summary>
    public void OnClickButton1()
    {
        // Clear under the lock: the audio thread may still be appending samples.
        lock (_gate)
        {
            samplingData.Clear();
        }
        CheckForErrorOnCall(MicStream.MicStartStream(KeepAllData, false));
        CheckForErrorOnCall(MicStream.MicSetGain(InputGain));
        _isStart = true;
    }

    /// <summary>Stops capture.</summary>
    public void OnClickButton2()
    {
        _isStart = false;
        CheckForErrorOnCall(MicStream.MicStopStream());
    }

    private void OnDestroy()
    {
        CheckForErrorOnCall(MicStream.MicDestroy());
    }

    // Runs on Unity's audio thread. Not `async void` (the original was): nothing
    // here awaits, and async void makes any exception unobservable (CS1998).
    private void OnAudioFilterRead(float[] buffer, int numChannels)
    {
        if (!_isStart) return;

        lock (_gate)
        {
            CheckForErrorOnCall(MicStream.MicGetFrame(buffer, buffer.Length, numChannels));
            foreach (var f in buffer)
            {
                samplingData.Add(FloatToInt16(f));
            }

            // Materialize INSIDE the lock: ConvertBytes is a lazy iterator, and the
            // original enumerated samplingData outside the lock, racing with
            // concurrent Add/Clear (InvalidOperationException: collection modified).
            readBytes = ConvertBytes(samplingData).ToArray();
        }
    }

    /// <summary>Lazily converts 16-bit samples to a little-endian byte stream.</summary>
    private IEnumerable<byte> ConvertBytes(List<short> sampleData)
    {
        foreach (var s in sampleData)
        {
            var bytes = BitConverter.GetBytes(s);
            yield return bytes[0];
            yield return bytes[1];
        }
    }

    private void CheckForErrorOnCall(int returnCode)
    {
        MicStream.CheckForErrorOnCall(returnCode);
    }

    /// <summary>Scales a [-1, 1] float sample to 16-bit PCM, clamping out-of-range input.</summary>
    private static short FloatToInt16(float value)
    {
        var f = value * short.MaxValue;
        if (f > short.MaxValue) f = short.MaxValue;
        if (f < short.MinValue) f = short.MinValue;
        return (short)f;
    }
}
I would like to implement continuous recognition of Azure Speech service/Speech-to-text with reference to (how-to-recognize-speech).
Could you tell me what the script would look like if I implemented continuous recognition using the readBytes (byte[]) data I created?
I would be grateful for any pointers or example code.