Cognitive Services viseme audio offset and audio doesn't match
Shubham Katta
0
Reputation points
Hello there,
I am using Text to Speech with visemeReceived Callback to get viseme, but the audio time and the viseme audiooffset doesn't match. For example I gave "Hello there, how may I help you today!" as input so the audio ends before the viseme animation.
this is the code I have used
app.post("/texttospeechviseme", async function (req, res) {
let text = req.body.text;
console.log(text);
let audioName = "sound-file-" + new Date().getTime() + ".mp3";
let audioConfig = sdk.AudioConfig.fromAudioFileOutput(audioName);
const synthesizer = new sdk.SpeechSynthesizer(textToSpeechConfig, audioConfig);
let visemeData = [];
synthesizer.visemeReceived = function (s, e) {
let audioOffset = e.audioOffset / 10000;
visemeData.push({ visemeId: e.visemeId, audioOffset: audioOffset });
//console.log("Animation :- " + e.animation);
};
synthesizer.speakTextAsync(
text,
result => {
synthesizer.close();
fs.readFile(audioName, function (err, result) {
let body = {
viseme: visemeData,
audioData: result.toString("base64")
}
res.send(JSON.stringify(body));
fs.unlink(audioName, (err) => {
if (err) throw err;
});
});
},
error => {
synthesizer.close();
reject(error);
}
);
});
I have referred the code from
and why is audioOffset divided by 10000 in the example
window.console.log("(Viseme), Audio offset: " + e.audioOffset / 10000 + "ms. Viseme ID: " + e.visemeId);
Sign in to answer