Repetitive result issue with Azure Speech Recognition in .NET
Philipp Schleier
0
Reputation points
I'm building a live mic speech-to-text transcription on Azure using .NET. However, a single sentence results in multiple repetitive sentences. I need help identifying the issue.
What I want is live transcription of a sentence that appears once without repetition, exactly how it works on Azure's Speech Studio - Real-time speech to text.
Here's my code:
Program.cs:
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using System;
using System.Linq;
using [ ].Hubs;
using [ ].Services;
using [ ].Data;
public class Program
{
public static void Main(string[] args)
{
CreateHostBuilder(args).Build().Run();
}
public static IHostBuilder CreateHostBuilder(string[] args) =>
Host.CreateDefaultBuilder(args)
.ConfigureWebHostDefaults(webBuilder =>
{
webBuilder.UseUrls("http://localhost:5001");
webBuilder.ConfigureServices((context, services) =>
{
services.AddControllers();
services.AddSignalR();
// Configure CORS
services.AddCors(options =>
{
options.AddDefaultPolicy(builder =>
{
builder.WithOrigins("http://localhost:3000")
.AllowAnyHeader()
.AllowAnyMethod()
.AllowCredentials();
});
});
services.AddSingleton<SpeechService>();
services.AddSingleton<TranscriptionService>();
// Add DbContext and hosted service
services.AddDbContext<CosmosDbContext>();
services.AddHostedService<CosmosDbTestService>();
})
.Configure((context, app) =>
{
if (context.HostingEnvironment.IsDevelopment())
{
app.UseDeveloperExceptionPage();
}
// Enable CORS
app.UseCors();
app.UseRouting();
app.UseEndpoints(endpoints =>
{
endpoints.MapControllers();
endpoints.MapHub<TranscriptionHub>("/transcriptionHub");
});
});
})
.ConfigureLogging(logging =>
{
logging.ClearProviders();
logging.AddConsole();
});
}
SpeechService.cs (/Service):
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
namespace [ ].Services
{
public class SpeechService
{
private readonly SpeechRecognizer _speechRecognizer;
private bool isRecognizing = false;
public event Action<string>? OnRecognizing;
public event Action<string>? OnRecognized;
public SpeechService()
{
var subscriptionKey = Environment.GetEnvironmentVariable("AZURE_SPEECH_KEY");
var region = Environment.GetEnvironmentVariable("AZURE_SPEECH_REGION");
if (string.IsNullOrEmpty(subscriptionKey) || string.IsNullOrEmpty(region))
{
throw new InvalidOperationException("Azure Speech Service key and region must be provided via environment variables.");
}
var speechConfig = SpeechConfig.FromSubscription(subscriptionKey, region);
speechConfig.SpeechRecognitionLanguage = "de-DE";
speechConfig.EnableDictation(); // Enable dictation mode for explicit punctuation
var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
_speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
_speechRecognizer.Recognizing += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizingSpeech)
{
OnRecognizing?.Invoke(e.Result.Text);
}
};
_speechRecognizer.Recognized += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizedSpeech)
{
OnRecognized?.Invoke(e.Result.Text);
}
};
_speechRecognizer.Canceled += (s, e) =>
{
isRecognizing = false;
Console.WriteLine($"Recognition canceled: {e.Reason}, {e.ErrorDetails}");
};
_speechRecognizer.SessionStopped += (s, e) =>
{
isRecognizing = false;
Console.WriteLine($"Session stopped: {e.SessionId}");
};
}
public async Task StartRecognitionAsync()
{
if (!isRecognizing)
{
isRecognizing = true;
await _speechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
}
}
public async Task StopRecognitionAsync()
{
if (isRecognizing)
{
await _speechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
isRecognizing = false;
}
}
}
}
TranscriptionService.cs (/Service):
using Microsoft.AspNetCore.SignalR;
using System.Collections.Concurrent;
using [ ].Hubs;
namespace [ ].Services
{
public class TranscriptionService
{
private readonly IHubContext<TranscriptionHub> _hubContext;
private readonly ConcurrentDictionary<string, string> _connections = new ConcurrentDictionary<string, string>();
public TranscriptionService(IHubContext<TranscriptionHub> hubContext)
{
_hubContext = hubContext;
}
public void AddConnection(string connectionId)
{
_connections[connectionId] = connectionId;
}
public void RemoveConnection(string connectionId)
{
_connections.TryRemove(connectionId, out _);
}
public async Task BroadcastRecognizing(string text)
{
foreach (var connectionId in _connections.Keys)
{
await _hubContext.Clients.Client(connectionId).SendAsync("ReceiveRecognizing", text);
}
}
public async Task BroadcastRecognized(string text)
{
foreach (var connectionId in _connections.Keys)
{
await _hubContext.Clients.Client(connectionId).SendAsync("ReceiveRecognized", text);
}
}
}
}
TranscriptionHub.cs (/Hub):
using Microsoft.AspNetCore.SignalR;
using [ ].Services;
namespace [ ].Hubs
{
public class TranscriptionHub : Hub
{
private readonly SpeechService _speechService;
private readonly TranscriptionService _transcriptionService;
public TranscriptionHub(SpeechService speechService, TranscriptionService transcriptionService)
{
_speechService = speechService;
_transcriptionService = transcriptionService;
}
public override async Task OnConnectedAsync()
{
_transcriptionService.AddConnection(Context.ConnectionId);
_speechService.OnRecognizing += HandleRecognizing;
_speechService.OnRecognized += HandleRecognized;
await base.OnConnectedAsync();
}
public override async Task OnDisconnectedAsync(Exception? exception)
{
_transcriptionService.RemoveConnection(Context.ConnectionId);
_speechService.OnRecognizing -= HandleRecognizing;
_speechService.OnRecognized -= HandleRecognized;
await base.OnDisconnectedAsync(exception);
}
private async void HandleRecognizing(string text)
{
await _transcriptionService.BroadcastRecognizing(text);
}
private async void HandleRecognized(string text)
{
await _transcriptionService.BroadcastRecognized(text);
}
public async Task StartTranscription()
{
await _speechService.StartRecognitionAsync();
}
public async Task StopTranscription()
{
await _speechService.StopRecognitionAsync();
}
}
}
Sign in to answer