use
Echo cancellation for a Windows desktop audio recording app in C++
I have a Windows application written in C++ that performs audio recording.
The recording process is initiated by detecting microphone activity at the operating system level, resulting in the creation of two separate raw files—one for each audio channel.
One channel captures microphone input, and the other records speakers output, forming a dual-channel recording.
Once recording stops, these two files are merged into a single stereo file, with one channel dedicated to the local user at the computer and the other to the incoming audio from the remote speaker.
Currently, the application functions effectively, but it lacks audio pre-processing and echo cancellation features.
This deficiency leads to the microphone picking up audio from the speakers, and adding it to the microphone channel.
This issue does not affect the speakers channel. Obviously this issue does not arise when using headphones as the microphone does not hear anything from the speakers in such a case.
Unlike our application, standard communication platforms like Microsoft Teams, Zoom, Skype, Viber, and WhatsApp for Windows employ mechanisms that prevent the microphone from capturing audio output from the speakers, thus avoiding the feedback loop where remote participants might hear their voices echoed back.
I need a helping hand adding functionality to our existing application that enables the microphone channel to isolate and eliminate noise and sound coming from the speakers, ensuring it only records audio from the local user during a conference call.
I am attaching the code for microphone recording. I need to add pre-processing that performs echo cancellation at the following point in the code below:
// TODO: Apply echo cancellation algorithm to pData
Thanks a lot :)
I would appreciate your expertise and any references.
bool AudioCapture::CaptureMicrophoneAudio(const wchar_t* outputFilePath)
{
// Activate audio client
IAudioClient* pAudioClient = nullptr;
HRESULT hr = pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
if (FAILED(hr)) {
logger->Log("Error activating audio client: ");
pDevice->Release();
//CoUninitialize();
return false;
}
// Get mix format
WAVEFORMATEX* pWaveFormat = nullptr;
hr = pAudioClient->GetMixFormat(&pWaveFormat);
if (FAILED(hr)) {
logger->Log("Error getting mix format: ");
pAudioClient->Release();
pDevice->Release();
//CoUninitialize();
return false;
}
logger->Log("WAVEFORMATEX: nChannels - " + std::to_string(pWaveFormat->nChannels));
logger->Log("WAVEFORMATEX: nSamplesPerSec - " + std::to_string(pWaveFormat->nSamplesPerSec));
logger->Log("WAVEFORMATEX: wBitsPerSample - " + std::to_string(pWaveFormat->wBitsPerSample));
logger->Log("WAVEFORMATEX: wFormatTag - " + std::to_string(pWaveFormat->wFormatTag));
logger->Log("WAVEFORMATEX: cbSize - " + std::to_string(pWaveFormat->cbSize));
logger->Log("WAVEFORMATEX: nAvgBytesPerSec - " + std::to_string(pWaveFormat->nAvgBytesPerSec));
logger->Log("WAVEFORMATEX: nBlockAlign - " + std::to_string(pWaveFormat->nBlockAlign));
nMicChannels = pWaveFormat->nChannels;
nMicSamplesPerSec = pWaveFormat->nSamplesPerSec;
wMicBitsPerSample = pWaveFormat->wBitsPerSample;
nMicAvgBytesPerSec = pWaveFormat->nAvgBytesPerSec;
// Initialize audio client with the mix format
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, 0, 0, pWaveFormat, NULL);
if (FAILED(hr)) {
logger->Log("Error initializing audio client: ");
CoTaskMemFree(pWaveFormat);
pAudioClient->Release();
pDevice->Release();
//CoUninitialize();
return false;
}
// Get capture client
IAudioCaptureClient* pCaptureClient = nullptr;
hr = pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
if (FAILED(hr)) {
logger->Log("Error getting capture client: ");
CoTaskMemFree(pWaveFormat);
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
return false;
}
// Open binary file for writing
std::string tempFilePath = Utils::GetTempFilename("Audio_mic_capture", "raw");
logger->Log(tempFilePath);
std::ofstream outFile(tempFilePath, std::ios::binary);
if (!outFile.is_open()) {
logger->Log("Error opening binary file for writing");
CoTaskMemFree(pWaveFormat);
pCaptureClient->Release();
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
return false;
}
// Start capturing
hr = pAudioClient->Start();
if (FAILED(hr))
{
logger->Log("Error starting audio client: ");
CoTaskMemFree(pWaveFormat);
pCaptureClient->Release();
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
return false;
}
// Main loop for capturing and writing to file
while (!exitFlag)
{
// Capture audio data
// Read audio data from pCaptureClient
BYTE* pData;
UINT32 numFramesAvailable;
DWORD flags;
hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
if (FAILED(hr))
{
logger->Log("Error getting audio buffer: ");
break;
}
//
// TODO: Apply echo cancellation algorithm to pData
//
// Write audio data to file
int count = numFramesAvailable * pWaveFormat->nBlockAlign;
outFile.write(reinterpret_cast<const char*>(pData), count);
// Release the buffer
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
if (FAILED(hr))
{
logger->Log("Error releasing audio buffer: ");
break;
}
}
logger->Log("Capturing Completed");
// Close binary file
outFile.close();
// Stop capturing
pAudioClient->Stop();
// Clean up resources
CoTaskMemFree(pWaveFormat);
pCaptureClient->Release();
pAudioClient->Release();
pDevice->Release();
// CoUninitialize();
// convert to mp3 and delete temp file
std::string tempFilePathMp3 = Utils::GetTempFilename("Audio_capture_mic", "mp3");
tempMicFilePathWMp3 = std::wstring(tempFilePathMp3.begin(), tempFilePathMp3.end());
std::wstring tempFilePathW(tempFilePath.begin(), tempFilePath.end());
if (!convertToMp3(tempFilePathW, tempMicFilePathWMp3, nMicChannels, nMicSamplesPerSec, wMicBitsPerSample, nMicAvgBytesPerSec))
{
logger->Log("Failed to convert to MP3");
}
else
{
logger->Log("Saved to MP3 data");
}
// Remove temp file
std::remove(tempFilePath.c_str());
return true;
}