Audio glitch when using MediaFoundation AAC encoder

Niklas Wenzel 21 Reputation points
2021-10-08T15:59:28.37+00:00

Upvote this bug report in Feedback Hub

Microsoft asks everyone who experiences this issue to upvote the following bug report in Feedback Hub: https://aka.ms/AAe3i6x

If you also experience this issue, please log into Feedback Hub with your Microsoft account, then click the link above, and upvote it.

This will help bring it to the attention of the respective developers at Microsoft.

Bug description

In my application, I use Media Foundation to convert WAV files to AAC. For some audio files, the encoder inserts very loud and annoying "clicking" sounds into the generated AAC audio. This seems to be a bug in the AAC encoder.

I have heard the same clicking sounds when using the built-in Windows 10 Video Editor app and I have also encountered them in DaVinci Resolve, which also uses the Media Foundation AAC encoder.

Reproduction

I have prepared a minimal code example below. To reproduce the issue, do the following:

  1. Download the following WAV file: https://app.box.com/s/5da0ta01wymkbfpcakxhoczqv1w5aboi
  2. Adjust the file path in the code to point to the downloaded file.

After running the code, the generated out.mp4 file will contain a very loud and annoying "clicking" sound at around 0:31 (31 seconds in).
Here is the file generated by the minimal code example: https://app.box.com/s/vgxz4mblr6v65tjhh2nb25gbnbszlbh3

Illustration of the issue

To illustrate what happens, here is the waveform of the aforementioned WAV file before the AAC encoding process:

138952-wav-before-encoding.png

Here is the waveform of the AAC file after the AAC encoding process:

138944-aac-after-encoding.png

Zooming in, the encoder seems to be emitting nonsense at around 31.30 seconds:

138907-aac-zoomed-in-1.png

138945-aac-zoomed-in-2.png

To the user this appears as a very loud and obnoxious "clicking" sound in the AAC. I can reproduce this issue on both Windows 10 and Windows 11.

Request

Could you please look into this and fix this bug in the Media Foundation AAC encoder?

Thank you very much in advance!

Minimal code example (adjust the file path for the WAV file):

#include <windows.h>  
#include <windowsx.h>  
#include <comdef.h>  
#include <stdio.h>  
#include <mfapi.h>  
#include <mfidl.h>  
#include <mfreadwrite.h>  
#include <Mferror.h>  
#include <mfplay.h>  
#include <codecapi.h>  
#include <atlcomcli.h>  
#pragma comment(lib, "ole32")  
#pragma comment(lib, "mfplat")  
#pragma comment(lib, "mfreadwrite")  
#pragma comment(lib, "mfuuid")  
  
LPCWSTR inFileName = L"C:\\Users\\username\\example.wav";  
  
int main()  
{  
    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);  
    hr = MFStartup(MF_VERSION);  
  
    IMFMediaType* pMediaType;  
    IMFMediaType* pMediaTypeOut;  
    IMFSourceReader* pSourceReader;  
    IMFAttributes* pAttributes;  
    IMFSinkWriter* pSinkWriter;  
    IMFMediaType* pCurrentMediaType;  
    LONGLONG nDruration = 700000000;  
    // Load souce file  
    hr = MFCreateSourceReaderFromURL(inFileName, NULL, &pSourceReader);  
    pSourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, TRUE);  
    // Create a partial media type that specifies uncompressed audio  
    IMFMediaType* pPartialType;  
    MFCreateMediaType(&pPartialType);  
    hr = pPartialType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);  
    hr = pPartialType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);  
    hr = pSourceReader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM  
        , nullptr  
        , pPartialType);  
    hr = pSourceReader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, &pPartialType);  
    hr = pSourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, TRUE);  
    // set media type for output file  
    hr = MFCreateMediaType(&pMediaTypeOut);  
    // set major type for output file  
    hr = pMediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);  
    // Set subtype for output file  
    hr = pMediaTypeOut->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC);  
    hr = pMediaTypeOut->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, 44100);  
    // set audio number channal for output file  
    hr = pMediaTypeOut->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, 2);  
    // set audio bit depth for output file  
    hr = pMediaTypeOut->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 16);  
    hr = pMediaTypeOut->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 24000);  
    hr = pMediaTypeOut->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 1);  
    pMediaTypeOut->SetUINT32(MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 0x29);  
    DWORD nWriterStreamIndex = -1;  
    hr = MFCreateSinkWriterFromURL(L"out.mp4", NULL, NULL, &pSinkWriter);  
    hr = pSinkWriter->AddStream(pMediaTypeOut, &nWriterStreamIndex);  
    hr = pSinkWriter->SetInputMediaType(nWriterStreamIndex, pPartialType, NULL);  
    LONGLONG SampleDuration = 0L;  
    hr = pSinkWriter->BeginWriting();  
    for (;;)  
    {  
        DWORD nStreamIndex, nStreamFlags;  
        LONGLONG nTime;  
        IMFSample* pSample;  
        hr = pSourceReader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM,  
            0,  
            &nStreamIndex,  
            &nStreamFlags,  
            &nTime,  
            &pSample);  
        printf("FLAGS %d\n", nStreamFlags);  
        printf("TIME %lld\n", nTime);  
        if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)  
        {  
            break;  
        }  
        //Update media type, when current media tye changed.  
        if (nStreamFlags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED) {  
            pSourceReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &pCurrentMediaType);  
            printf("MediaType changed\n");  
            pSourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, TRUE);  
            hr = pSinkWriter->SetInputMediaType(nWriterStreamIndex, pCurrentMediaType, NULL);  
            continue;  
        }  
        pSample->GetSampleDuration(&SampleDuration);  
        if (nTime >= nDruration)  
        {  
            break;  
        }  
        // Calculate new timestamp of sample when this sample is written on output file  
        if (nTime + SampleDuration >= nDruration)  
        {  
            SampleDuration = nDruration - nTime;  
            pSample->SetSampleDuration(SampleDuration);  
        }  
        pSample->SetSampleTime(nTime);  
        if (FAILED(hr)) {  
            printf("ReadSample Error...\n");  
            return hr;  
        }  
        //write sample  
        if (pSample)  
        {  
            OutputDebugString(L"Write sample...\n");  
            hr = pSinkWriter->WriteSample(  
                nWriterStreamIndex,  
                pSample  
            );  
            if (FAILED(hr)) {  
                pSample->Release();  
                printf("WriteSample Error...\n");  
                return hr;  
            }  
            pSample->Release();  
            pSample = NULL;  
        }  
    }  
    hr = pSinkWriter->Finalize();  
    return 0;  
  
}  
Windows development | Windows API - Win32
{count} votes

5 answers

Sort by: Most helpful
  1. Castorix31 90,686 Reputation points
    2021-10-09T18:08:39.05+00:00

    A workaround was given in this thread (reducing the channel count from 2 to 1): Converting audio using MFT was error when convert WAV file to AAC
    However, it is not a real solution.

    FeiXue-MSFT said they were investigating the issue, but that was a year ago.


  2. Niklas Wenzel 21 Reputation points
    2021-10-20T16:17:18.3+00:00

    I created a Feedback Hub entry to get this fixed: https://aka.ms/AAe3i6x

    If you also experience this issue, please log into Feedback Hub with your Microsoft account, then click the above link, and upvote it.

    This will help bring it to the attention of the respective developers at Microsoft.

    0 comments No comments

  3. Simon Felix 1 Reputation point
    2022-09-20T20:38:36.303+00:00

    The same problem occurs when using AAC as Bluetooth A2DP codec.

    It appears that the codec has issues with low frequencies. I can trigger the bug when trying to encode <40Hz sounds. For example with the song "2049" by Hans Zimmer.

    0 comments No comments

  4. Adequat 1 Reputation point
    2022-09-20T21:37:05.737+00:00

    Apparently, the problem is solved in Windows 11 22H2.
    I am wondering if this will also be the case in the upcoming Windows 10 22H2.

    0 comments No comments

  5. Adam Marks 6 Reputation points
    2022-10-27T13:33:57.323+00:00

    We have a number of customers on Windows 10 that are experiencing this problem with audio corruption because of the Media Foundation AAC encoder.

    Is there any news on whether there will be a patch for this problem in Windows 10?


Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.