Unexpected U/V plane offset with WIndows Media Foundation H264 decoder

schwa 1 Reputation point
2022-04-07T15:55:11.6+00:00

Decoding H264 video via Windows Media Foundation using IMFSourceReader, I am seeing an unexpected y-offset in the U/V plane data. By trial and error, I have found an adjustment that seems to work on all the video sources I've tried, but I'd really like to know if this data layout is expected or documented.

Specifically, sometimes IMFSample::GetTotalLength() returns a total buffer size that is greater than height x stride x 3/2. In that case, the U/V planes are offset from the end of the Y plane data by 2/3 of the extra data, like so:

YV12.png

To be clear, the unexpected part is the 2 rows of pixels below the Y plane data and above the U plane data. I think all of the other offsets and padding are as expected.

Image before applying adjustment:

before.jpg

Image after applying adjustment:

before.jpg

Is this data layout expected or documented?

(edit to add)

Here is a complete program that draws the first frame of a video with and without the adjustment. There is no error checking and the YUV to RGB adjustment is very basic.

The video: https://stash.reaper.fm/44214/johnny.mp4

#include <windows.h>
 #include <initguid.h>
 #include <mfapi.h>
 #include <mfidl.h>
 #include <mfreadwrite.h>

 #include "resource.h"

 const int marg=16;

 unsigned char clamp(int v)
 {
   return v < 0 ? 0 : v > 255 ? 255 : v;
 }

 void yuv_to_rgb(unsigned char *rgb, int y, int u, int v)
 {
   y -= 16;
   u -= 128;
   v -= 128;
   rgb[3] = 0;
   rgb[2] = clamp((y*298 + v*409 + 128) / 256);
   rgb[1] = clamp((y*298 - u*100 - v*208 + 128) / 256);
   rgb[0] = clamp((y*298 + u*516 + 128) / 256);
 }

 INT_PTR CALLBACK wndproc(HWND hwndDlg, UINT uMsg, WPARAM wParam, LPARAM lParam)
 {
   static HBITMAP rawbmp = NULL, adjbmp=NULL;
   static unsigned int srcw = 0, srch = 0;

   switch (uMsg)
   {
     case WM_INITDIALOG:
     {
       MFStartup(MF_VERSION);

       IMFSourceReader *reader = NULL;
       MFCreateSourceReaderFromURL(L"C:\\Users\\xxx\\Documents\\johnny.mp4", NULL, &reader);
       reader->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE);
       reader->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE);

       IMFMediaType *fmt = NULL;
       MFCreateMediaType(&fmt);
       fmt->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
       fmt->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_YV12);
       reader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, NULL, fmt);
       fmt->Release();

       reader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, &fmt);
       MFGetAttributeSize(fmt, MF_MT_FRAME_SIZE, &srcw, &srch);
       fmt->Release();

       IMFSample *sample = NULL;
       DWORD flags=0;
       INT64 readpos=0;
       IMFMediaBuffer *buffer = NULL;
       DWORD bufsz=0;
       reader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, NULL, &flags, &readpos, &sample);
       sample->GetTotalLength(&bufsz);
       sample->ConvertToContiguousBuffer(&buffer);
       sample->Release();
       reader->Release();

       BITMAPINFO bi = {0};
       bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
       bi.bmiHeader.biWidth = srcw;
       bi.bmiHeader.biHeight = srch;
       bi.bmiHeader.biPlanes = 1;
       bi.bmiHeader.biBitCount = 32;
       bi.bmiHeader.biCompression = BI_RGB;
       unsigned char *raw = NULL, *adj=NULL;
       rawbmp = CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, (void**)&raw, NULL, 0);
       adjbmp = CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, (void**)&adj, NULL, 0);

       IMF2DBuffer *buffer2d = NULL;
       BYTE *bptr = NULL;
       LONG bstride = 0;
       buffer->QueryInterface(&buffer2d);
       buffer2d->Lock2D(&bptr, &bstride);

       int offs=(bufsz*2/3-srch*bstride); // unexpected

       unsigned char *rawptr = raw + srcw*(srch-1)*4;
       unsigned char *adjptr = adj + srcw*(srch-1)*4;
       unsigned char *yptr = bptr;
       unsigned char *uptr = bptr + srch*bstride*5/4;
       unsigned char *vptr = bptr + srch*bstride;

       for (unsigned int y=0; y < srch; ++y)
       {
         for (unsigned int x=0; x < srcw; ++x)
         {
           yuv_to_rgb(rawptr+x*4, yptr[x], uptr[x/2], vptr[x/2]);
           yuv_to_rgb(adjptr+x*4, yptr[x], uptr[x/2+offs], vptr[x/2+offs]);
         }

         rawptr -= srcw*4;
         adjptr -= srcw*4;
         yptr += bstride;
         if (y&1) uptr += bstride/2;
         if (y&1) vptr += bstride/2;
       }

       buffer2d->Unlock2D();
       buffer2d->Release();
       buffer->Release();

       SetWindowPos(hwndDlg, NULL, 0, 0, srcw+2*marg, 2*(srch+2*marg), SWP_NOZORDER|SWP_NOMOVE|SWP_NOACTIVATE);
     }
     return 0;

     case WM_DESTROY:
     {
       DeleteObject(rawbmp);
       DeleteObject(adjbmp);
     }
     return 0;

     case WM_PAINT:
     {
       RECT r;
       GetClientRect(hwndDlg, &r);
       int w=r.right, h=r.bottom;

       PAINTSTRUCT ps;
       HDC dc = BeginPaint(hwndDlg, &ps);

       HDC srcdc = CreateCompatibleDC(dc);
       SelectObject(srcdc, rawbmp);
       BitBlt(dc, marg/2, marg/2, srcw, srch, srcdc, 0, 0, SRCCOPY);
       SelectObject(srcdc, adjbmp);
       BitBlt(dc, marg/2, (h+marg)/2, srcw, srch, srcdc, 0, 0, SRCCOPY);
       EndPaint(hwndDlg, &ps);
       ReleaseDC(hwndDlg, srcdc);
     }
     return 0;

     case WM_COMMAND:
       if (LOWORD(wParam) == IDCANCEL)
       {
         EndDialog(hwndDlg, 0);
       }
     return 0;
   }

   return 0;
 }

 int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
 {
   DialogBox(hInstance, MAKEINTRESOURCE(IDD_DIALOG), GetDesktopWindow(), wndproc);
   return 0;
 }
Windows API - Win32
Windows API - Win32
A core set of Windows application programming interfaces (APIs) for desktop and server applications. Previously known as Win32 API.
2,426 questions
{count} votes