从 NUMA 节点分配内存

以下示例代码演示如何使用 NUMA 函数 GetNumaHighestNodeNumberGetNumaProcessorNodeVirtualAllocExNuma。 它还演示了如何使用 QueryWorkingSetEx 函数来检索分配页的 NUMA 节点。

#define _WIN32_WINNT 0x0600

#include <windows.h>
#include <stdlib.h>
#include <stdio.h>
#include <psapi.h>
#include <tchar.h>

SIZE_T AllocationSize;
DWORD  PageSize;

void DumpNumaNodeInfo (PVOID Buffer, SIZE_T Size);

void __cdecl wmain (int argc, const wchar_t* argv[])
{
    ULONG HighestNodeNumber;
    ULONG NumberOfProcessors;
    PVOID* Buffers = NULL;

    if (argc > 1)
    {
        (void)swscanf_s (argv[1], L"%Ix", &AllocationSize);
    }

    if (AllocationSize == 0)
    {
        AllocationSize = 16*1024*1024;
    }

    //
    // Get the number of processors and system page size.
    //

    SYSTEM_INFO SystemInfo;
    GetSystemInfo (&SystemInfo);
    NumberOfProcessors = SystemInfo.dwNumberOfProcessors;
    PageSize = SystemInfo.dwPageSize;

    //
    // Get the highest node number.
    //

    if (!GetNumaHighestNodeNumber (&HighestNodeNumber))
    {
        _tprintf (_T("GetNumaHighestNodeNumber failed: %d\n"), GetLastError());
        goto Exit;
    }

    if (HighestNodeNumber == 0)
    {
        _putts (_T("Not a NUMA system - exiting"));
        goto Exit;
    }

    //
    // Allocate array of pointers to memory blocks.
    //

    Buffers = (PVOID*) malloc (sizeof(PVOID)*NumberOfProcessors);

    if (Buffers == NULL)
    {
        _putts (_T("Allocating array of buffers failed"));
        goto Exit;
    }

    ZeroMemory (Buffers, sizeof(PVOID)*NumberOfProcessors);

    //
    // For each processor, get its associated NUMA node and allocate some memory from it.
    //

    for (UCHAR i = 0; i < NumberOfProcessors; i++)
    {
        UCHAR NodeNumber;

        if (!GetNumaProcessorNode (i, &NodeNumber))
        {
            _tprintf (_T("GetNumaProcessorNode failed: %d\n"), GetLastError());
            goto Exit;
        }

        _tprintf (_T("CPU %u: node %u\n"), (ULONG)i, NodeNumber);

        PCHAR Buffer = (PCHAR)VirtualAllocExNuma(
            GetCurrentProcess(),
            NULL,
            AllocationSize,
            MEM_RESERVE | MEM_COMMIT,
            PAGE_READWRITE,
            NodeNumber
        );

        if (Buffer == NULL)
        {
            _tprintf (_T("VirtualAllocExNuma failed: %d, node %u\n"), GetLastError(), NodeNumber);
            goto Exit;
        }

        PCHAR BufferEnd = Buffer + AllocationSize - 1;
        SIZE_T NumPages = ((SIZE_T)BufferEnd)/PageSize - ((SIZE_T)Buffer)/PageSize + 1;

        _putts (_T("Allocated virtual memory:"));
        _tprintf (_T("%p - %p (%6Iu pages), preferred node %u\n"), Buffer, BufferEnd, NumPages, NodeNumber);

        Buffers[i] = Buffer;

        //
        // At this point, virtual pages are allocated but no valid physical
        // pages are associated with them yet.
        //
        // The FillMemory call below will touch every page in the buffer, faulting
        // them into our working set. When this happens physical pages will be allocated
        // from the preferred node we specified in VirtualAllocExNuma, or any node
        // if the preferred one is out of pages.
        //

        FillMemory (Buffer, AllocationSize, 'x');

        //
        // Check the actual node number for the physical pages that are still valid
        // (if system is low on physical memory, some pages could have been trimmed already).
        //

        DumpNumaNodeInfo (Buffer, AllocationSize);

        _putts(_T(""));
    }

Exit:
    if (Buffers != NULL)
    {
        for (UINT i = 0; i < NumberOfProcessors; i++)
        {
            if (Buffers[i] != NULL)
            {
                VirtualFree (Buffers[i], 0, MEM_RELEASE);
            }
        }

        free (Buffers);
    }
}

void DumpRegion (PVOID StartPtr, PVOID EndPtr, BOOL Valid, DWORD Node)
{
    DWORD_PTR StartPage = ((DWORD_PTR)StartPtr)/PageSize;
    DWORD_PTR EndPage   = ((DWORD_PTR)EndPtr)/PageSize;
    DWORD_PTR NumPages  = (EndPage - StartPage) + 1;

    if (!Valid)
    {
        _tprintf (_T("%p - %p (%6Iu pages): no valid pages\n"), StartPtr, EndPtr, NumPages);
    }
    else
    {
        _tprintf (_T("%p - %p (%6Iu pages): node %u\n"), StartPtr, EndPtr, NumPages, Node);
    }
}

void DumpNumaNodeInfo (PVOID Buffer, SIZE_T Size)
{
    DWORD_PTR StartPage = ((DWORD_PTR)Buffer)/PageSize;
    DWORD_PTR EndPage   = ((DWORD_PTR)Buffer + Size - 1)/PageSize;
    DWORD_PTR NumPages  = (EndPage - StartPage) + 1;

    PCHAR StartPtr = (PCHAR)(PageSize*StartPage);

    _putts (_T("Checking NUMA node:"));

    PPSAPI_WORKING_SET_EX_INFORMATION WsInfo = (PPSAPI_WORKING_SET_EX_INFORMATION)
        malloc (NumPages*sizeof(PSAPI_WORKING_SET_EX_INFORMATION));

    if (WsInfo == NULL)
    {
        _putts (_T("Could not allocate array of PSAPI_WORKING_SET_EX_INFORMATION structures"));
        return;
    }

    for (DWORD_PTR i = 0; i < NumPages; i++)
    {
        WsInfo[i].VirtualAddress = StartPtr + i*PageSize;
    }

    BOOL bResult = QueryWorkingSetEx(
        GetCurrentProcess(),
        WsInfo,
        (DWORD)NumPages*sizeof(PSAPI_WORKING_SET_EX_INFORMATION)
    );

    if (!bResult)
    {
        _tprintf (_T("QueryWorkingSetEx failed: %d\n"), GetLastError());
        free (WsInfo);
        return;
    }

    PCHAR RegionStart   = NULL;
    BOOL  RegionIsValid = false;
    DWORD RegionNode    = 0;

    for (DWORD_PTR i = 0; i < NumPages; i++)
    {
        PCHAR Address = (PCHAR)WsInfo[i].VirtualAddress;
        BOOL  IsValid = WsInfo[i].VirtualAttributes.Valid;
        DWORD Node    = WsInfo[i].VirtualAttributes.Node;

        if (i == 0)
        {
            RegionStart   = Address;
            RegionIsValid = IsValid;
            RegionNode    = Node;
        }

        if (IsValid != RegionIsValid || Node != RegionNode)
        {
            DumpRegion (RegionStart, Address - 1, RegionIsValid, RegionNode);

            RegionStart   = Address;
            RegionIsValid = IsValid;
            RegionNode    = Node;
        }

        if (i == (NumPages - 1))
        {
            DumpRegion (RegionStart, Address + PageSize - 1, IsValid, Node);
        }
    }

    free (WsInfo);
}

NUMA 支持