Guide pratique pour créer un nuanceur de calcul
Un nuanceur de calcul est un nuanceur programmable HLSL (High Level Shader Language) microsoft qui utilise un accès généralisé à la mémoire d’entrée et de sortie pour prendre en charge pratiquement n’importe quel type de calcul. Cette rubrique montre comment créer un nuanceur de calcul. La technologie de nuanceur de calcul est également appelée technologie DirectCompute.
Pour créer un nuanceur de calcul :
Compilez le code du nuanceur HLSL en appelant D3DCompileFromFile.
UINT flags = D3DCOMPILE_ENABLE_STRICTNESS; #if defined( DEBUG ) || defined( _DEBUG ) flags |= D3DCOMPILE_DEBUG; #endif // Prefer higher CS shader profile when possible as CS 5.0 provides better performance on 11-class hardware. LPCSTR profile = ( device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ) ? "cs_5_0" : "cs_4_0"; const D3D_SHADER_MACRO defines[] = { "EXAMPLE_DEFINE", "1", NULL, NULL }; ID3DBlob* shaderBlob = nullptr; ID3DBlob* errorBlob = nullptr; HRESULT hr = D3DCompileFromFile( srcFile, defines, D3D_COMPILE_STANDARD_FILE_INCLUDE, entryPoint, profile, flags, 0, &shaderBlob, &errorBlob );
Créez un nuanceur de calcul à l’aide de ID3D11Device::CreateComputeShader.
ID3D11ComputeShader* g_pFinalPassCS = NULL; pd3dDevice->CreateComputeShader( pBlobFinalPassCS->GetBufferPointer(), pBlobFinalPassCS->GetBufferSize(), NULL, &g_pFinalPassCS );
L’exemple de code suivant montre comment compiler et créer un nuanceur de calcul.
Notes
Pour cet exemple de code, vous avez besoin du Kit de développement logiciel (SDK) Windows 8.0 et du fichier d3dcompiler_44.dll du dossier %PROGRAM_FILE%\Windows Kits\8.0\Redist\D3D\<arch> dans votre chemin d’accès.
#define _WIN32_WINNT 0x600
#include <stdio.h>
#include <d3d11.h>
#include <d3dcompiler.h>
#pragma comment(lib,"d3d11.lib")
#pragma comment(lib,"d3dcompiler.lib")
HRESULT CompileComputeShader( _In_ LPCWSTR srcFile, _In_ LPCSTR entryPoint,
_In_ ID3D11Device* device, _Outptr_ ID3DBlob** blob )
{
if ( !srcFile || !entryPoint || !device || !blob )
return E_INVALIDARG;
*blob = nullptr;
UINT flags = D3DCOMPILE_ENABLE_STRICTNESS;
#if defined( DEBUG ) || defined( _DEBUG )
flags |= D3DCOMPILE_DEBUG;
#endif
// We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware
LPCSTR profile = ( device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ) ? "cs_5_0" : "cs_4_0";
const D3D_SHADER_MACRO defines[] =
{
"EXAMPLE_DEFINE", "1",
NULL, NULL
};
ID3DBlob* shaderBlob = nullptr;
ID3DBlob* errorBlob = nullptr;
HRESULT hr = D3DCompileFromFile( srcFile, defines, D3D_COMPILE_STANDARD_FILE_INCLUDE,
entryPoint, profile,
flags, 0, &shaderBlob, &errorBlob );
if ( FAILED(hr) )
{
if ( errorBlob )
{
OutputDebugStringA( (char*)errorBlob->GetBufferPointer() );
errorBlob->Release();
}
if ( shaderBlob )
shaderBlob->Release();
return hr;
}
*blob = shaderBlob;
return hr;
}
int main()
{
// Create Device
const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0 };
UINT createDeviceFlags = 0;
#ifdef _DEBUG
createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
ID3D11Device* device = nullptr;
HRESULT hr = D3D11CreateDevice( nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags, lvl, _countof(lvl),
D3D11_SDK_VERSION, &device, nullptr, nullptr );
if ( hr == E_INVALIDARG )
{
// DirectX 11.0 Runtime doesn't recognize D3D_FEATURE_LEVEL_11_1 as a valid value
hr = D3D11CreateDevice( nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0, &lvl[1], _countof(lvl) - 1,
D3D11_SDK_VERSION, &device, nullptr, nullptr );
}
if ( FAILED(hr) )
{
printf("Failed creating Direct3D 11 device %08X\n", hr );
return -1;
}
// Verify compute shader is supported
if ( device->GetFeatureLevel() < D3D_FEATURE_LEVEL_11_0 )
{
D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts = { 0 } ;
(void)device->CheckFeatureSupport( D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts) );
if ( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
{
device->Release();
printf( "DirectCompute is not supported by this device\n" );
return -1;
}
}
// Compile shader
ID3DBlob *csBlob = nullptr;
hr = CompileComputeShader( L"ExampleCompute.hlsl", "CSMain", device, &csBlob );
if ( FAILED(hr) )
{
device->Release();
printf("Failed compiling shader %08X\n", hr );
return -1;
}
// Create shader
ID3D11ComputeShader* computeShader = nullptr;
hr = device->CreateComputeShader( csBlob->GetBufferPointer(), csBlob->GetBufferSize(), nullptr, &computeShader );
csBlob->Release();
if ( FAILED(hr) )
{
device->Release();
}
printf("Success\n");
// Clean up
computeShader->Release();
device->Release();
return 0;
}
L’exemple de code précédent compile le code du nuanceur de calcul dans le fichier ExampleCompute.hlsl. Voici le code dans ExampleCompute.hlsl :
//--------------------------------------------------------------------------------------
// File: BasicCompute11.hlsl
//
// This file contains the Compute Shader to perform array A + array B
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------
#ifdef USE_STRUCTURED_BUFFERS
struct BufType
{
int i;
float f;
#ifdef TEST_DOUBLE
double d;
#endif
};
StructuredBuffer<BufType> Buffer0 : register(t0);
StructuredBuffer<BufType> Buffer1 : register(t1);
RWStructuredBuffer<BufType> BufferOut : register(u0);
[numthreads(1, 1, 1)]
void CSMain( uint3 DTid : SV_DispatchThreadID )
{
BufferOut[DTid.x].i = Buffer0[DTid.x].i + Buffer1[DTid.x].i;
BufferOut[DTid.x].f = Buffer0[DTid.x].f + Buffer1[DTid.x].f;
#ifdef TEST_DOUBLE
BufferOut[DTid.x].d = Buffer0[DTid.x].d + Buffer1[DTid.x].d;
#endif
}
#else // The following code is for raw buffers
ByteAddressBuffer Buffer0 : register(t0);
ByteAddressBuffer Buffer1 : register(t1);
RWByteAddressBuffer BufferOut : register(u0);
[numthreads(1, 1, 1)]
void CSMain( uint3 DTid : SV_DispatchThreadID )
{
#ifdef TEST_DOUBLE
int i0 = asint( Buffer0.Load( DTid.x*16 ) );
float f0 = asfloat( Buffer0.Load( DTid.x*16+4 ) );
double d0 = asdouble( Buffer0.Load( DTid.x*16+8 ), Buffer0.Load( DTid.x*16+12 ) );
int i1 = asint( Buffer1.Load( DTid.x*16 ) );
float f1 = asfloat( Buffer1.Load( DTid.x*16+4 ) );
double d1 = asdouble( Buffer1.Load( DTid.x*16+8 ), Buffer1.Load( DTid.x*16+12 ) );
BufferOut.Store( DTid.x*16, asuint(i0 + i1) );
BufferOut.Store( DTid.x*16+4, asuint(f0 + f1) );
uint dl, dh;
asuint( d0 + d1, dl, dh );
BufferOut.Store( DTid.x*16+8, dl );
BufferOut.Store( DTid.x*16+12, dh );
#else
int i0 = asint( Buffer0.Load( DTid.x*8 ) );
float f0 = asfloat( Buffer0.Load( DTid.x*8+4 ) );
int i1 = asint( Buffer1.Load( DTid.x*8 ) );
float f1 = asfloat( Buffer1.Load( DTid.x*8+4 ) );
BufferOut.Store( DTid.x*8, asuint(i0 + i1) );
BufferOut.Store( DTid.x*8+4, asuint(f0 + f1) );
#endif // TEST_DOUBLE
}
#endif // USE_STRUCTURED_BUFFERS