Can't play noise through default audio endpoint renderer using WASAPI code sample

John Glen 1 Reputation point
2020-12-13T18:37:45.827+00:00

I am trying to play noise through the default audio endpoint renderer using WASAPI. I am trying to use the code provided by Microsoft on this page: https://learn.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream. I want to write a class that can generate noise for this code sample.

I have tried writing signed and unsigned integer values to the buffer of the default audio endpoint renderer, and see that values are being written to the buffer, but there is no sound playing.

To start, I made a header with the needed methods, and a random number generator.

#pragma once  
  
// RNG  
#include <random>  
  
/// Uniform pseudo-random integer source over the closed range [low, high].
///
/// T is expected to be an integer type no wider than 64 bits. The bounds are
/// normalised so that passing them in either order gives the same range.
template <typename T>
class Random {
public:
    /// @param low   one end of the inclusive range
    /// @param high  the other end of the inclusive range
    Random(T low, T high)
        : mLow(low < high ? low : high),
          mHigh(low < high ? high : low),
          function(std::random_device{}()) {}

    /// @return the next value, always within [low, high].
    T operator()() {
        using Wide = std::mt19937_64::result_type;

        // Number of distinct values in the range. Unsigned arithmetic wraps,
        // so a range that covers all 2^64 values yields a span of 0.
        const Wide span = static_cast<Wide>(mHigh) - static_cast<Wide>(mLow) + 1;
        const Wide raw = function();

        // A span of 0 means "every 64-bit value is valid": use the raw draw
        // and avoid a modulo by zero.
        const Wide offset = (span == 0) ? raw : raw % span;
        return static_cast<T>(static_cast<Wide>(mLow) + offset);
    }

private:
    T mLow;
    T mHigh;
    std::mt19937_64 function;
};
  
/// Audio source that fills a render buffer with noise.
///
/// SetFormat() records the stream format and creates the generator state,
/// LoadData() writes samples for a number of frames into a caller buffer.
class Noise_Gen {

public:

    Noise_Gen()
        : rd(nullptr),
          nChannels(0),
          nSamplesPerSec(0),
          nAvgBytesPerSec(0),
          nByteAlign(0),
          wBitsPerSample(0),
          wValidBitsPerSample(0),
          wSamplesPerBlock(0),
          dwChannelMask(0) {}

    ~Noise_Gen() {
        // rd is type-erased, and a delete-expression on a void* is not valid
        // C++ (compilers only warn). Release the storage directly instead.
        // This is correct as long as the object behind rd was created with a
        // plain `new` and is trivially destructible, which holds for the
        // generator objects SetFormat() allocates.
        ::operator delete(rd);
    }

    // The object owns the allocation behind rd; a copy would free it twice.
    Noise_Gen(const Noise_Gen&) = delete;
    Noise_Gen& operator=(const Noise_Gen&) = delete;

    /// Records the stream format and prepares the generator for it.
    HRESULT SetFormat(WAVEFORMATEX*);

    /// Writes bufferFrameCount frames of noise to pData and reports buffer
    /// flags through flags.
    HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags);

private:
    void* rd;  // type-erased generator state, owned by this object

    // WAVEFORMATEX
    WORD nChannels;
    DWORD nSamplesPerSec;
    DWORD nAvgBytesPerSec;
    WORD nByteAlign;
    WORD wBitsPerSample;

    // WAVEFORMATEXTENSIBLE
    WORD wValidBitsPerSample;
    WORD wSamplesPerBlock;
    DWORD dwChannelMask;
};

Then I added the definitions:
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include <time.h>  
  
#include "Noise_Gen.h"  
  
HRESULT Noise_Gen::SetFormat(WAVEFORMATEX* format) {  
 nChannels = format->nChannels;  
 nSamplesPerSec = format->nSamplesPerSec;  
 nAvgBytesPerSec = format->nAvgBytesPerSec;  
 nByteAlign = format->nBlockAlign;  
 wBitsPerSample = format->wBitsPerSample;  
 WORD  wFormatTag = format->wFormatTag;  
 if(wFormatTag == WAVE_FORMAT_EXTENSIBLE) {  
 WAVEFORMATEXTENSIBLE* pWFE = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(format);  
 wValidBitsPerSample = pWFE->Samples.wValidBitsPerSample;  
 wSamplesPerBlock = pWFE->Samples.wSamplesPerBlock;  
 dwChannelMask = pWFE->dwChannelMask;  
 } else {  
 wValidBitsPerSample = wBitsPerSample;  
 }  
 double amplitude = std::pow(2.0, wValidBitsPerSample) - 1;  
 switch(wBitsPerSample / 8) {  
 case(1):  
 rd = new Random<unsigned __int8>(0.0, amplitude);  
 break;  
 case(2):   
 rd = new Random<unsigned __int16>(0.0, amplitude);  
 break;  
 case(3):  
 rd = new Random<unsigned __int32>(0.0, amplitude);  
 break;  
 case(4):   
 rd = new Random<unsigned __int32>(0.0, amplitude);  
 break;  
 case(5):   
 rd = new Random<unsigned __int64>(0.0, amplitude);  
 break;  
 case(6):  
 rd = new Random<unsigned __int64>(0.0, amplitude);  
 break;  
 case(7):   
 rd = new Random<unsigned __int64>(0.0, amplitude);  
 break;  
 case(8):  
 rd = new Random<unsigned __int64>(0.0, amplitude);  
 break;  
 default:  
 return E_NOTIMPL;  
 }  
 return S_OK;  
}  
  
// (The size of an audio frame = nChannels * wBitsPerSample)  
HRESULT Noise_Gen::LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {  
 for(UINT32 i = 0; i < nChannels *bufferFrameCount; i++) {  
 switch(wBitsPerSample / 8) {  
 case(1):  
 pData[i] = (((Random<unsigned __int8>*)rd)->operator()());  
 break;  
 case(2):{  
 unsigned __int16* pData2 = (unsigned __int16*) pData;  
 pData2[i] = (((Random<unsigned __int16>*)rd)->operator()());  
 break;  
 }  
 case(3): {  
 __int32 data = ((Random<unsigned __int32>*)rd)->operator()();  
 unsigned char* cp = (unsigned char*) (&data);  
 pData[(3 * i)] = cp[0];  
 pData[1 + (3 * i)] = cp[1];  
 pData[2 + (3 * i)] = cp[2];  
 break;  
 }  
 case(4):{  
 unsigned __int32* pData2 = (unsigned __int32*) pData;  
 pData2[i] = (((Random<unsigned __int32>*)rd)->operator()());  
 break;  
 }  
 case(5): {  
 __int64 data = ((Random<unsigned __int64>*)rd)->operator()();  
 unsigned char* cp = (unsigned char*) &data;  
 pData[(5 * i)] = cp[0];  
 pData[1 + (5 * i)] = cp[1];  
 pData[2 + (5 * i)] = cp[2];  
 pData[3 + (5 * i)] = cp[3];  
 pData[4 + (5 * i)] = cp[4];  
 break;  
 }  
 case(6): {  
 __int64 data = ((Random<unsigned __int64>*)rd)->operator()();  
 unsigned char* cp = (unsigned char*) &data;  
 pData[(6 * i)] = cp[0];  
 pData[1 + (6 * i)] = cp[1];  
 pData[2 + (6 * i)] = cp[2];  
 pData[3 + (6 * i)] = cp[3];  
 pData[4 + (6 * i)] = cp[4];  
 pData[5 + (6 * i)] = cp[5];  
 break;  
 }  
 case(7): {  
 __int64 data = ((Random<unsigned __int64>*)rd)->operator()();  
 unsigned char* cp = (unsigned char*) &data;  
 pData[(7 * i)] = cp[0];  
 pData[1 + (7 * i)] = cp[1];  
 pData[2 + (7 * i)] = cp[2];  
 pData[3 + (7 * i)] = cp[3];  
 pData[4 + (7 * i)] = cp[4];  
 pData[5 + (7 * i)] = cp[5];  
 pData[6 + (7 * i)] = cp[6];  
 break;  
 }  
 case(8): {  
 unsigned __int64* pData2 = (unsigned __int64*) pData;  
 pData2[i] = (((Random<unsigned __int64>*)rd)->operator()());  
 break;  
 }  
 default:  
 // For stopping playback  
 flags[0] = AUDCLNT_BUFFERFLAGS_SILENT;  
 return E_NOTIMPL;  
 }  
 }  
 flags[0] = 0;  
 return S_OK;  
}  
  

Then I added my class to the template provided by Microsoft and printed the default audio endpoint renderer to the console.
#include <InitGuid.h>
#include <iostream>
#include <Windows.h>
#include <dshow.h>

// Windows multimedia device  
#include <Mmdeviceapi.h>  
#include <Functiondiscoverykeys_devpkey.h>  
  
// WASAPI  
#include <Audiopolicy.h>  
#include <Audioclient.h>  
  
#include "Noise_Gen.h"  
  
//-----------------------------------------------------------  
// Play an audio stream on the default audio rendering  
// device. The PlayAudioStream function allocates a shared  
// buffer big enough to hold one second of PCM audio data.  
// The function uses this buffer to stream data to the  
// rendering device. The inner loop runs every 1/2 second.  
//-----------------------------------------------------------  
  
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

// Jump to the enclosing function's `Exit:` label when hres is a failure code.
// Wrapped in do { } while (0) so the macro behaves as a single statement,
// including when used as the body of an if/else.
#define EXIT_ON_ERROR(hres) \
    do { if (FAILED(hres)) { goto Exit; } } while (0)

// Release a COM interface pointer if it is set, then null it so a second
// release is harmless.
#define SAFE_RELEASE(punk) \
    do { if ((punk) != NULL) { (punk)->Release(); (punk) = NULL; } } while (0)
  
// Class and interface identifiers passed to CoCreateInstance, Activate and
// GetService in PlayAudioStream, obtained with the compiler's __uuidof operator.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);  
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);  
const IID IID_IAudioClient = __uuidof(IAudioClient);  
const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient);  
  
// Streams audio produced by pMySource to the default render endpoint in
// shared mode, printing the endpoint's friendly name first.
//
// The source is told the mix format through SetFormat() and is then asked
// for data through LoadData() until it reports AUDCLNT_BUFFERFLAGS_SILENT.
// Returns the first failing HRESULT, or the result of Stop() on a clean end.
// Every COM object and allocation acquired here is released at Exit.
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;
    UINT32 numFramesPadding;
    BYTE* pData = NULL;
    DWORD flags = 0;
    IPropertyStore* pPropertyStore = NULL;
    PROPVARIANT name;

    // Initialise before the first possible jump to Exit so that
    // PropVariantClear() there is always safe.
    PropVariantInit(&name);

    if (pMySource == NULL) {
        hr = E_POINTER;
        goto Exit;
    }

    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL,
                          CLSCTX_ALL, IID_IMMDeviceEnumerator,
                          (void**) &pEnumerator);
    EXIT_ON_ERROR(hr);
    hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    EXIT_ON_ERROR(hr);  // pDevice is only valid on success

    // Print the endpoint's friendly name.
    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    EXIT_ON_ERROR(hr);
    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    EXIT_ON_ERROR(hr);
    if (name.vt == VT_LPWSTR && name.pwszVal != NULL) {
        printf("%S", name.pwszVal);
    }
    printf("\n");

    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL,
                           NULL, (void**) &pAudioClient);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                  0, hnsRequestedDuration,
                                  0, pwfx, NULL);
    EXIT_ON_ERROR(hr);
    // Tell the audio source which format to use.
    hr = pMySource->SetFormat(pwfx);
    EXIT_ON_ERROR(hr);
    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetService(IID_IAudioRenderClient,
                                  (void**) &pRenderClient);
    EXIT_ON_ERROR(hr);
    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);
    // Load the initial data into the shared buffer.
    hr = pMySource->LoadData(bufferFrameCount, pData, &flags);
    EXIT_ON_ERROR(hr);
    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);
    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = (REFERENCE_TIME) ((double) REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec);
    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);
    // Each loop fills about half of the shared buffer.
    while (flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration.
        Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
        // See how much buffer space is available.
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);
        numFramesAvailable = bufferFrameCount - numFramesPadding;
        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);
        // Get the next chunk of data from the audio source.
        hr = pMySource->LoadData(numFramesAvailable, pData, &flags);
        EXIT_ON_ERROR(hr);
        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }
    // Wait for last data in buffer to play before stopping.
    Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);
Exit:
    PropVariantClear(&name);
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pPropertyStore);
    SAFE_RELEASE(pEnumerator);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pRenderClient);
    return hr;
}
  
int main() {  
 HRESULT hr = CoInitialize(nullptr);  
 if(FAILED(hr)) { return hr; }  
 Noise_Gen* ng = new Noise_Gen();  
 PlayAudioStream(ng);  
 delete ng;  
 CoUninitialize();  
}  

The default audio endpoint renderer on my system uses 32 bit values. No sound is played. I have tried unsigned and signed values. I checked the contents of the buffer while debugging and they do change, with each __uint32 being written back to back.

I printed the default audio endpoint renderer to the console, and it is my system's speaker. Windows even shows my app in the Volume mixer, but there is no sound showing even with the volume all the way up. I then checked the sleep time to be sure it was sleeping so the system had access to the buffer, and it does sleep for 500ms between writes to the buffer.

What am I missing?

Windows API - Win32
Windows API - Win32
A core set of Windows application programming interfaces (APIs) for desktop and server applications. Previously known as Win32 API.
2,523 questions
C++
C++
A high-level, general-purpose programming language, created as an extension of the C programming language, that has object-oriented, generic, and functional features in addition to facilities for low-level memory manipulation.
3,636 questions
0 comments No comments
{count} votes