Skip to content

Commit

Permalink
Fix for audio drift issues with fragmented mp4 caused by inaccurate timestamps and variable framerate.
Browse files Browse the repository at this point in the history
  • Loading branch information
sskodje committed Jan 19, 2023
1 parent 58ab97e commit 8fc241d
Show file tree
Hide file tree
Showing 9 changed files with 266 additions and 203 deletions.
10 changes: 5 additions & 5 deletions ScreenRecorderLibNative/AudioManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ HRESULT AudioManager::InitializeAudioCapture()
return hr;
}

std::vector<BYTE> AudioManager::GrabAudioFrame(_In_ int byteCount)
std::vector<BYTE> AudioManager::GrabAudioFrame(_In_ int durationHundredNanos)
{
EnterCriticalSection(&m_CriticalSection);
LeaveCriticalSectionOnExit leaveOnExit(&m_CriticalSection);
Expand All @@ -98,8 +98,8 @@ std::vector<BYTE> AudioManager::GrabAudioFrame(_In_ int byteCount)
}
};

std::vector<BYTE> outputDeviceData = m_LoopbackCaptureOutputDevice->GetRecordedBytes(byteCount);
std::vector<BYTE> inputDeviceData = m_LoopbackCaptureInputDevice->GetRecordedBytes(byteCount);
std::vector<BYTE> outputDeviceData = m_LoopbackCaptureOutputDevice->GetRecordedBytes(durationHundredNanos);
std::vector<BYTE> inputDeviceData = m_LoopbackCaptureInputDevice->GetRecordedBytes(durationHundredNanos);
returnAudioOverflowToBuffer(outputDeviceData, inputDeviceData);
if (inputDeviceData.size() > 0 && outputDeviceData.size() && inputDeviceData.size() != outputDeviceData.size()) {
LOG_ERROR(L"Mixing audio byte arrays with differing sizes");
Expand All @@ -108,9 +108,9 @@ std::vector<BYTE> AudioManager::GrabAudioFrame(_In_ int byteCount)
return std::move(MixAudio(outputDeviceData, inputDeviceData, GetAudioOptions()->GetOutputVolume(), GetAudioOptions()->GetInputVolume()));
}
else if (m_LoopbackCaptureOutputDevice)
return std::move(MixAudio(m_LoopbackCaptureOutputDevice->GetRecordedBytes(byteCount), std::vector<BYTE>(), GetAudioOptions()->GetOutputVolume(), 1.0));
return std::move(MixAudio(m_LoopbackCaptureOutputDevice->GetRecordedBytes(durationHundredNanos), std::vector<BYTE>(), GetAudioOptions()->GetOutputVolume(), 1.0));
else if (m_LoopbackCaptureInputDevice)
return std::move(MixAudio(std::vector<BYTE>(), m_LoopbackCaptureInputDevice->GetRecordedBytes(byteCount), 1.0, GetAudioOptions()->GetInputVolume()));
return std::move(MixAudio(std::vector<BYTE>(), m_LoopbackCaptureInputDevice->GetRecordedBytes(durationHundredNanos), 1.0, GetAudioOptions()->GetInputVolume()));
else
return std::vector<BYTE>();
}
Expand Down
2 changes: 1 addition & 1 deletion ScreenRecorderLibNative/AudioManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class AudioManager
~AudioManager();
HRESULT Initialize(_In_ std::shared_ptr<AUDIO_OPTIONS> &audioOptions);
void ClearRecordedBytes();
std::vector<BYTE> GrabAudioFrame(_In_ int byteCount);
std::vector<BYTE> GrabAudioFrame(_In_ int durationHundredNanos);
private:
CRITICAL_SECTION m_CriticalSection;
std::shared_ptr<AUDIO_OPTIONS> m_AudioOptions;
Expand Down
84 changes: 42 additions & 42 deletions ScreenRecorderLibNative/LoopbackCapture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,34 +73,34 @@ HRESULT LoopbackCapture::StartLoopbackCapture(
// can do this in-place since we're not changing the size of the format
// also, the engine will auto-convert from float to int for us
switch (pwfx->wFormatTag) {
case WAVE_FORMAT_IEEE_FLOAT:
pwfx->wFormatTag = WAVE_FORMAT_PCM;
pwfx->wBitsPerSample = 16;
pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
break;

case WAVE_FORMAT_EXTENSIBLE:
{
// naked scope for case-local variable
PWAVEFORMATEXTENSIBLE pEx = reinterpret_cast<PWAVEFORMATEXTENSIBLE>(pwfx);
if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, pEx->SubFormat)) {
pEx->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
pEx->Samples.wValidBitsPerSample = 16;
case WAVE_FORMAT_IEEE_FLOAT:
pwfx->wFormatTag = WAVE_FORMAT_PCM;
pwfx->wBitsPerSample = 16;
pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
break;

case WAVE_FORMAT_EXTENSIBLE:
{
// naked scope for case-local variable
PWAVEFORMATEXTENSIBLE pEx = reinterpret_cast<PWAVEFORMATEXTENSIBLE>(pwfx);
if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, pEx->SubFormat)) {
pEx->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
pEx->Samples.wValidBitsPerSample = 16;
pwfx->wBitsPerSample = 16;
pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
}
else {
LOG_ERROR(L"%s", L"Don't know how to coerce mix format to int-16");
return E_UNEXPECTED;
}
}
else {
LOG_ERROR(L"%s", L"Don't know how to coerce mix format to int-16");
return E_UNEXPECTED;
}
}
break;
break;

default:
LOG_ERROR(L"Don't know how to coerce WAVEFORMATEX with wFormatTag = 0x%08x to int-16", pwfx->wFormatTag);
return E_UNEXPECTED;
default:
LOG_ERROR(L"Don't know how to coerce WAVEFORMATEX with wFormatTag = 0x%08x to int-16", pwfx->wFormatTag);
return E_UNEXPECTED;
}
}
UINT32 outputSampleRate;
Expand Down Expand Up @@ -166,15 +166,15 @@ HRESULT LoopbackCapture::StartLoopbackCapture(
// so we're going to do a timer-driven loop
switch (flow)
{
case eRender:
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, audioClientBuffer, 0, pwfx, 0);
break;
case eCapture:
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, audioClientBuffer, 0, pwfx, 0);
break;
default:
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, audioClientBuffer, 0, pwfx, 0);
break;
case eRender:
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, audioClientBuffer, 0, pwfx, 0);
break;
case eCapture:
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, audioClientBuffer, 0, pwfx, 0);
break;
default:
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, audioClientBuffer, 0, pwfx, 0);
break;
}
if (FAILED(hr)) {
LOG_ERROR(L"IAudioClient::Initialize failed on %ls: hr = 0x%08x", m_Tag.c_str(), hr);
Expand Down Expand Up @@ -351,11 +351,11 @@ std::vector<BYTE> LoopbackCapture::PeakRecordedBytes()
return m_RecordedBytes;
}

std::vector<BYTE> LoopbackCapture::GetRecordedBytes(int byteCount)
std::vector<BYTE> LoopbackCapture::GetRecordedBytes(int duration100Nanos)
{
byteCount = min(byteCount, m_RecordedBytes.size());
int frameCount = int(ceil(m_InputFormat.sampleRate * HundredNanosToSeconds(duration100Nanos)));
int byteCount = min((frameCount * m_InputFormat.FrameBytes()), m_RecordedBytes.size());
m_TaskWrapperImpl->m_Mutex.lock();

std::vector<BYTE> newvector(m_RecordedBytes.begin(), m_RecordedBytes.begin() + byteCount);
// convert audio
if (requiresResampling() && byteCount > 0) {
Expand Down Expand Up @@ -405,13 +405,13 @@ HRESULT LoopbackCapture::StartCapture(UINT32 sampleRate, UINT32 audioChannels, s
auto file = prefs.m_hFile;
m_TaskWrapperImpl->m_CaptureTask = concurrency::create_task([this, flow, sampleRate, audioChannels, device, file]() {
if (FAILED(StartLoopbackCapture(device,
file,
true,
m_CaptureStartedEvent,
m_CaptureStopEvent,
flow,
sampleRate,
audioChannels))) {
file,
true,
m_CaptureStartedEvent,
m_CaptureStopEvent,
flow,
sampleRate,
audioChannels))) {
SetEvent(m_CaptureStopEvent);
}
});
Expand Down
2 changes: 1 addition & 1 deletion ScreenRecorderLibNative/LoopbackCapture.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class LoopbackCapture
UINT32 channels
);
std::vector<BYTE> PeakRecordedBytes();
std::vector<BYTE> GetRecordedBytes(int byteCount);
std::vector<BYTE> GetRecordedBytes(int duration100Nanos);
HRESULT StartCapture(UINT32 audioChannels, std::wstring device, EDataFlow flow) { return StartCapture(0, audioChannels, device, flow); }
HRESULT StartCapture(UINT32 sampleRate, UINT32 audioChannels, std::wstring device, EDataFlow flow);
HRESULT StopCapture();
Expand Down
87 changes: 68 additions & 19 deletions ScreenRecorderLibNative/OutputManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ using namespace concurrency;
OutputManager::OutputManager() :
m_Device(nullptr),
m_DeviceContext(nullptr),
m_PresentationClock(nullptr),
m_TimeSrc(nullptr),
m_CallBack(nullptr),
m_FinalizeEvent(nullptr),
m_SinkWriter(nullptr),
Expand Down Expand Up @@ -66,6 +68,13 @@ HRESULT OutputManager::Initialize(
if (m_MediaTransform) {
m_MediaTransform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, 0);
}
if (!m_TimeSrc) {
RETURN_ON_BAD_HR(MFCreateSystemTimeSource(&m_TimeSrc));
}
if (!m_PresentationClock) {
RETURN_ON_BAD_HR(MFCreatePresentationClock(&m_PresentationClock));
RETURN_ON_BAD_HR(m_PresentationClock->SetTimeSource(m_TimeSrc));
}
RETURN_ON_BAD_HR(m_DeviceManager->ResetDevice(pDevice, m_ResetToken));
return S_OK;
}
Expand All @@ -91,6 +100,8 @@ HRESULT OutputManager::BeginRecording(_In_ std::wstring outputPath, _In_ SIZE vi
RETURN_ON_BAD_HR(MFCreateFile(MF_ACCESSMODE_READWRITE, MF_OPENMODE_FAIL_IF_EXIST, MF_FILEFLAGS_NONE, outputPath.c_str(), &mfByteStream));
RETURN_ON_BAD_HR(hr = InitializeVideoSinkWriter(mfByteStream, m_Device, inputMediaFrameRect, videoOutputFrameSize, DXGI_MODE_ROTATION_UNSPECIFIED, m_CallBack, &m_SinkWriter, &m_VideoStreamIndex, &m_AudioStreamIndex));
}
StartMediaClock();
LOG_DEBUG("Sink Writer initialized");
return hr;
}

Expand All @@ -113,6 +124,8 @@ HRESULT OutputManager::BeginRecording(_In_ IStream *pStream, _In_ SIZE videoOutp
RECT inputMediaFrameRect = RECT{ 0,0,videoOutputFrameSize.cx,videoOutputFrameSize.cy };
RETURN_ON_BAD_HR(hr = InitializeVideoSinkWriter(mfByteStream, m_Device, inputMediaFrameRect, videoOutputFrameSize, DXGI_MODE_ROTATION_UNSPECIFIED, m_CallBack, &m_SinkWriter, &m_VideoStreamIndex, &m_AudioStreamIndex));
}
StartMediaClock();
LOG_DEBUG("Sink Writer initialized");
return hr;
}

Expand Down Expand Up @@ -159,6 +172,7 @@ HRESULT OutputManager::FinalizeRecording()
}
}
}
StopMediaClock();
return finalizeResult;
}

Expand Down Expand Up @@ -256,23 +270,58 @@ void OutputManager::WriteTextureToImageAsync(_In_ ID3D11Texture2D *pAcquiredDesk
}).then([this, filePath, pAcquiredDesktopImage, onCompletion](concurrency::task<HRESULT> t)
{
HRESULT hr;
try {
hr = t.get();
// if .get() didn't throw and the HRESULT succeeded, there are no errors.
}
catch (const exception &e) {
// handle error
LOG_ERROR(L"Exception saving snapshot: %s", e.what());
hr = E_FAIL;
}
pAcquiredDesktopImage->Release();
if (onCompletion) {
std::invoke(onCompletion, hr);
}
return hr;
try {
hr = t.get();
// if .get() didn't throw and the HRESULT succeeded, there are no errors.
}
catch (const exception &e) {
// handle error
LOG_ERROR(L"Exception saving snapshot: %s", e.what());
hr = E_FAIL;
}
pAcquiredDesktopImage->Release();
if (onCompletion) {
std::invoke(onCompletion, hr);
}
return hr;
});
}

// Starts the presentation clock at position zero.
// Returns the HRESULT from IMFPresentationClock::Start, or E_POINTER when the
// presentation clock has not been created.
HRESULT OutputManager::StartMediaClock()
{
    // The clock member starts out null and is only created in Initialize(),
    // so fail cleanly instead of dereferencing a null COM pointer.
    if (!m_PresentationClock) {
        return E_POINTER;
    }
    return m_PresentationClock->Start(0);
}
// Restarts the presentation clock from PRESENTATION_CURRENT_POSITION, i.e.
// without resetting the clock time the way StartMediaClock() does.
// Returns the HRESULT from IMFPresentationClock::Start, or E_POINTER when the
// presentation clock has not been created.
HRESULT OutputManager::ResumeMediaClock()
{
    // The clock member starts out null and is only created in Initialize(),
    // so fail cleanly instead of dereferencing a null COM pointer.
    if (!m_PresentationClock) {
        return E_POINTER;
    }
    return m_PresentationClock->Start(PRESENTATION_CURRENT_POSITION);
}
// Pauses the presentation clock.
// Returns the HRESULT from IMFPresentationClock::Pause, or E_POINTER when the
// presentation clock has not been created.
HRESULT OutputManager::PauseMediaClock()
{
    // The clock member starts out null and is only created in Initialize(),
    // so fail cleanly instead of dereferencing a null COM pointer.
    if (!m_PresentationClock) {
        return E_POINTER;
    }
    return m_PresentationClock->Pause();
}
// Stops the presentation clock.
// Returns the HRESULT from IMFPresentationClock::Stop, or E_POINTER when the
// presentation clock has not been created.
HRESULT OutputManager::StopMediaClock()
{
    // FinalizeRecording() calls this unconditionally; the clock member starts
    // out null and is only created in Initialize(), so guard the dereference.
    if (!m_PresentationClock) {
        return E_POINTER;
    }
    return m_PresentationClock->Stop();
}

bool OutputManager::isMediaClockRunning()
{
MFCLOCK_STATE state;
m_PresentationClock->GetState(0, &state);
return state == MFCLOCK_STATE_RUNNING;
}

bool OutputManager::isMediaClockPaused()
{
MFCLOCK_STATE state;
m_PresentationClock->GetState(0, &state);
return state == MFCLOCK_STATE_PAUSED;
}

// Reads the current time of the presentation clock into *pTime.
// pTime: receives the value produced by IMFPresentationClock::GetTime; it is
//        left untouched when this function fails before calling the clock.
// Returns the HRESULT from GetTime, or E_POINTER when pTime is null or the
// presentation clock has not been created.
HRESULT OutputManager::GetMediaTimeStamp(_Out_ INT64 *pTime)
{
    if (!pTime) {
        return E_POINTER;
    }
    // The clock member starts out null and is only created in Initialize(),
    // so fail cleanly instead of dereferencing a null COM pointer.
    if (!m_PresentationClock) {
        return E_POINTER;
    }
    return m_PresentationClock->GetTime(pTime);
}

HRESULT OutputManager::ConfigureOutputMediaTypes(
_In_ UINT destWidth,
Expand Down Expand Up @@ -338,7 +387,7 @@ HRESULT OutputManager::ConfigureInputMediaTypes(
RETURN_ON_BAD_HR(pVideoMediaType->SetUINT32(MF_MT_VIDEO_CHROMA_SITING, MFVideoChromaSubsampling_ProgressiveChroma));
RETURN_ON_BAD_HR(pVideoMediaType->SetUINT32(MF_MT_VIDEO_NOMINAL_RANGE, MFNominalRange_0_255));
RETURN_ON_BAD_HR(MFSetAttributeSize(pVideoMediaType, MF_MT_FRAME_SIZE, sourceWidth, sourceHeight));
if (!GetEncoderOptions()->GetIsFixedFramerate()) {
if (!GetEncoderOptions()->GetIsFixedFramerate() && !GetEncoderOptions()->GetIsFragmentedMp4Enabled()) {
RETURN_ON_BAD_HR(MFSetAttributeRatio(pVideoMediaType, MF_MT_FRAME_RATE, GetEncoderOptions()->GetVideoFps(), 1));
}
RETURN_ON_BAD_HR(MFSetAttributeRatio(pVideoMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1));
Expand Down Expand Up @@ -464,9 +513,9 @@ HRESULT OutputManager::InitializeVideoSinkWriter(
auto SetAttributeU32([](_Inout_ CComPtr<ICodecAPI> &codec, _In_ const GUID &guid, _In_ UINT32 value)
{
VARIANT val;
val.vt = VT_UI4;
val.uintVal = value;
return codec->SetValue(&guid, &val);
val.vt = VT_UI4;
val.uintVal = value;
return codec->SetValue(&guid, &val);
});

CComPtr<ICodecAPI> encoder = nullptr;
Expand Down Expand Up @@ -594,7 +643,7 @@ HRESULT OutputManager::WriteFrameToVideo(_In_ INT64 frameStartPos, _In_ INT64 fr
SafeRelease(&p2DBuffer);
SafeRelease(&pMediaBuffer);
return hr;
}
}

HRESULT OutputManager::WriteAudioSamplesToVideo(_In_ INT64 frameStartPos, _In_ INT64 frameDuration, _In_ DWORD streamIndex, _In_ BYTE *pSrc, _In_ DWORD cbData)
{
Expand Down
10 changes: 10 additions & 0 deletions ScreenRecorderLibNative/OutputManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,20 @@ class OutputManager
void WriteTextureToImageAsync(_In_ ID3D11Texture2D *pAcquiredDesktopImage, _In_ std::wstring filePath, _In_opt_ std::function<void(HRESULT)> onCompletion = nullptr);
inline nlohmann::fifo_map<std::wstring, int> GetFrameDelays() { return m_FrameDelays; }
inline UINT64 GetRenderedFrameCount() { return m_RenderedFrameCount; }
HRESULT StartMediaClock();
HRESULT ResumeMediaClock();
HRESULT PauseMediaClock();
HRESULT StopMediaClock();
HRESULT GetMediaTimeStamp(_Out_ INT64 *pTime);
bool isMediaClockRunning();
bool isMediaClockPaused();
private:
ID3D11DeviceContext *m_DeviceContext = nullptr;
ID3D11Device *m_Device = nullptr;

CComPtr<IMFPresentationTimeSource> m_TimeSrc;
CComPtr<IMFPresentationClock> m_PresentationClock;

std::shared_ptr<ENCODER_OPTIONS> m_EncoderOptions;
std::shared_ptr<AUDIO_OPTIONS> m_AudioOptions;
std::shared_ptr<SNAPSHOT_OPTIONS> m_SnapshotOptions;
Expand Down
Loading

0 comments on commit 8fc241d

Please sign in to comment.