Skip to content

Commit

Permalink
nvapi: Add support for Reflex
Browse files Browse the repository at this point in the history
The intent of this commit is to enable Reflex for all D3D11
and D3D12 titles using dxvk-nvapi. It does this through a new
device interface called ID3DLowLatencyDevice. This interface
will be implemented by ID3D12Device in vkd3d-proton, and
ID3D11Device in dxvk.

To provide compatibility with LatencyFleX, this change will
only use the ID3DLowLatencyDevice interface when LatencyFleX
is not detected.
  • Loading branch information
esullivan-nvidia authored and ejsullivan committed Feb 27, 2024
1 parent 42ec75b commit 5ca994f
Show file tree
Hide file tree
Showing 19 changed files with 752 additions and 47 deletions.
40 changes: 28 additions & 12 deletions src/d3d/nvapi_d3d_instance.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "../util/util_log.h"
#include "nvapi_d3d_low_latency_device.h"
#include "nvapi_d3d_instance.h"

namespace dxvk {
Expand All @@ -13,25 +14,40 @@ namespace dxvk {
log::write("LatencyFleX loaded and initialized successfully");
}

bool NvapiD3dInstance::IsReflexAvailable() {
return m_lfx->IsAvailable();
// Reports whether a Reflex backend can serve `device`: either the device
// itself reports low-latency support, or LatencyFleX is available.
bool NvapiD3dInstance::IsReflexAvailable(IUnknown* device) {
    // The device is probed first; LatencyFleX is only consulted when the
    // device does not report support.
    if (NvapiD3dLowLatencyDevice::SupportsLowLatency(device))
        return true;

    return m_lfx->IsAvailable();
}

bool NvapiD3dInstance::IsReflexEnabled() const {
return m_isLfxEnabled;
bool NvapiD3dInstance::IsLowLatencyEnabled() const {
return m_isLowLatencyEnabled;
}

void NvapiD3dInstance::SetReflexEnabled(bool value) {
m_isLfxEnabled = value;
// Returns true when the LatencyFleX wrapper reports itself as available.
bool NvapiD3dInstance::IsUsingLfx() const {
    const bool lfxAvailable = m_lfx->IsAvailable();
    return lfxAvailable;
}

void NvapiD3dInstance::Sleep() {
if (m_isLfxEnabled)
m_lfx->WaitAndBeginFrame();
// Applies the requested sleep-mode settings.
//
// device      - D3D device the request refers to.
// enable      - whether low-latency mode should be on.
// boost       - low-latency boost flag, forwarded to the device path only.
// frameTimeUs - minimum frame interval in microseconds.
//
// Returns false only when the device-level SetLatencySleepMode call fails;
// every other path returns true.
bool NvapiD3dInstance::SetReflexMode(IUnknown* device, bool enable, bool boost, uint32_t frameTimeUs) {
    // Remember the requested state only if some backend can honour it.
    if (IsReflexAvailable(device))
        m_isLowLatencyEnabled = enable;

    // LatencyFleX takes precedence when it is available and we are enabling;
    // it expects the frame time in nanoseconds.
    if (m_lfx->IsAvailable() && enable) {
        m_lfx->SetTargetFrameTime(frameTimeUs * kNanoInMicro);
        return true;
    }

    // Nothing to configure when the device has no low-latency support.
    if (!NvapiD3dLowLatencyDevice::SupportsLowLatency(device))
        return true;

    return NvapiD3dLowLatencyDevice::SetLatencySleepMode(device, enable, boost, frameTimeUs);
}

void NvapiD3dInstance::SetTargetFrameTime(uint64_t frameTimeUs) {
constexpr uint64_t kNanoInMicro = 1000;
m_lfx->SetTargetFrameTime(frameTimeUs * kNanoInMicro);
// Performs the per-frame latency sleep.
//
// Returns false only when the device-level LatencySleep call fails; every
// other path returns true.
bool NvapiD3dInstance::Sleep(IUnknown* device) {
    // LatencyFleX path: used when it is available and low latency is enabled.
    if (m_lfx->IsAvailable() && m_isLowLatencyEnabled) {
        m_lfx->WaitAndBeginFrame();
        return true;
    }

    // No device-level support: nothing to do, report success.
    if (!NvapiD3dLowLatencyDevice::SupportsLowLatency(device))
        return true;

    return NvapiD3dLowLatencyDevice::LatencySleep(device);
}
}
14 changes: 8 additions & 6 deletions src/d3d/nvapi_d3d_instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,17 @@ namespace dxvk {
~NvapiD3dInstance();

void Initialize();
[[nodiscard]] bool IsReflexAvailable();
[[nodiscard]] bool IsReflexEnabled() const;
void SetReflexEnabled(bool value);
void Sleep();
void SetTargetFrameTime(uint64_t frameTimeUs);
// True when `device` reports low-latency support or LatencyFleX is available.
[[nodiscard]] bool IsReflexAvailable(IUnknown* device);
// Current value of the stored low-latency flag.
[[nodiscard]] bool IsLowLatencyEnabled() const;
// True when the LatencyFleX wrapper reports itself as available.
[[nodiscard]] bool IsUsingLfx() const;
// Applies sleep-mode settings; frameTimeUs is in microseconds.
// Returns false only if the device-level call fails.
[[nodiscard]] bool SetReflexMode(IUnknown* device, bool enable, bool boost, uint32_t frameTimeUs);
// Performs the per-frame latency sleep; returns false only if the device-level call fails.
[[nodiscard]] bool Sleep(IUnknown* device);

private:
constexpr static uint64_t kNanoInMicro = 1000;

ResourceFactory& m_resourceFactory;
std::unique_ptr<Lfx> m_lfx;
bool m_isLfxEnabled = false;
bool m_isLowLatencyEnabled = false;
};
}
64 changes: 64 additions & 0 deletions src/d3d/nvapi_d3d_low_latency_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "nvapi_d3d_low_latency_device.h"

namespace dxvk {
// Returns true when `device` exposes ID3DLowLatencyDevice and that interface
// reports low-latency support; false when the interface cannot be obtained.
bool NvapiD3dLowLatencyDevice::SupportsLowLatency(IUnknown* device) {
    auto lowLatency = GetLowLatencyDevice(device);
    return lowLatency != nullptr && lowLatency->SupportsLowLatency();
}

// Forwards to ID3DLowLatencyDevice::LatencySleep.
// Returns false when the interface is unavailable or the call fails.
bool NvapiD3dLowLatencyDevice::LatencySleep(IUnknown* device) {
    auto lowLatency = GetLowLatencyDevice(device);
    return lowLatency != nullptr && SUCCEEDED(lowLatency->LatencySleep());
}

// Forwards the sleep-mode settings to ID3DLowLatencyDevice::SetLatencySleepMode.
// Returns false when the interface is unavailable or the call fails.
bool NvapiD3dLowLatencyDevice::SetLatencySleepMode(IUnknown* device, bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs) {
    auto lowLatency = GetLowLatencyDevice(device);
    if (lowLatency == nullptr)
        return false;

    const HRESULT hr = lowLatency->SetLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs);
    return SUCCEEDED(hr);
}

// Asks ID3DLowLatencyDevice::GetLatencyInfo to fill `latencyResults`.
// Returns false when the interface is unavailable or the call fails.
bool NvapiD3dLowLatencyDevice::GetLatencyInfo(IUnknown* device, D3D_LATENCY_RESULTS* latencyResults) {
    auto lowLatency = GetLowLatencyDevice(device);
    return lowLatency != nullptr && SUCCEEDED(lowLatency->GetLatencyInfo(latencyResults));
}

// Forwards a latency marker to ID3DLowLatencyDevice::SetLatencyMarker.
// Returns false when the interface is unavailable or the call fails.
bool NvapiD3dLowLatencyDevice::SetLatencyMarker(IUnknown* device, uint64_t frameID, uint32_t markerType) {
    auto lowLatency = GetLowLatencyDevice(device);
    if (lowLatency == nullptr)
        return false;

    const HRESULT hr = lowLatency->SetLatencyMarker(frameID, markerType);
    return SUCCEEDED(hr);
}

// Drops every cached device -> ID3DLowLatencyDevice association.
void NvapiD3dLowLatencyDevice::ClearCacheMaps() {
    // Hold the cache mutex for the duration of the clear.
    std::lock_guard<std::mutex> guard(m_LowLatencyDeviceMutex);
    m_lowLatencyDeviceMap.clear();
}

// Resolves the ID3DLowLatencyDevice interface for `device`, using a
// process-wide cache keyed by the device pointer.
//
// Returns a null Com pointer when `device` is null or does not expose the
// interface. Failed lookups are not cached, so they are retried on the next
// call.
Com<ID3DLowLatencyDevice> NvapiD3dLowLatencyDevice::GetLowLatencyDevice(IUnknown* device) {
    // The exported NVAPI entry points pass the application's pointer straight
    // through; reject null here instead of dereferencing it below.
    if (device == nullptr)
        return nullptr;

    std::scoped_lock lock(m_LowLatencyDeviceMutex);
    auto it = m_lowLatencyDeviceMap.find(device);
    if (it != m_lowLatencyDeviceMap.end())
        return it->second;

    Com<ID3DLowLatencyDevice> d3dLowLatencyDevice;
    if (FAILED(device->QueryInterface(IID_PPV_ARGS(&d3dLowLatencyDevice))))
        return nullptr;

    // Never cache a null interface, mirroring the guard used for the
    // command-queue cache.
    if (d3dLowLatencyDevice != nullptr)
        m_lowLatencyDeviceMap.emplace(device, d3dLowLatencyDevice.ptr());

    return d3dLowLatencyDevice;
}
}
25 changes: 25 additions & 0 deletions src/d3d/nvapi_d3d_low_latency_device.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include "../nvapi_private.h"
#include "../shared/shared_interfaces.h"
#include "../util/com_pointer.h"

namespace dxvk {
// Static helpers that forward low-latency (Reflex) requests to the
// ID3DLowLatencyDevice interface queried from a D3D device.
// Each bool-returning helper yields false when the interface cannot be obtained.
class NvapiD3dLowLatencyDevice {
public:
// True when the device exposes the interface and reports low-latency support.
static bool SupportsLowLatency(IUnknown* device);
// Forwards to ID3DLowLatencyDevice::LatencySleep; true on success.
static bool LatencySleep(IUnknown* device);
// Forwards the sleep-mode settings; true on success.
static bool SetLatencySleepMode(IUnknown* device, bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs);
// Fills latencyResults via ID3DLowLatencyDevice::GetLatencyInfo; true on success.
static bool GetLatencyInfo(IUnknown* device, D3D_LATENCY_RESULTS* latencyResults);
// Forwards a latency marker for the given frame; true on success.
static bool SetLatencyMarker(IUnknown* device, uint64_t frameID, uint32_t markerType);

// Empties the device -> interface cache.
static void ClearCacheMaps();

private:
// Cache of previously resolved interfaces, keyed by the device pointer.
// NOTE(review): entries are raw pointers, so the cache presumably holds no
// reference of its own and relies on ClearCacheMaps() to drop stale entries — confirm.
inline static std::unordered_map<IUnknown*, ID3DLowLatencyDevice*> m_lowLatencyDeviceMap;

// Guards m_lowLatencyDeviceMap.
inline static std::mutex m_LowLatencyDeviceMutex;

// Looks the interface up in the cache, falling back to QueryInterface.
[[nodiscard]] static Com<ID3DLowLatencyDevice> GetLowLatencyDevice(IUnknown* device);
};
}
26 changes: 26 additions & 0 deletions src/d3d12/nvapi_d3d12_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ namespace dxvk {
return SUCCEEDED(cubinDevice->GetCudaSurfaceObject(uavHandle, reinterpret_cast<UINT32*>(cudaSurfaceHandle)));
}

// Tells the command queue's ID3D12CommandQueueExt interface that the queue is
// an out-of-band queue of the given type.
// Returns false when the interface is unavailable or the call fails.
bool NvapiD3d12Device::NotifyOutOfBandCommandQueue(ID3D12CommandQueue* commandQueue, D3D12_OUT_OF_BAND_CQ_TYPE type) {
    auto queueExt = GetCommandQueueExt(commandQueue);
    return queueExt != nullptr && SUCCEEDED(queueExt->NotifyOutOfBandCommandQueue(type));
}

bool NvapiD3d12Device::LaunchCubinShader(ID3D12GraphicsCommandList* commandList, NVDX_ObjectHandle pShader, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const void* params, NvU32 paramSize) {
auto commandListExt = GetCommandListExt(commandList);
if (!commandListExt.has_value())
Expand Down Expand Up @@ -146,6 +154,22 @@ namespace dxvk {
return deviceExt;
}

// Resolves the ID3D12CommandQueueExt interface for `commandQueue`, using a
// process-wide cache keyed by the queue pointer.
//
// Returns a null Com pointer when `commandQueue` is null or does not expose
// the interface. Failed lookups are not cached.
Com<ID3D12CommandQueueExt> NvapiD3d12Device::GetCommandQueueExt(ID3D12CommandQueue* commandQueue) {
    // Reject null up front rather than dereferencing it in QueryInterface.
    if (commandQueue == nullptr)
        return nullptr;

    std::scoped_lock lock(m_commandQueueMutex);
    auto it = m_commandQueueMap.find(commandQueue);
    if (it != m_commandQueueMap.end())
        return it->second;

    Com<ID3D12CommandQueueExt> commandQueueExt;
    if (FAILED(commandQueue->QueryInterface(IID_PPV_ARGS(&commandQueueExt))))
        return nullptr;

    // Only remember queues that actually yielded an interface.
    if (commandQueueExt != nullptr)
        m_commandQueueMap.emplace(commandQueue, commandQueueExt.ptr());

    return commandQueueExt;
}

std::optional<NvapiD3d12Device::CommandListExtWithVersion> NvapiD3d12Device::GetCommandListExt(ID3D12GraphicsCommandList* commandList) {
std::scoped_lock lock(m_commandListMutex);
auto it = m_commandListMap.find(commandList);
Expand All @@ -169,11 +193,13 @@ namespace dxvk {
}

void NvapiD3d12Device::ClearCacheMaps() {
std::scoped_lock commandQueueLock(m_commandQueueMutex);
std::scoped_lock commandListLock(m_commandListMutex);
std::scoped_lock cubinDeviceLock(m_cubinDeviceMutex);
std::scoped_lock cubinSmemLock(m_cubinSmemMutex);

m_cubinDeviceMap.clear();
m_commandQueueMap.clear();
m_commandListMap.clear();
m_cubinSmemMap.clear();
}
Expand Down
4 changes: 4 additions & 0 deletions src/d3d12/nvapi_d3d12_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace dxvk {
static bool DestroyCubinComputeShader(ID3D12Device* device, NVDX_ObjectHandle shader);
static bool GetCudaTextureObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE srvHandle, D3D12_CPU_DESCRIPTOR_HANDLE samplerHandle, NvU32* cudaTextureHandle);
static bool GetCudaSurfaceObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE uavHandle, NvU32* cudaSurfaceHandle);
static bool NotifyOutOfBandCommandQueue(ID3D12CommandQueue* commandQueue, D3D12_OUT_OF_BAND_CQ_TYPE type);
static bool LaunchCubinShader(ID3D12GraphicsCommandList* commandList, NVDX_ObjectHandle shader, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const void* params, NvU32 paramSize);
static bool CaptureUAVInfo(ID3D12Device* device, NVAPI_UAV_INFO* uavInfo);
static bool IsFatbinPTXSupported(ID3D12Device* device);
Expand All @@ -32,15 +33,18 @@ namespace dxvk {

private:
// Process-wide caches; all four are emptied by ClearCacheMaps().
inline static std::unordered_map<ID3D12Device*, ID3D12DeviceExt*> m_cubinDeviceMap;
// Command queue -> ID3D12CommandQueueExt, filled by GetCommandQueueExt().
inline static std::unordered_map<ID3D12CommandQueue*, ID3D12CommandQueueExt*> m_commandQueueMap;
inline static std::unordered_map<ID3D12GraphicsCommandList*, CommandListExtWithVersion> m_commandListMap;
inline static std::unordered_map<NVDX_ObjectHandle, NvU32> m_cubinSmemMap;

// One mutex per cache map; ClearCacheMaps() holds all of them while clearing.
inline static std::mutex m_commandListMutex;
inline static std::mutex m_commandQueueMutex;
inline static std::mutex m_cubinDeviceMutex;
inline static std::mutex m_cubinSmemMutex;

// Lookup helpers for the vkd3d-proton extension interfaces.
[[nodiscard]] static Com<ID3D12DeviceExt> GetCubinDevice(ID3D12Device* device);
[[nodiscard]] static Com<ID3D12DeviceExt> GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension);
// Returns the cached or freshly queried ID3D12CommandQueueExt, or null if unavailable.
[[nodiscard]] static Com<ID3D12CommandQueueExt> GetCommandQueueExt(ID3D12CommandQueue* commandQueue);
[[nodiscard]] static std::optional<CommandListExtWithVersion> GetCommandListExt(ID3D12GraphicsCommandList* commandList);
};
}
1 change: 1 addition & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ nvapi_src = files([
'resource_factory.cpp',
'd3d/lfx.cpp',
'd3d/nvapi_d3d_instance.cpp',
'd3d/nvapi_d3d_low_latency_device.cpp',
'd3d11/nvapi_d3d11_device.cpp',
'd3d12/nvapi_d3d12_device.cpp',
'nvapi_globals.cpp',
Expand Down
76 changes: 57 additions & 19 deletions src/nvapi_d3d.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include "dxvk/dxvk_interfaces.h"
#include "d3d/nvapi_d3d_low_latency_device.h"
#include "nvapi_private.h"
#include "nvapi_globals.h"
#include "util/util_statuscode.h"
Expand Down Expand Up @@ -106,64 +108,100 @@ extern "C" {

// NVAPI entry point: performs the Reflex per-frame sleep for pDevice.
// Returns ApiNotInitialized when the adapter registry is missing,
// NoImplementation when no Reflex backend is available for the device,
// Error when the sleep itself fails, and Ok otherwise.
// NOTE(review): this listing is a diff view, so the pre-commit and post-commit
// variants of some statements appear next to each other.
NvAPI_Status __cdecl NvAPI_D3D_Sleep(IUnknown* pDevice) {
constexpr auto n = __func__;
// One-shot flags so each outcome is only logged once.
static bool alreadyLoggedNoReflex = false;
static bool alreadyLoggedError = false;
static bool alreadyLoggedOk = false;
static bool alreadyLoggedNoLfx = false;

if (nvapiAdapterRegistry == nullptr)
return ApiNotInitialized(n);

if (!nvapiD3dInstance->IsReflexAvailable())
return NoImplementation(n, alreadyLoggedNoLfx);
if (!nvapiD3dInstance->IsReflexAvailable(pDevice))
return NoImplementation(n, alreadyLoggedNoReflex);

nvapiD3dInstance->Sleep();
if (!nvapiD3dInstance->Sleep(pDevice))
return Error(n, alreadyLoggedError);

return Ok(n, alreadyLoggedOk);
}

// NVAPI entry point: applies the application's Reflex sleep-mode settings.
// Only NV_SET_SLEEP_MODE_PARAMS_VER1 is accepted; other versions yield
// IncompatibleStructVersion. Returns NoImplementation when no Reflex backend
// is available for the device and Error when applying the mode fails.
// NOTE(review): pSetSleepModeParams is dereferenced without a null check —
// confirm callers never pass null.
// NOTE(review): this listing is a diff view, so the pre-commit and post-commit
// variants of some statements appear next to each other.
NvAPI_Status __cdecl NvAPI_D3D_SetSleepMode(IUnknown* pDevice, NV_SET_SLEEP_MODE_PARAMS* pSetSleepModeParams) {
constexpr auto n = __func__;
static bool alreadyLoggedNoLfx = false;
static bool alreadyLoggedNoReflex = false;
static bool alreadyLoggedError = false;

if (nvapiAdapterRegistry == nullptr)
return ApiNotInitialized(n);

if (pSetSleepModeParams->version != NV_SET_SLEEP_MODE_PARAMS_VER1)
return IncompatibleStructVersion(n);

if (!nvapiD3dInstance->IsReflexAvailable())
return NoImplementation(n, alreadyLoggedNoLfx);
if (!nvapiD3dInstance->IsReflexAvailable(pDevice))
return NoImplementation(n, alreadyLoggedNoReflex);

nvapiD3dInstance->SetReflexEnabled(pSetSleepModeParams->bLowLatencyMode);
if (pSetSleepModeParams->bLowLatencyMode)
nvapiD3dInstance->SetTargetFrameTime(pSetSleepModeParams->minimumIntervalUs);
if (!nvapiD3dInstance->SetReflexMode(pDevice, pSetSleepModeParams->bLowLatencyMode, pSetSleepModeParams->bLowLatencyBoost, pSetSleepModeParams->minimumIntervalUs))
return Error(n, alreadyLoggedError);

// The success message includes the requested mode and, when enabled, the interval.
return Ok(str::format(n, " (", pSetSleepModeParams->bLowLatencyMode ? (str::format("Enabled/", pSetSleepModeParams->minimumIntervalUs, "us")) : "Disabled", ")"));
}

// NVAPI entry point: reports whether low-latency mode is currently enabled.
// Only NV_GET_SLEEP_STATUS_PARAMS_VER1 is accepted. Returns NoImplementation
// when no Reflex backend is available for the device.
// NOTE(review): pGetSleepStatusParams is dereferenced without a null check —
// confirm callers never pass null.
// NOTE(review): this listing is a diff view, so the pre-commit and post-commit
// variants of some statements appear next to each other.
NvAPI_Status __cdecl NvAPI_D3D_GetSleepStatus(IUnknown* pDevice, NV_GET_SLEEP_STATUS_PARAMS* pGetSleepStatusParams) {
constexpr auto n = __func__;
static bool alreadyLoggedNoLfx = false;
static bool alreadyLoggedNoReflex = false;
static bool alreadyLoggedOk = false;

if (nvapiAdapterRegistry == nullptr)
return ApiNotInitialized(n);

if (pGetSleepStatusParams->version != NV_GET_SLEEP_STATUS_PARAMS_VER1)
return IncompatibleStructVersion(n);

if (!nvapiD3dInstance->IsReflexAvailable())
return NoImplementation(n, alreadyLoggedNoLfx);
if (!nvapiD3dInstance->IsReflexAvailable(pDevice))
return NoImplementation(n, alreadyLoggedNoReflex);

pGetSleepStatusParams->bLowLatencyMode = nvapiD3dInstance->IsReflexEnabled();
return Ok(n);
pGetSleepStatusParams->bLowLatencyMode = nvapiD3dInstance->IsLowLatencyEnabled();

return Ok(n, alreadyLoggedOk);
}

// NVAPI entry point: retrieves latency results from the device.
// Only NV_LATENCY_RESULT_PARAMS_VER1 is accepted. Returns NoImplementation when
// LatencyFleX is in use or the device lacks low-latency support, and Error when
// the device call fails.
// NOTE(review): pGetLatencyParams is dereferenced without a null check —
// confirm callers never pass null.
// NOTE(review): this listing is a diff view, so the pre-commit stub lines appear
// ahead of the post-commit body.
NvAPI_Status __cdecl NvAPI_D3D_GetLatency(IUnknown* pDev, NV_LATENCY_RESULT_PARAMS* pGetLatencyParams) {
static bool alreadyLogged = false;
return NoImplementation(__func__, alreadyLogged);
constexpr auto n = __func__;
static bool alreadyLoggedNoImpl = false;
static bool alreadyLoggedError = false;
static bool alreadyLoggedOk = false;

if (nvapiAdapterRegistry == nullptr)
return ApiNotInitialized(n);

if (pGetLatencyParams->version != NV_LATENCY_RESULT_PARAMS_VER1)
return IncompatibleStructVersion(n);

if (nvapiD3dInstance->IsUsingLfx() || !NvapiD3dLowLatencyDevice::SupportsLowLatency(pDev))
return NoImplementation(n, alreadyLoggedNoImpl);

// NOTE(review): the cast presumes NV_LATENCY_RESULT_PARAMS and
// D3D_LATENCY_RESULTS share the same memory layout — confirm.
if (!NvapiD3dLowLatencyDevice::GetLatencyInfo(pDev, reinterpret_cast<D3D_LATENCY_RESULTS*>(pGetLatencyParams)))
return Error(n, alreadyLoggedError);

return Ok(n, alreadyLoggedOk);
}

// NVAPI entry point: forwards a latency marker (frame ID + marker type) to the device.
// Only NV_LATENCY_MARKER_PARAMS_VER1 is accepted. Returns NoImplementation when
// LatencyFleX is in use or the device lacks low-latency support, and Error when
// the device call fails.
// NOTE(review): pSetLatencyMarkerParams is dereferenced without a null check —
// confirm callers never pass null.
// NOTE(review): this listing is a diff view, so the pre-commit stub lines appear
// ahead of the post-commit body.
NvAPI_Status __cdecl NvAPI_D3D_SetLatencyMarker(IUnknown* pDev, NV_LATENCY_MARKER_PARAMS* pSetLatencyMarkerParams) {
static bool alreadyLogged = false;
return NoImplementation(__func__, alreadyLogged);
constexpr auto n = __func__;
static bool alreadyLoggedNoImpl = false;
static bool alreadyLoggedError = false;
static bool alreadyLoggedOk = false;

if (nvapiAdapterRegistry == nullptr)
return ApiNotInitialized(n);

if (pSetLatencyMarkerParams->version != NV_LATENCY_MARKER_PARAMS_VER1)
return IncompatibleStructVersion(n);

if (nvapiD3dInstance->IsUsingLfx() || !NvapiD3dLowLatencyDevice::SupportsLowLatency(pDev))
return NoImplementation(n, alreadyLoggedNoImpl);

if (!NvapiD3dLowLatencyDevice::SetLatencyMarker(pDev, pSetLatencyMarkerParams->frameID, pSetLatencyMarkerParams->markerType))
return Error(n, alreadyLoggedError);

return Ok(n, alreadyLoggedOk);
}
}
Loading

0 comments on commit 5ca994f

Please sign in to comment.