mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
fix: RTDispatchGlobals programming
Related-To: NEO-14423
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4cbd6c79a0
commit
fe34302a3a
@@ -1124,29 +1124,28 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
auto rtStackSize = RayTracingHelper::getRTStackSizePerTile(*this, tileCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
|
||||
|
||||
std::unique_ptr<RTDispatchGlobalsInfo> dispatchGlobalsInfo = std::make_unique<RTDispatchGlobalsInfo>();
|
||||
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
auto &productHelper = getProductHelper();
|
||||
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
|
||||
bool isResource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
||||
|
||||
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, dispatchGlobalsSize,
|
||||
AllocationType::globalSurface, true, getDeviceBitfield());
|
||||
arrayAllocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
||||
arrayAllocProps.flags.resource48Bit = isResource48Bit;
|
||||
arrayAllocProps.flags.isUSMDeviceAllocation = true;
|
||||
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
|
||||
if (dispatchGlobalsArrayAllocation == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned int tile = 0; tile < tileCount; tile++) {
|
||||
for (auto tile = 0u; tile < tileCount; tile++) {
|
||||
DeviceBitfield deviceBitfield =
|
||||
(tileCount == 1)
|
||||
? this->getDeviceBitfield()
|
||||
: subdevices[tile]->getDeviceBitfield();
|
||||
|
||||
AllocationProperties allocProps(getRootDeviceIndex(), true, rtStackSize, AllocationType::buffer, true, deviceBitfield);
|
||||
allocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
||||
allocProps.flags.resource48Bit = isResource48Bit;
|
||||
allocProps.flags.isUSMDeviceAllocation = true;
|
||||
|
||||
auto rtStackAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
@@ -1157,22 +1156,14 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
}
|
||||
|
||||
RTDispatchGlobals dispatchGlobals = {0};
|
||||
|
||||
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
|
||||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
|
||||
|
||||
auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this);
|
||||
dispatchGlobals.numDSSRTStacks = rtStacksPerDss;
|
||||
dispatchGlobals.callStackHandlerKSP = 0;
|
||||
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getAsyncStackSizePerRay() : 0;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getAsyncNumRTStacksPerDss();
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
||||
dispatchGlobalsAsArray[7] = 1;
|
||||
dispatchGlobals.flags = 1;
|
||||
|
||||
if (releaseHelper) {
|
||||
bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled(this->getHardwareInfo());
|
||||
releaseHelper->adjustRTDispatchGlobals(static_cast<void *>(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels);
|
||||
}
|
||||
productHelper.adjustRTDispatchGlobals(dispatchGlobals, this->getHardwareInfo());
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation),
|
||||
*this,
|
||||
|
||||
@@ -26,16 +26,19 @@ class RayTracingHelper : public NonCopyableAndNonMovableClass {
|
||||
static constexpr uint32_t bvhStackSize = 96;
|
||||
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * MemoryConstants::kiloByte;
|
||||
static constexpr uint32_t maxBvhLevels = 8;
|
||||
|
||||
static constexpr uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
static constexpr uint32_t fixedSizeOfRtStacksPerDss = 2048;
|
||||
static constexpr uint32_t maxNumDSSRTStacks = 2048;
|
||||
|
||||
// Returns sizeof(RTDispatchGlobals) passed through alignUp with
// MemoryConstants::cacheLineSize as the alignment, converted to size_t.
static size_t getDispatchGlobalSize() {
|
||||
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getRTStackSizePerTile(const Device &device, uint32_t tiles, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
|
||||
return static_cast<size_t>(alignUp(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device)) + extraBytesGlobal, MemoryConstants::cacheLineSize));
|
||||
auto &productHelper = device.getProductHelper();
|
||||
auto numRTStacksPerDss = productHelper.getNumRtStacksPerDSSForAllocation(device.getHardwareInfo());
|
||||
auto stackSizePerRay = getStackSizePerRay(maxBvhLevel, extraBytesLocal);
|
||||
auto numRtStacks = getNumRtStacks(device, numRTStacksPerDss);
|
||||
|
||||
return static_cast<size_t>(alignUp(stackSizePerRay * numRtStacks + extraBytesGlobal, MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getTotalMemoryBackedFifoSize(const Device &device) {
|
||||
@@ -46,29 +49,16 @@ class RayTracingHelper : public NonCopyableAndNonMovableClass {
|
||||
return static_cast<size_t>(Math::log2(memoryBackedFifoSizePerDss / MemoryConstants::kiloByte) - 1);
|
||||
}
|
||||
|
||||
static uint32_t getNumRtStacks(const Device &device) {
|
||||
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo()) * getNumRtStacksPerDss(device);
|
||||
}
|
||||
|
||||
static uint32_t getNumRtStacksPerDss(const Device &device) {
|
||||
auto releaseHelper = device.getReleaseHelper();
|
||||
|
||||
if (releaseHelper == nullptr || releaseHelper->isNumRtStacksPerDssFixedValue()) {
|
||||
return fixedSizeOfRtStacksPerDss;
|
||||
}
|
||||
|
||||
const auto &hwInfo = device.getHardwareInfo();
|
||||
UNRECOVERABLE_IF(hwInfo.gtSystemInfo.EUCount == 0)
|
||||
|
||||
uint32_t maxNumEUsPerDSS = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
uint32_t maxNumThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
|
||||
uint32_t maxSIMTThreadsPerThread = CommonConstants::maximalSimdSize;
|
||||
|
||||
return std::min(maxSizeOfRtStacksPerDss, maxNumEUsPerDSS * maxNumThreadsPerEU * maxSIMTThreadsPerThread);
|
||||
// Returns the RT stack count for the device: the value of
// GfxCoreHelper::getHighestEnabledDualSubSlice() for the device's hardware info
// multiplied by the caller-supplied nRtStacksPerDss.
static uint32_t getNumRtStacks(const Device &device, uint32_t nRtStacksPerDss) {
|
||||
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo()) * nRtStacksPerDss;
|
||||
}
|
||||
|
||||
// Returns the per-ray stack size used for allocation sizing:
// hitInfoSize + bvhStackSize * maxBvhLevel + extraBytesLocal.
static uint32_t getStackSizePerRay(uint32_t maxBvhLevel, uint32_t extraBytesLocal) {
|
||||
return hitInfoSize + bvhStackSize * maxBvhLevel + extraBytesLocal;
|
||||
}
|
||||
|
||||
// Returns the class constant maxNumDSSRTStacks (declared as 2048 in this class).
static uint32_t getAsyncNumRTStacksPerDss() {
|
||||
return maxNumDSSRTStacks;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "shared/source/command_stream/task_count_helper.h"
|
||||
|
||||
#include "aubstream/engine_node.h"
|
||||
#include "ocl_igc_shared/raytracing/ocl_raytracing_structures.h"
|
||||
|
||||
#include <igfxfmid.h>
|
||||
#include <memory>
|
||||
@@ -44,6 +45,7 @@ class MemoryManager;
|
||||
struct RootDeviceEnvironment;
|
||||
class OSInterface;
|
||||
class DriverModel;
|
||||
|
||||
enum class DriverModelType;
|
||||
enum class EngineGroupType : uint32_t;
|
||||
enum class GfxMemoryAllocationMethod : uint32_t;
|
||||
@@ -268,6 +270,9 @@ class ProductHelper {
|
||||
virtual bool isNonCoherentTimestampsModeEnabled() const = 0;
|
||||
virtual bool isPackedCopyFormatSupported() const = 0;
|
||||
virtual bool isPidFdOrSocketForIpcSupported() const = 0;
|
||||
virtual void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const = 0;
|
||||
|
||||
virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0;
|
||||
virtual ~ProductHelper() = default;
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/helpers/string_helpers.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
@@ -1082,4 +1083,19 @@ bool ProductHelperHw<gfxProduct>::getStorageInfoLocalOnlyFlag(LocalMemAllocation
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
// Generic implementation: intentionally empty, leaves rtDispatchGlobals untouched.
// NOTE(review): presumably product-specific specializations override this — confirm.
template <PRODUCT_FAMILY gfxProduct>
|
||||
void ProductHelperHw<gfxProduct>::adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const {
|
||||
}
|
||||
|
||||
// Generic implementation: always returns 0 and does not read hwInfo.
// NOTE(review): the tests in this change only check the value when it is > 0,
// so 0 looks like a "not provided" result — confirm against product specializations.
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Generic implementation: forwards to RayTracingHelper::getAsyncNumRTStacksPerDss()
// and does not read hwInfo.
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const {
|
||||

||||
return RayTracingHelper::getAsyncNumRTStacksPerDss();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -208,6 +208,9 @@ class ProductHelperHw : public ProductHelper {
|
||||
bool isNonCoherentTimestampsModeEnabled() const override;
|
||||
bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const override;
|
||||
bool isPidFdOrSocketForIpcSupported() const override;
|
||||
void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
~ProductHelperHw() override = default;
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct HardwareInfo;
|
||||
class ReleaseHelper;
|
||||
enum class ReleaseType;
|
||||
|
||||
@@ -56,13 +57,11 @@ class ReleaseHelper {
|
||||
virtual bool isRayTracingSupported() const = 0;
|
||||
virtual uint32_t getAdditionalFp16Caps() const = 0;
|
||||
virtual uint32_t getAdditionalExtraCaps() const = 0;
|
||||
virtual uint32_t getStackSizePerRay() const = 0;
|
||||
virtual void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const = 0;
|
||||
virtual uint32_t getAsyncStackSizePerRay() const = 0;
|
||||
virtual bool isLocalOnlyAllowed() const = 0;
|
||||
virtual bool isDummyBlitWaRequired() const = 0;
|
||||
virtual bool isDirectSubmissionLightSupported() const = 0;
|
||||
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
|
||||
virtual bool isNumRtStacksPerDssFixedValue() const = 0;
|
||||
virtual bool getFtrXe2Compression() const = 0;
|
||||
virtual bool programmAdditionalStallPriorToBarrierWithTimestamp() const = 0;
|
||||
virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0;
|
||||
@@ -103,13 +102,11 @@ class ReleaseHelperHw : public ReleaseHelper {
|
||||
bool isRayTracingSupported() const override;
|
||||
uint32_t getAdditionalFp16Caps() const override;
|
||||
uint32_t getAdditionalExtraCaps() const override;
|
||||
uint32_t getStackSizePerRay() const override;
|
||||
void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const override;
|
||||
uint32_t getAsyncStackSizePerRay() const override;
|
||||
bool isLocalOnlyAllowed() const override;
|
||||
bool isDummyBlitWaRequired() const override;
|
||||
bool isDirectSubmissionLightSupported() const override;
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
|
||||
bool isNumRtStacksPerDssFixedValue() const override;
|
||||
bool getFtrXe2Compression() const override;
|
||||
bool programmAdditionalStallPriorToBarrierWithTimestamp() const override;
|
||||
uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override;
|
||||
|
||||
@@ -38,7 +38,7 @@ bool ReleaseHelperHw<release>::isLocalOnlyAllowed() const {
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t ReleaseHelperHw<release>::getStackSizePerRay() const {
|
||||
uint32_t ReleaseHelperHw<release>::getAsyncStackSizePerRay() const {
|
||||
return 64u;
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ bool ReleaseHelperHw<release>::isLocalOnlyAllowed() const {
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t ReleaseHelperHw<release>::getStackSizePerRay() const {
|
||||
uint32_t ReleaseHelperHw<release>::getAsyncStackSizePerRay() const {
|
||||
return 64u;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/release_helper/release_helper.h"
|
||||
|
||||
@@ -137,14 +138,10 @@ uint32_t ReleaseHelperHw<releaseType>::getAdditionalExtraCaps() const {
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
uint32_t ReleaseHelperHw<releaseType>::getStackSizePerRay() const {
|
||||
uint32_t ReleaseHelperHw<releaseType>::getAsyncStackSizePerRay() const {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
void ReleaseHelperHw<releaseType>::adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const {
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::isLocalOnlyAllowed() const {
|
||||
return true;
|
||||
@@ -161,11 +158,6 @@ const SizeToPreferredSlmValueArray &ReleaseHelperHw<releaseType>::getSizeToPrefe
|
||||
return sizeToPreferredSlmValue;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::isNumRtStacksPerDssFixedValue() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::getFtrXe2Compression() const {
|
||||
return true;
|
||||
|
||||
@@ -35,17 +35,15 @@ class MockReleaseHelper : public ReleaseHelper {
|
||||
ADDMETHOD_CONST_NOBASE(isRayTracingSupported, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(getAdditionalFp16Caps, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(getAdditionalExtraCaps, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(getStackSizePerRay, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(getAsyncStackSizePerRay, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(isLocalOnlyAllowed, bool, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDummyBlitWaRequired, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isNumRtStacksPerDssFixedValue, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(isBlitImageAllowedForDepthFormat, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
|
||||
ADDMETHOD_CONST_NOBASE(programmAdditionalStallPriorToBarrierWithTimestamp, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isPostImageWriteFlushRequired, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels));
|
||||
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
|
||||
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};
|
||||
|
||||
@@ -257,7 +257,7 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenStackSizePerRayIsSet
|
||||
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
if (releaseHelper) {
|
||||
EXPECT_EQ(dispatchGlobals.stackSizePerRay, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(dispatchGlobals.stackSizePerRay, releaseHelper->getAsyncStackSizePerRay());
|
||||
} else {
|
||||
EXPECT_EQ(dispatchGlobals.stackSizePerRay, 0u);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -39,8 +39,8 @@ TEST(RayTracingHelperTests, whenRTStackSizeIsRequestedThenCorrectValueIsReturned
|
||||
uint32_t extraBytesLocal = 20;
|
||||
uint32_t extraBytesGlobal = 100;
|
||||
uint32_t tiles = 2;
|
||||
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device) + extraBytesGlobal, MemoryConstants::cacheLineSize);
|
||||
auto numRtStacksPerDss = device.getProductHelper().getNumRtStacksPerDSSForAllocation(device.getHardwareInfo());
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device, numRtStacksPerDss) + extraBytesGlobal, MemoryConstants::cacheLineSize);
|
||||
size_t size = RayTracingHelper::getRTStackSizePerTile(device, tiles, maxBvhLevel, extraBytesLocal, extraBytesGlobal);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
}
|
||||
@@ -48,8 +48,8 @@ TEST(RayTracingHelperTests, whenRTStackSizeIsRequestedThenCorrectValueIsReturned
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksIsQueriedThenItIsEqualToNumRtStacksPerDssMultipliedByDualSubsliceCount) {
|
||||
MockDevice device;
|
||||
|
||||
uint32_t numDssRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device);
|
||||
uint32_t numDssRtStacksPerDss = device.getProductHelper().getNumRtStacksPerDSSForAllocation(device.getHardwareInfo());
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device, numDssRtStacksPerDss);
|
||||
uint32_t subsliceCount = GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo());
|
||||
|
||||
EXPECT_LT(0u, numDssRtStacks);
|
||||
@@ -71,53 +71,40 @@ TEST(RayTracingHelperTests, whenGetMemoryBackedFifoSizeToPatchIsCalledThenCorrec
|
||||
EXPECT_EQ(2u, RayTracingHelper::getMemoryBackedFifoSizeToPatch());
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsTrueThenCorrectValueIsReturned) {
|
||||
MockReleaseHelper mockReleaseHelper;
|
||||
MockDevice mockDevice;
|
||||
|
||||
mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = true;
|
||||
mockDevice.mockReleaseHelper = &mockReleaseHelper;
|
||||
|
||||
uint32_t fixedSizeOfRtStacksPerDss = 2048;
|
||||
uint32_t result = RayTracingHelper::getNumRtStacksPerDss(mockDevice);
|
||||
EXPECT_EQ(fixedSizeOfRtStacksPerDss, result);
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsFalseThenCorrectValueIsReturned) {
|
||||
MockReleaseHelper mockReleaseHelper;
|
||||
mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = false;
|
||||
|
||||
MockDevice device;
|
||||
uint32_t maxEuPerSubSlice = 16;
|
||||
uint32_t threadCount = 672;
|
||||
uint32_t euCount = 96;
|
||||
|
||||
auto hwInfo = *NEO::defaultHwInfo;
|
||||
auto &hwInfo = *device.getRootDeviceEnvironmentRef().getMutableHardwareInfo();
|
||||
hwInfo.gtSystemInfo.MaxEuPerSubSlice = maxEuPerSubSlice;
|
||||
hwInfo.gtSystemInfo.ThreadCount = threadCount;
|
||||
hwInfo.gtSystemInfo.EUCount = euCount;
|
||||
|
||||
std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
|
||||
mockDevice->mockReleaseHelper = &mockReleaseHelper;
|
||||
|
||||
// maxEuPerSubSlice * (threadCount / euCount) * CommonConstants::maximalSimdSize = 3584u
|
||||
constexpr uint32_t expectedValue = 3584;
|
||||
|
||||
EXPECT_EQ(expectedValue, RayTracingHelper::getNumRtStacksPerDss(*mockDevice));
|
||||
auto numRtStackPerDss = device.getProductHelper().getSyncNumRTStacksPerDss(device.getHardwareInfo());
|
||||
if (numRtStackPerDss > 0) {
|
||||
// maxEuPerSubSlice * (threadCount / euCount) * CommonConstants::maximalSimdSize = 3584u
|
||||
constexpr uint32_t expectedValue = 3584;
|
||||
EXPECT_EQ(expectedValue, numRtStackPerDss);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssExceedsMaxThenReturnsMaxRtStacksPerDssSupported) {
|
||||
MockReleaseHelper mockReleaseHelper;
|
||||
mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = false;
|
||||
|
||||
auto hwInfo = *NEO::defaultHwInfo;
|
||||
MockDevice device;
|
||||
uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
|
||||
auto &hwInfo = *device.getRootDeviceEnvironmentRef().getMutableHardwareInfo();
|
||||
hwInfo.gtSystemInfo.MaxEuPerSubSlice = 512;
|
||||
hwInfo.gtSystemInfo.ThreadCount = 2048;
|
||||
hwInfo.gtSystemInfo.EUCount = 256;
|
||||
|
||||
std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
|
||||
mockDevice->mockReleaseHelper = &mockReleaseHelper;
|
||||
|
||||
uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
uint32_t result = RayTracingHelper::getNumRtStacksPerDss(*mockDevice);
|
||||
EXPECT_EQ(maxSizeOfRtStacksPerDss, result);
|
||||
}
|
||||
auto numRtStackPerDss = device.getProductHelper().getSyncNumRTStacksPerDss(device.getHardwareInfo());
|
||||
if (numRtStackPerDss > 0) {
|
||||
uint32_t uncappedRtStacksPerDss = hwInfo.gtSystemInfo.MaxEuPerSubSlice * (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) * CommonConstants::maximalSimdSize;
|
||||
EXPECT_GT(uncappedRtStacksPerDss, maxSizeOfRtStacksPerDss);
|
||||
EXPECT_EQ(maxSizeOfRtStacksPerDss, numRtStackPerDss);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,9 +38,8 @@ TEST_F(ReleaseHelper1255Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,9 +38,8 @@ TEST_F(ReleaseHelper1256Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,9 +38,8 @@ TEST_F(ReleaseHelper1257Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,9 +35,8 @@ TEST_F(ReleaseHelper1260Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,9 +34,8 @@ TEST_F(ReleaseHelper1261Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,9 +39,8 @@ TEST_F(ReleaseHelper1270Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
EXPECT_TRUE(releaseHelper->isDirectSubmissionLightSupported());
|
||||
}
|
||||
|
||||
@@ -39,9 +39,8 @@ TEST_F(ReleaseHelper1271Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
EXPECT_TRUE(releaseHelper->isDirectSubmissionLightSupported());
|
||||
}
|
||||
|
||||
@@ -38,9 +38,8 @@ TEST_F(ReleaseHelper1274Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
EXPECT_TRUE(releaseHelper->isDirectSubmissionLightSupported());
|
||||
}
|
||||
|
||||
@@ -37,10 +37,9 @@ TEST_F(ReleaseHelper2001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,9 +38,8 @@ TEST_F(ReleaseHelper2002Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,10 +38,9 @@ TEST_F(ReleaseHelper2004Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,8 +38,7 @@ TEST_F(ReleaseHelper3000Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(10u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(64u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_EQ(64u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(revision != 0, releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,8 +38,7 @@ TEST_F(ReleaseHelper3001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(10u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(64u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_EQ(64u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,8 +38,7 @@ TEST_F(ReleaseHelper3003Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(10u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_FALSE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user