mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Revert "fix: RTDispatchGlobals programming"
This reverts commit fe34302a3a.
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a591f919bb
commit
58b4d1951a
@@ -1140,28 +1140,29 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
auto rtStackSize = RayTracingHelper::getRTStackSizePerTile(*this, tileCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
|
||||
|
||||
std::unique_ptr<RTDispatchGlobalsInfo> dispatchGlobalsInfo = std::make_unique<RTDispatchGlobalsInfo>();
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
|
||||
auto &productHelper = getProductHelper();
|
||||
bool isResource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
||||
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
|
||||
|
||||
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, dispatchGlobalsSize,
|
||||
AllocationType::globalSurface, true, getDeviceBitfield());
|
||||
arrayAllocProps.flags.resource48Bit = isResource48Bit;
|
||||
arrayAllocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
||||
arrayAllocProps.flags.isUSMDeviceAllocation = true;
|
||||
GraphicsAllocation *dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
||||
|
||||
if (dispatchGlobalsArrayAllocation == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto tile = 0u; tile < tileCount; tile++) {
|
||||
for (unsigned int tile = 0; tile < tileCount; tile++) {
|
||||
DeviceBitfield deviceBitfield =
|
||||
(tileCount == 1)
|
||||
? this->getDeviceBitfield()
|
||||
: subdevices[tile]->getDeviceBitfield();
|
||||
|
||||
AllocationProperties allocProps(getRootDeviceIndex(), true, rtStackSize, AllocationType::buffer, true, deviceBitfield);
|
||||
allocProps.flags.resource48Bit = isResource48Bit;
|
||||
allocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
||||
allocProps.flags.isUSMDeviceAllocation = true;
|
||||
|
||||
auto rtStackAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
||||
@@ -1172,14 +1173,22 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
||||
}
|
||||
|
||||
RTDispatchGlobals dispatchGlobals = {0};
|
||||
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
|
||||
dispatchGlobals.callStackHandlerKSP = 0;
|
||||
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getAsyncStackSizePerRay() : 0;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getAsyncNumRTStacksPerDss();
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
dispatchGlobals.flags = 1;
|
||||
|
||||
productHelper.adjustRTDispatchGlobals(dispatchGlobals, this->getHardwareInfo());
|
||||
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
|
||||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
|
||||
|
||||
auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this);
|
||||
dispatchGlobals.numDSSRTStacks = rtStacksPerDss;
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
||||
dispatchGlobalsAsArray[7] = 1;
|
||||
|
||||
if (releaseHelper) {
|
||||
bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled(this->getHardwareInfo());
|
||||
releaseHelper->adjustRTDispatchGlobals(static_cast<void *>(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels);
|
||||
}
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation),
|
||||
*this,
|
||||
|
||||
@@ -26,19 +26,16 @@ class RayTracingHelper : public NonCopyableAndNonMovableClass {
|
||||
static constexpr uint32_t bvhStackSize = 96;
|
||||
static constexpr uint32_t memoryBackedFifoSizePerDss = 8 * MemoryConstants::kiloByte;
|
||||
static constexpr uint32_t maxBvhLevels = 8;
|
||||
static constexpr uint32_t maxNumDSSRTStacks = 2048;
|
||||
|
||||
static constexpr uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
static constexpr uint32_t fixedSizeOfRtStacksPerDss = 2048;
|
||||
|
||||
static size_t getDispatchGlobalSize() {
|
||||
return static_cast<size_t>(alignUp(sizeof(RTDispatchGlobals), MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getRTStackSizePerTile(const Device &device, uint32_t tiles, uint32_t maxBvhLevel, uint32_t extraBytesLocal, uint32_t extraBytesGlobal) {
|
||||
auto &productHelper = device.getProductHelper();
|
||||
auto numRTStacksPerDss = productHelper.getNumRtStacksPerDSSForAllocation(device.getHardwareInfo());
|
||||
auto stackSizePerRay = getStackSizePerRay(maxBvhLevel, extraBytesLocal);
|
||||
auto numRtStacks = getNumRtStacks(device, numRTStacksPerDss);
|
||||
|
||||
return static_cast<size_t>(alignUp(stackSizePerRay * numRtStacks + extraBytesGlobal, MemoryConstants::cacheLineSize));
|
||||
return static_cast<size_t>(alignUp(getStackSizePerRay(maxBvhLevel, extraBytesLocal) * (getNumRtStacks(device)) + extraBytesGlobal, MemoryConstants::cacheLineSize));
|
||||
}
|
||||
|
||||
static size_t getTotalMemoryBackedFifoSize(const Device &device) {
|
||||
@@ -49,16 +46,29 @@ class RayTracingHelper : public NonCopyableAndNonMovableClass {
|
||||
return static_cast<size_t>(Math::log2(memoryBackedFifoSizePerDss / MemoryConstants::kiloByte) - 1);
|
||||
}
|
||||
|
||||
static uint32_t getNumRtStacks(const Device &device, uint32_t nRtStacksPerDss) {
|
||||
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo()) * nRtStacksPerDss;
|
||||
static uint32_t getNumRtStacks(const Device &device) {
|
||||
return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo()) * getNumRtStacksPerDss(device);
|
||||
}
|
||||
|
||||
static uint32_t getNumRtStacksPerDss(const Device &device) {
|
||||
auto releaseHelper = device.getReleaseHelper();
|
||||
|
||||
if (releaseHelper == nullptr || releaseHelper->isNumRtStacksPerDssFixedValue()) {
|
||||
return fixedSizeOfRtStacksPerDss;
|
||||
}
|
||||
|
||||
const auto &hwInfo = device.getHardwareInfo();
|
||||
UNRECOVERABLE_IF(hwInfo.gtSystemInfo.EUCount == 0)
|
||||
|
||||
uint32_t maxNumEUsPerDSS = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
uint32_t maxNumThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
|
||||
uint32_t maxSIMTThreadsPerThread = CommonConstants::maximalSimdSize;
|
||||
|
||||
return std::min(maxSizeOfRtStacksPerDss, maxNumEUsPerDSS * maxNumThreadsPerEU * maxSIMTThreadsPerThread);
|
||||
}
|
||||
|
||||
static uint32_t getStackSizePerRay(uint32_t maxBvhLevel, uint32_t extraBytesLocal) {
|
||||
return hitInfoSize + bvhStackSize * maxBvhLevel + extraBytesLocal;
|
||||
}
|
||||
|
||||
static uint32_t getAsyncNumRTStacksPerDss() {
|
||||
return maxNumDSSRTStacks;
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
|
||||
#include "aubstream/engine_node.h"
|
||||
#include "ocl_igc_shared/raytracing/ocl_raytracing_structures.h"
|
||||
|
||||
#include <igfxfmid.h>
|
||||
#include <memory>
|
||||
@@ -46,7 +45,6 @@ class MemoryManager;
|
||||
struct RootDeviceEnvironment;
|
||||
class OSInterface;
|
||||
class DriverModel;
|
||||
|
||||
enum class DriverModelType;
|
||||
enum class EngineGroupType : uint32_t;
|
||||
enum class GfxMemoryAllocationMethod : uint32_t;
|
||||
@@ -271,9 +269,6 @@ class ProductHelper {
|
||||
virtual bool isNonCoherentTimestampsModeEnabled() const = 0;
|
||||
virtual bool isPackedCopyFormatSupported() const = 0;
|
||||
virtual bool isPidFdOrSocketForIpcSupported() const = 0;
|
||||
virtual void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool checkBcsForDirectSubmissionStop() const = 0;
|
||||
virtual bool shouldRegisterEnqueuedWalkerWithProfiling() const = 0;
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/helpers/string_helpers.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
@@ -1083,21 +1082,6 @@ bool ProductHelperHw<gfxProduct>::getStorageInfoLocalOnlyFlag(LocalMemAllocation
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
void ProductHelperHw<gfxProduct>::adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const {
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const {
|
||||
|
||||
return RayTracingHelper::getAsyncNumRTStacksPerDss();
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::checkBcsForDirectSubmissionStop() const {
|
||||
return false;
|
||||
|
||||
@@ -208,9 +208,6 @@ class ProductHelperHw : public ProductHelper {
|
||||
bool isNonCoherentTimestampsModeEnabled() const override;
|
||||
bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const override;
|
||||
bool isPidFdOrSocketForIpcSupported() const override;
|
||||
void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const override;
|
||||
bool checkBcsForDirectSubmissionStop() const override;
|
||||
bool shouldRegisterEnqueuedWalkerWithProfiling() const override;
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct HardwareInfo;
|
||||
class ReleaseHelper;
|
||||
enum class ReleaseType;
|
||||
|
||||
@@ -57,11 +56,13 @@ class ReleaseHelper {
|
||||
virtual bool isRayTracingSupported() const = 0;
|
||||
virtual uint32_t getAdditionalFp16Caps() const = 0;
|
||||
virtual uint32_t getAdditionalExtraCaps() const = 0;
|
||||
virtual uint32_t getAsyncStackSizePerRay() const = 0;
|
||||
virtual uint32_t getStackSizePerRay() const = 0;
|
||||
virtual void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const = 0;
|
||||
virtual bool isLocalOnlyAllowed() const = 0;
|
||||
virtual bool isDummyBlitWaRequired() const = 0;
|
||||
virtual bool isDirectSubmissionLightSupported() const = 0;
|
||||
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
|
||||
virtual bool isNumRtStacksPerDssFixedValue() const = 0;
|
||||
virtual bool getFtrXe2Compression() const = 0;
|
||||
virtual bool programmAdditionalStallPriorToBarrierWithTimestamp() const = 0;
|
||||
virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0;
|
||||
@@ -102,11 +103,13 @@ class ReleaseHelperHw : public ReleaseHelper {
|
||||
bool isRayTracingSupported() const override;
|
||||
uint32_t getAdditionalFp16Caps() const override;
|
||||
uint32_t getAdditionalExtraCaps() const override;
|
||||
uint32_t getAsyncStackSizePerRay() const override;
|
||||
uint32_t getStackSizePerRay() const override;
|
||||
void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const override;
|
||||
bool isLocalOnlyAllowed() const override;
|
||||
bool isDummyBlitWaRequired() const override;
|
||||
bool isDirectSubmissionLightSupported() const override;
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
|
||||
bool isNumRtStacksPerDssFixedValue() const override;
|
||||
bool getFtrXe2Compression() const override;
|
||||
bool programmAdditionalStallPriorToBarrierWithTimestamp() const override;
|
||||
uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override;
|
||||
|
||||
@@ -38,7 +38,7 @@ bool ReleaseHelperHw<release>::isLocalOnlyAllowed() const {
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t ReleaseHelperHw<release>::getAsyncStackSizePerRay() const {
|
||||
uint32_t ReleaseHelperHw<release>::getStackSizePerRay() const {
|
||||
return 64u;
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ bool ReleaseHelperHw<release>::isLocalOnlyAllowed() const {
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t ReleaseHelperHw<release>::getAsyncStackSizePerRay() const {
|
||||
uint32_t ReleaseHelperHw<release>::getStackSizePerRay() const {
|
||||
return 64u;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/release_helper/release_helper.h"
|
||||
|
||||
@@ -138,10 +137,14 @@ uint32_t ReleaseHelperHw<releaseType>::getAdditionalExtraCaps() const {
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
uint32_t ReleaseHelperHw<releaseType>::getAsyncStackSizePerRay() const {
|
||||
uint32_t ReleaseHelperHw<releaseType>::getStackSizePerRay() const {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
void ReleaseHelperHw<releaseType>::adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const {
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::isLocalOnlyAllowed() const {
|
||||
return true;
|
||||
@@ -158,6 +161,11 @@ const SizeToPreferredSlmValueArray &ReleaseHelperHw<releaseType>::getSizeToPrefe
|
||||
return sizeToPreferredSlmValue;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::isNumRtStacksPerDssFixedValue() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::getFtrXe2Compression() const {
|
||||
return true;
|
||||
|
||||
@@ -35,15 +35,17 @@ class MockReleaseHelper : public ReleaseHelper {
|
||||
ADDMETHOD_CONST_NOBASE(isRayTracingSupported, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(getAdditionalFp16Caps, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(getAdditionalExtraCaps, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(getAsyncStackSizePerRay, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(getStackSizePerRay, uint32_t, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(isLocalOnlyAllowed, bool, {}, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDummyBlitWaRequired, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isNumRtStacksPerDssFixedValue, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(isBlitImageAllowedForDepthFormat, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
|
||||
ADDMETHOD_CONST_NOBASE(programmAdditionalStallPriorToBarrierWithTimestamp, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isPostImageWriteFlushRequired, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels));
|
||||
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
|
||||
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};
|
||||
|
||||
@@ -259,7 +259,7 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenStackSizePerRayIsSet
|
||||
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
if (releaseHelper) {
|
||||
EXPECT_EQ(dispatchGlobals.stackSizePerRay, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(dispatchGlobals.stackSizePerRay, releaseHelper->getStackSizePerRay());
|
||||
} else {
|
||||
EXPECT_EQ(dispatchGlobals.stackSizePerRay, 0u);
|
||||
}
|
||||
|
||||
@@ -39,8 +39,8 @@ TEST(RayTracingHelperTests, whenRTStackSizeIsRequestedThenCorrectValueIsReturned
|
||||
uint32_t extraBytesLocal = 20;
|
||||
uint32_t extraBytesGlobal = 100;
|
||||
uint32_t tiles = 2;
|
||||
auto numRtStacksPerDss = device.getProductHelper().getNumRtStacksPerDSSForAllocation(device.getHardwareInfo());
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device, numRtStacksPerDss) + extraBytesGlobal, MemoryConstants::cacheLineSize);
|
||||
|
||||
size_t expectedSize = alignUp(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal) * RayTracingHelper::getNumRtStacks(device) + extraBytesGlobal, MemoryConstants::cacheLineSize);
|
||||
size_t size = RayTracingHelper::getRTStackSizePerTile(device, tiles, maxBvhLevel, extraBytesLocal, extraBytesGlobal);
|
||||
EXPECT_EQ(expectedSize, size);
|
||||
}
|
||||
@@ -48,8 +48,8 @@ TEST(RayTracingHelperTests, whenRTStackSizeIsRequestedThenCorrectValueIsReturned
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksIsQueriedThenItIsEqualToNumRtStacksPerDssMultipliedByDualSubsliceCount) {
|
||||
MockDevice device;
|
||||
|
||||
uint32_t numDssRtStacksPerDss = device.getProductHelper().getNumRtStacksPerDSSForAllocation(device.getHardwareInfo());
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device, numDssRtStacksPerDss);
|
||||
uint32_t numDssRtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(device);
|
||||
uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacks(device);
|
||||
uint32_t subsliceCount = GfxCoreHelper::getHighestEnabledDualSubSlice(device.getHardwareInfo());
|
||||
|
||||
EXPECT_LT(0u, numDssRtStacks);
|
||||
@@ -71,40 +71,53 @@ TEST(RayTracingHelperTests, whenGetMemoryBackedFifoSizeToPatchIsCalledThenCorrec
|
||||
EXPECT_EQ(2u, RayTracingHelper::getMemoryBackedFifoSizeToPatch());
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsFalseThenCorrectValueIsReturned) {
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsTrueThenCorrectValueIsReturned) {
|
||||
MockReleaseHelper mockReleaseHelper;
|
||||
MockDevice mockDevice;
|
||||
|
||||
mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = true;
|
||||
mockDevice.mockReleaseHelper = &mockReleaseHelper;
|
||||
|
||||
uint32_t fixedSizeOfRtStacksPerDss = 2048;
|
||||
uint32_t result = RayTracingHelper::getNumRtStacksPerDss(mockDevice);
|
||||
EXPECT_EQ(fixedSizeOfRtStacksPerDss, result);
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedAndFixedValueIsFalseThenCorrectValueIsReturned) {
|
||||
MockReleaseHelper mockReleaseHelper;
|
||||
mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = false;
|
||||
|
||||
MockDevice device;
|
||||
uint32_t maxEuPerSubSlice = 16;
|
||||
uint32_t threadCount = 672;
|
||||
uint32_t euCount = 96;
|
||||
|
||||
auto &hwInfo = *device.getRootDeviceEnvironmentRef().getMutableHardwareInfo();
|
||||
auto hwInfo = *NEO::defaultHwInfo;
|
||||
hwInfo.gtSystemInfo.MaxEuPerSubSlice = maxEuPerSubSlice;
|
||||
hwInfo.gtSystemInfo.ThreadCount = threadCount;
|
||||
hwInfo.gtSystemInfo.EUCount = euCount;
|
||||
|
||||
auto numRtStackPerDss = device.getProductHelper().getSyncNumRTStacksPerDss(device.getHardwareInfo());
|
||||
if (numRtStackPerDss > 0) {
|
||||
// maxEuPerSubSlice * (threadCount / euCount) * CommonConstants::maximalSimdSize = 3584u
|
||||
constexpr uint32_t expectedValue = 3584;
|
||||
EXPECT_EQ(expectedValue, numRtStackPerDss);
|
||||
}
|
||||
std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
|
||||
mockDevice->mockReleaseHelper = &mockReleaseHelper;
|
||||
|
||||
// maxEuPerSubSlice * (threadCount / euCount) * CommonConstants::maximalSimdSize = 3584u
|
||||
constexpr uint32_t expectedValue = 3584;
|
||||
|
||||
EXPECT_EQ(expectedValue, RayTracingHelper::getNumRtStacksPerDss(*mockDevice));
|
||||
}
|
||||
|
||||
TEST(RayTracingHelperTests, whenNumRtStacksPerDssExceedsMaxThenReturnsMaxRtStacksPerDssSupported) {
|
||||
MockReleaseHelper mockReleaseHelper;
|
||||
mockReleaseHelper.isNumRtStacksPerDssFixedValueResult = false;
|
||||
|
||||
MockDevice device;
|
||||
uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
|
||||
auto &hwInfo = *device.getRootDeviceEnvironmentRef().getMutableHardwareInfo();
|
||||
auto hwInfo = *NEO::defaultHwInfo;
|
||||
hwInfo.gtSystemInfo.MaxEuPerSubSlice = 512;
|
||||
hwInfo.gtSystemInfo.ThreadCount = 2048;
|
||||
hwInfo.gtSystemInfo.EUCount = 256;
|
||||
|
||||
auto numRtStackPerDss = device.getProductHelper().getSyncNumRTStacksPerDss(device.getHardwareInfo());
|
||||
if (numRtStackPerDss > 0) {
|
||||
uint32_t uncappedRtStacksPerDss = hwInfo.gtSystemInfo.MaxEuPerSubSlice * (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) * CommonConstants::maximalSimdSize;
|
||||
EXPECT_GT(uncappedRtStacksPerDss, maxSizeOfRtStacksPerDss);
|
||||
EXPECT_EQ(maxSizeOfRtStacksPerDss, numRtStackPerDss);
|
||||
}
|
||||
}
|
||||
std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
|
||||
mockDevice->mockReleaseHelper = &mockReleaseHelper;
|
||||
|
||||
uint32_t maxSizeOfRtStacksPerDss = 4096;
|
||||
uint32_t result = RayTracingHelper::getNumRtStacksPerDss(*mockDevice);
|
||||
EXPECT_EQ(maxSizeOfRtStacksPerDss, result);
|
||||
}
|
||||
@@ -38,8 +38,9 @@ TEST_F(ReleaseHelper1255Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,8 +38,9 @@ TEST_F(ReleaseHelper1256Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,8 +38,9 @@ TEST_F(ReleaseHelper1257Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,8 +35,9 @@ TEST_F(ReleaseHelper1260Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,8 +34,9 @@ TEST_F(ReleaseHelper1261Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,8 +39,9 @@ TEST_F(ReleaseHelper1270Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
EXPECT_TRUE(releaseHelper->isDirectSubmissionLightSupported());
|
||||
}
|
||||
|
||||
@@ -39,8 +39,9 @@ TEST_F(ReleaseHelper1271Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
EXPECT_TRUE(releaseHelper->isDirectSubmissionLightSupported());
|
||||
}
|
||||
|
||||
@@ -38,8 +38,9 @@ TEST_F(ReleaseHelper1274Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
EXPECT_TRUE(releaseHelper->isDirectSubmissionLightSupported());
|
||||
}
|
||||
|
||||
@@ -37,9 +37,10 @@ TEST_F(ReleaseHelper2001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,8 +38,9 @@ TEST_F(ReleaseHelper2002Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,9 +38,10 @@ TEST_F(ReleaseHelper2004Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isRcsExposureDisabled());
|
||||
EXPECT_FALSE(releaseHelper->isBindlessAddressingDisabled());
|
||||
EXPECT_EQ(8u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_FALSE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,8 @@ TEST_F(ReleaseHelper3000Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(10u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(64u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(64u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_EQ(revision != 0, releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,8 @@ TEST_F(ReleaseHelper3001Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(10u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_TRUE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(64u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(64u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_TRUE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,8 @@ TEST_F(ReleaseHelper3003Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
EXPECT_TRUE(releaseHelper->isGlobalBindlessAllocatorEnabled());
|
||||
EXPECT_EQ(10u, releaseHelper->getNumThreadsPerEu());
|
||||
EXPECT_FALSE(releaseHelper->isRayTracingSupported());
|
||||
EXPECT_EQ(0u, releaseHelper->getAsyncStackSizePerRay());
|
||||
EXPECT_EQ(0u, releaseHelper->getStackSizePerRay());
|
||||
EXPECT_TRUE(releaseHelper->isNumRtStacksPerDssFixedValue());
|
||||
EXPECT_TRUE(releaseHelper->getFtrXe2Compression());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user