refactor: add SetMaxBVHLevels debug flag

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2025-03-13 13:34:22 +00:00 committed by Compute-Runtime-Automation
parent 251ced0e4b
commit 2c77d2c698
10 changed files with 56 additions and 4 deletions

View File

@ -902,6 +902,11 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne
if (releaseHelper && releaseHelper->isRayTracingSupported()) { if (releaseHelper && releaseHelper->isRayTracingSupported()) {
rtProperties->flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY; rtProperties->flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY;
rtProperties->maxBVHLevels = NEO::RayTracingHelper::maxBvhLevels; rtProperties->maxBVHLevels = NEO::RayTracingHelper::maxBvhLevels;
if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) {
rtProperties->maxBVHLevels = static_cast<uint32_t>(NEO::debugManager.flags.SetMaxBVHLevels.get());
}
} else { } else {
rtProperties->flags = 0; rtProperties->flags = 0;
rtProperties->maxBVHLevels = 0; rtProperties->maxBVHLevels = 0;

View File

@ -1199,6 +1199,11 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
if (this->usesRayTracing()) { if (this->usesRayTracing()) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels; uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) {
bvhLevels = static_cast<uint32_t>(NEO::debugManager.flags.SetMaxBVHLevels.get());
}
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals; auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
neoDevice->initializeRayTracing(bvhLevels); neoDevice->initializeRayTracing(bvhLevels);

View File

@ -1205,6 +1205,33 @@ HWTEST2_F(DeviceTest, whenPassingRaytracingExpStructToGetPropertiesThenPropertie
EXPECT_EQ(expectedMaxBVHLevels, rayTracingProperties.maxBVHLevels); EXPECT_EQ(expectedMaxBVHLevels, rayTracingProperties.maxBVHLevels);
} }
// With the SetMaxBVHLevels debug flag set to 7, querying kernel properties with a chained
// raytracing extension struct must report 7 BVH levels when the release helper says ray tracing
// is supported, and 0 otherwise. The flags field must be overwritten in both cases.
HWTEST2_F(DeviceTest, givenSetMaxBVHLevelsWhenPassingRaytracingExpStructToGetPropertiesThenPropertiesWithCorrectFlagIsReturned, MatchAny) {
    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.SetMaxBVHLevels.set(7);

    // Seed the extension struct with sentinel values so a write by getKernelProperties is observable.
    ze_device_raytracing_ext_properties_t rtExtProperties = {};
    rtExtProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES;
    rtExtProperties.flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32;
    rtExtProperties.maxBVHLevels = 37u;

    ze_device_module_properties_t moduleProperties = {};
    moduleProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    moduleProperties.pNext = &rtExtProperties;

    ze_result_t queryResult = device->getKernelProperties(&moduleProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, queryResult);
    EXPECT_NE(ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32, rtExtProperties.flags);

    // The debug override is only expected to take effect on ray-tracing-capable releases.
    auto releaseHelper = this->neoDevice->getReleaseHelper();
    const bool rayTracingSupported = releaseHelper && releaseHelper->isRayTracingSupported();
    const uint32_t expectedLevels = rayTracingSupported ? 7u : 0u;
    EXPECT_EQ(expectedLevels, rtExtProperties.maxBVHLevels);
}
TEST_F(DeviceTest, givenKernelPropertiesStructureWhenKernelPropertiesCalledThenAllPropertiesAreAssigned) { TEST_F(DeviceTest, givenKernelPropertiesStructureWhenKernelPropertiesCalledThenAllPropertiesAreAssigned) {
const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); const auto &hardwareInfo = this->neoDevice->getHardwareInfo();

View File

@ -304,6 +304,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferStateInitSubmissionToFirstRegularUsage, -1,
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations") DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KMD, > 0: Override timestamp width used for profiling. Requires XeKMD kernel.") DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KMD, > 0: Override timestamp width used for profiling. Requires XeKMD kernel.")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.") DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.")
// Consumers apply any value other than -1 as the override, both for the reported device property and for ray tracing initialization.
DECLARE_DEBUG_VARIABLE(int32_t, SetMaxBVHLevels, -1, "-1: default, >= 0: Override maxBVHLevels reported in raytracing device properties and used to initialize RTDispatchGlobals.")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds") DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3") DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3")
DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels") DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels")

View File

@ -1134,18 +1134,24 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
break; break;
} }
struct RTDispatchGlobals dispatchGlobals = {0}; RTDispatchGlobals dispatchGlobals = {0};
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize; dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr); dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
auto releaseHelper = getReleaseHelper(); auto releaseHelper = getReleaseHelper();
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0; dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getNumRtStacksPerDss(*this);
dispatchGlobals.maxBVHLevels = maxBvhLevels;
auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this);
dispatchGlobals.numDSSRTStacks = rtStacksPerDss;
dispatchGlobals.maxBVHLevels = maxBvhLevels;
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals); uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
dispatchGlobalsAsArray[7] = 1; dispatchGlobalsAsArray[7] = 1;
if (releaseHelper) {
bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled();
releaseHelper->adjustRTDispatchGlobals(static_cast<void *>(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels);
}
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation), MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation),
*this, *this,
dispatchGlobalsArrayAllocation, dispatchGlobalsArrayAllocation,

View File

@ -57,6 +57,7 @@ class ReleaseHelper {
virtual uint32_t getAdditionalFp16Caps() const = 0; virtual uint32_t getAdditionalFp16Caps() const = 0;
virtual uint32_t getAdditionalExtraCaps() const = 0; virtual uint32_t getAdditionalExtraCaps() const = 0;
virtual uint32_t getStackSizePerRay() const = 0; virtual uint32_t getStackSizePerRay() const = 0;
virtual void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const = 0;
virtual bool isLocalOnlyAllowed() const = 0; virtual bool isLocalOnlyAllowed() const = 0;
virtual bool isDummyBlitWaRequired() const = 0; virtual bool isDummyBlitWaRequired() const = 0;
virtual bool isDirectSubmissionLightSupported() const = 0; virtual bool isDirectSubmissionLightSupported() const = 0;
@ -100,6 +101,7 @@ class ReleaseHelperHw : public ReleaseHelper {
uint32_t getAdditionalFp16Caps() const override; uint32_t getAdditionalFp16Caps() const override;
uint32_t getAdditionalExtraCaps() const override; uint32_t getAdditionalExtraCaps() const override;
uint32_t getStackSizePerRay() const override; uint32_t getStackSizePerRay() const override;
void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const override;
bool isLocalOnlyAllowed() const override; bool isLocalOnlyAllowed() const override;
bool isDummyBlitWaRequired() const override; bool isDummyBlitWaRequired() const override;
bool isDirectSubmissionLightSupported() const override; bool isDirectSubmissionLightSupported() const override;

View File

@ -137,6 +137,10 @@ uint32_t ReleaseHelperHw<releaseType>::getStackSizePerRay() const {
return 0u; return 0u;
} }
// Generic implementation of the RTDispatchGlobals adjustment hook.
// The body is empty: none of the arguments are read and the structure behind
// rtDispatchGlobals is left unmodified.
// NOTE(review): release-specific specializations presumably provide the real adjustment — not visible here, confirm.
template <ReleaseType releaseType>
void ReleaseHelperHw<releaseType>::adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const {
}
template <ReleaseType releaseType> template <ReleaseType releaseType>
bool ReleaseHelperHw<releaseType>::isLocalOnlyAllowed() const { bool ReleaseHelperHw<releaseType>::isLocalOnlyAllowed() const {
return true; return true;

View File

@ -42,6 +42,7 @@ class MockReleaseHelper : public ReleaseHelper {
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ()); ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ()); ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless)); ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels));
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override { const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {}; static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};

View File

@ -663,6 +663,7 @@ PipelinedEuThreadArbitration = -1
ExperimentalUSMAllocationReuseCleaner = -1 ExperimentalUSMAllocationReuseCleaner = -1
DummyPageBackingEnabled = 0 DummyPageBackingEnabled = 0
EnableDeferBacking = 0 EnableDeferBacking = 0
SetMaxBVHLevels = -1
GetSipBinaryFromExternalLib = -1 GetSipBinaryFromExternalLib = -1
LogUsmReuse = 0 LogUsmReuse = 0
# Please don't edit below this line # Please don't edit below this line

View File

@ -253,7 +253,7 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenStackSizePerRayIsSet
pDevice->initializeRayTracing(5); pDevice->initializeRayTracing(5);
pDevice->allocateRTDispatchGlobals(3); pDevice->allocateRTDispatchGlobals(3);
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3)); EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
struct RTDispatchGlobals dispatchGlobals = *reinterpret_cast<struct RTDispatchGlobals *>(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer()); RTDispatchGlobals dispatchGlobals = *reinterpret_cast<struct RTDispatchGlobals *>(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer());
auto releaseHelper = getReleaseHelper(); auto releaseHelper = getReleaseHelper();
if (releaseHelper) { if (releaseHelper) {