From 2c77d2c6988bc6f8038b3ebbf2cd02fc9a3196b9 Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Thu, 13 Mar 2025 13:34:22 +0000 Subject: [PATCH] refactor: add SetMaxBVHLevels debug flag Signed-off-by: Kamil Kopryk --- level_zero/core/source/device/device_imp.cpp | 5 ++++ level_zero/core/source/kernel/kernel_imp.cpp | 5 ++++ .../sources/device/test_l0_device.cpp | 27 +++++++++++++++++++ .../debug_settings/debug_variables_base.inl | 1 + shared/source/device/device.cpp | 12 ++++++--- shared/source/release_helper/release_helper.h | 2 ++ .../release_helper/release_helper_base.inl | 4 +++ .../test/common/mocks/mock_release_helper.h | 1 + shared/test/common/test_files/igdrcl.config | 1 + .../unit_test/device/neo_device_tests.cpp | 2 +- 10 files changed, 56 insertions(+), 4 deletions(-) diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 496be2862d..dea8ddf0f4 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -902,6 +902,11 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne if (releaseHelper && releaseHelper->isRayTracingSupported()) { rtProperties->flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY; rtProperties->maxBVHLevels = NEO::RayTracingHelper::maxBvhLevels; + + if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) { + rtProperties->maxBVHLevels = static_cast(NEO::debugManager.flags.SetMaxBVHLevels.get()); + } + } else { rtProperties->flags = 0; rtProperties->maxBVHLevels = 0; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index efc9bafa7f..d8d090a2bb 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1199,6 +1199,11 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { if (this->usesRayTracing()) { uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels; + + if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) { + bvhLevels = static_cast(NEO::debugManager.flags.SetMaxBVHLevels.get()); + } + auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals; neoDevice->initializeRayTracing(bvhLevels); diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index 584f88c44f..092058b52c 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -1205,6 +1205,33 @@ HWTEST2_F(DeviceTest, whenPassingRaytracingExpStructToGetPropertiesThenPropertie EXPECT_EQ(expectedMaxBVHLevels, rayTracingProperties.maxBVHLevels); } +HWTEST2_F(DeviceTest, givenSetMaxBVHLevelsWhenPassingRaytracingExpStructToGetPropertiesThenPropertiesWithCorrectFlagIsReturned, MatchAny) { + + DebugManagerStateRestore dbgRestorer; + debugManager.flags.SetMaxBVHLevels.set(7); + + ze_device_module_properties_t kernelProperties = {}; + kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES; + + ze_device_raytracing_ext_properties_t rayTracingProperties = {}; + rayTracingProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES; + rayTracingProperties.flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32; + rayTracingProperties.maxBVHLevels = 37u; + + kernelProperties.pNext = &rayTracingProperties; + + ze_result_t res = device->getKernelProperties(&kernelProperties); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_NE(ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32, rayTracingProperties.flags); + + auto releaseHelper = this->neoDevice->getReleaseHelper(); + if (releaseHelper && releaseHelper->isRayTracingSupported()) { + EXPECT_EQ(7u, rayTracingProperties.maxBVHLevels); + } else { + EXPECT_EQ(0u, rayTracingProperties.maxBVHLevels); + } +} + TEST_F(DeviceTest, givenKernelPropertiesStructureWhenKernelPropertiesCalledThenAllPropertiesAreAssigned) { const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 3e7566d55d..7c33e4311c 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -304,6 +304,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferStateInitSubmissionToFirstRegularUsage, -1, DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations") DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KMD, > 0: Override timestamp width used for profiling. Requires XeKMD kernel.") DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.") +DECLARE_DEBUG_VARIABLE(int32_t, SetMaxBVHLevels, -1, "-1: default , > 0: Set maxBVHLevel in RTDispatchGlobal.") DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds") DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3") DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels") diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index efef7812d9..e36ce9de89 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -1134,18 +1134,24 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) { break; } - struct RTDispatchGlobals dispatchGlobals = {0}; + RTDispatchGlobals dispatchGlobals = {0}; dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize; dispatchGlobals.callStackHandlerKSP = reinterpret_cast(nullptr); auto releaseHelper = getReleaseHelper(); dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0; - dispatchGlobals.numDSSRTStacks = RayTracingHelper::getNumRtStacksPerDss(*this); - dispatchGlobals.maxBVHLevels = maxBvhLevels; + auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this); + dispatchGlobals.numDSSRTStacks = rtStacksPerDss; + dispatchGlobals.maxBVHLevels = maxBvhLevels; uint32_t *dispatchGlobalsAsArray = reinterpret_cast(&dispatchGlobals); dispatchGlobalsAsArray[7] = 1; + if (releaseHelper) { + bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled(); + releaseHelper->adjustRTDispatchGlobals(static_cast(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels); + } + MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation), *this, dispatchGlobalsArrayAllocation, diff --git a/shared/source/release_helper/release_helper.h b/shared/source/release_helper/release_helper.h index d7c287a176..335aba561d 100644 --- a/shared/source/release_helper/release_helper.h +++ b/shared/source/release_helper/release_helper.h @@ -57,6 +57,7 @@ class ReleaseHelper { virtual uint32_t getAdditionalFp16Caps() const = 0; virtual uint32_t getAdditionalExtraCaps() const = 0; virtual uint32_t getStackSizePerRay() const = 0; + virtual void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const = 0; virtual bool isLocalOnlyAllowed() const = 0; virtual bool isDummyBlitWaRequired() const = 0; virtual bool isDirectSubmissionLightSupported() const = 0; @@ -100,6 +101,7 @@ class ReleaseHelperHw : public ReleaseHelper { uint32_t getAdditionalFp16Caps() const override; uint32_t getAdditionalExtraCaps() const override; uint32_t getStackSizePerRay() const override; + void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const override; bool isLocalOnlyAllowed() const override; bool isDummyBlitWaRequired() const override; bool isDirectSubmissionLightSupported() const override; diff --git a/shared/source/release_helper/release_helper_base.inl b/shared/source/release_helper/release_helper_base.inl index 9569c62838..ebfe8f4351 100644 --- a/shared/source/release_helper/release_helper_base.inl +++ b/shared/source/release_helper/release_helper_base.inl @@ -137,6 +137,10 @@ uint32_t ReleaseHelperHw::getStackSizePerRay() const { return 0u; } +template +void ReleaseHelperHw::adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const { +} + template bool ReleaseHelperHw::isLocalOnlyAllowed() const { return true; diff --git a/shared/test/common/mocks/mock_release_helper.h b/shared/test/common/mocks/mock_release_helper.h index 641b5c8f50..e8cf7ecaa5 100644 --- a/shared/test/common/mocks/mock_release_helper.h +++ b/shared/test/common/mocks/mock_release_helper.h @@ -42,6 +42,7 @@ class MockReleaseHelper : public ReleaseHelper { ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ()); ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ()); ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless)); + ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels)); const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override { static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {}; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index ef843d93b3..d9d704f960 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -663,6 +663,7 @@ PipelinedEuThreadArbitration = -1 ExperimentalUSMAllocationReuseCleaner = -1 DummyPageBackingEnabled = 0 EnableDeferBacking = 0 +SetMaxBVHLevels = -1 GetSipBinaryFromExternalLib = -1 LogUsmReuse = 0 # Please don't edit below this line diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 8d40994bdf..23948a6223 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -253,7 +253,7 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenStackSizePerRayIsSet pDevice->initializeRayTracing(5); pDevice->allocateRTDispatchGlobals(3); EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3)); - struct RTDispatchGlobals dispatchGlobals = *reinterpret_cast(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer()); + RTDispatchGlobals dispatchGlobals = *reinterpret_cast(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer()); auto releaseHelper = getReleaseHelper(); if (releaseHelper) {