refactor: add SetMaxBVHLevels debug flag

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2025-03-13 13:34:22 +00:00 committed by Compute-Runtime-Automation
parent 251ced0e4b
commit 2c77d2c698
10 changed files with 56 additions and 4 deletions

View File

@ -902,6 +902,11 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne
if (releaseHelper && releaseHelper->isRayTracingSupported()) {
rtProperties->flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY;
rtProperties->maxBVHLevels = NEO::RayTracingHelper::maxBvhLevels;
if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) {
rtProperties->maxBVHLevels = static_cast<uint32_t>(NEO::debugManager.flags.SetMaxBVHLevels.get());
}
} else {
rtProperties->flags = 0;
rtProperties->maxBVHLevels = 0;

View File

@ -1199,6 +1199,11 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
if (this->usesRayTracing()) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) {
bvhLevels = static_cast<uint32_t>(NEO::debugManager.flags.SetMaxBVHLevels.get());
}
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
neoDevice->initializeRayTracing(bvhLevels);

View File

@ -1205,6 +1205,33 @@ HWTEST2_F(DeviceTest, whenPassingRaytracingExpStructToGetPropertiesThenPropertie
EXPECT_EQ(expectedMaxBVHLevels, rayTracingProperties.maxBVHLevels);
}
// Checks that the SetMaxBVHLevels debug flag replaces the maxBVHLevels value written into the
// ray tracing extension properties when the release helper reports ray tracing support,
// and that 0 is written when it does not.
HWTEST2_F(DeviceTest, givenSetMaxBVHLevelsWhenPassingRaytracingExpStructToGetPropertiesThenPropertiesWithCorrectFlagIsReturned, MatchAny) {
    DebugManagerStateRestore dbgRestorer;
    constexpr int32_t overriddenBvhLevels = 7;
    debugManager.flags.SetMaxBVHLevels.set(overriddenBvhLevels);

    // Pre-fill the extension struct with sentinel values so that the call's writes are observable.
    ze_device_raytracing_ext_properties_t rtExtProperties = {};
    rtExtProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES;
    rtExtProperties.flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32;
    rtExtProperties.maxBVHLevels = 37u;

    ze_device_module_properties_t moduleProperties = {};
    moduleProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    moduleProperties.pNext = &rtExtProperties;

    ze_result_t queryResult = device->getKernelProperties(&moduleProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, queryResult);
    EXPECT_NE(ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32, rtExtProperties.flags);

    // The override only applies on the ray-tracing-capable path; otherwise 0 is expected.
    auto releaseHelper = this->neoDevice->getReleaseHelper();
    const bool rayTracingSupported = releaseHelper && releaseHelper->isRayTracingSupported();
    const uint32_t expectedMaxBvhLevels = rayTracingSupported ? static_cast<uint32_t>(overriddenBvhLevels) : 0u;
    EXPECT_EQ(expectedMaxBvhLevels, rtExtProperties.maxBVHLevels);
}
TEST_F(DeviceTest, givenKernelPropertiesStructureWhenKernelPropertiesCalledThenAllPropertiesAreAssigned) {
const auto &hardwareInfo = this->neoDevice->getHardwareInfo();

View File

@ -304,6 +304,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferStateInitSubmissionToFirstRegularUsage, -1,
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KMD, > 0: Override timestamp width used for profiling. Requires XeKMD kernel.")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.")
DECLARE_DEBUG_VARIABLE(int32_t, SetMaxBVHLevels, -1, "-1: default , > 0: Set maxBVHLevel in RTDispatchGlobal.")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3")
DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels")

View File

@ -1134,18 +1134,24 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
break;
}
struct RTDispatchGlobals dispatchGlobals = {0};
RTDispatchGlobals dispatchGlobals = {0};
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
auto releaseHelper = getReleaseHelper();
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getNumRtStacksPerDss(*this);
dispatchGlobals.maxBVHLevels = maxBvhLevels;
auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this);
dispatchGlobals.numDSSRTStacks = rtStacksPerDss;
dispatchGlobals.maxBVHLevels = maxBvhLevels;
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
dispatchGlobalsAsArray[7] = 1;
if (releaseHelper) {
bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled();
releaseHelper->adjustRTDispatchGlobals(static_cast<void *>(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels);
}
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation),
*this,
dispatchGlobalsArrayAllocation,

View File

@ -57,6 +57,7 @@ class ReleaseHelper {
virtual uint32_t getAdditionalFp16Caps() const = 0;
virtual uint32_t getAdditionalExtraCaps() const = 0;
virtual uint32_t getStackSizePerRay() const = 0;
// Hook that lets a release-specific helper modify the ray tracing dispatch globals structure.
// rtDispatchGlobals is passed type-erased (void *); the remaining arguments carry the number of
// RT stacks per DSS, whether heapless mode is enabled, and the requested max BVH levels.
// NOTE(review): presumably invoked right before the structure is copied to its allocation - confirm against the caller.
virtual void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const = 0;
virtual bool isLocalOnlyAllowed() const = 0;
virtual bool isDummyBlitWaRequired() const = 0;
virtual bool isDirectSubmissionLightSupported() const = 0;
@ -100,6 +101,7 @@ class ReleaseHelperHw : public ReleaseHelper {
uint32_t getAdditionalFp16Caps() const override;
uint32_t getAdditionalExtraCaps() const override;
uint32_t getStackSizePerRay() const override;
void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const override;
bool isLocalOnlyAllowed() const override;
bool isDummyBlitWaRequired() const override;
bool isDirectSubmissionLightSupported() const override;

View File

@ -137,6 +137,10 @@ uint32_t ReleaseHelperHw<releaseType>::getStackSizePerRay() const {
return 0u;
}
// Generic implementation: intentionally a no-op, the structure behind rtDispatchGlobals is left untouched
// and all arguments are ignored.
// NOTE(review): presumably specialized for releases that need adjustments - confirm in the per-release sources.
template <ReleaseType releaseType>
void ReleaseHelperHw<releaseType>::adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const {
}
template <ReleaseType releaseType>
bool ReleaseHelperHw<releaseType>::isLocalOnlyAllowed() const {
return true;

View File

@ -42,6 +42,7 @@ class MockReleaseHelper : public ReleaseHelper {
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels));
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};

View File

@ -663,6 +663,7 @@ PipelinedEuThreadArbitration = -1
ExperimentalUSMAllocationReuseCleaner = -1
DummyPageBackingEnabled = 0
EnableDeferBacking = 0
SetMaxBVHLevels = -1
GetSipBinaryFromExternalLib = -1
LogUsmReuse = 0
# Please don't edit below this line

View File

@ -253,7 +253,7 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenStackSizePerRayIsSet
pDevice->initializeRayTracing(5);
pDevice->allocateRTDispatchGlobals(3);
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
struct RTDispatchGlobals dispatchGlobals = *reinterpret_cast<struct RTDispatchGlobals *>(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer());
RTDispatchGlobals dispatchGlobals = *reinterpret_cast<struct RTDispatchGlobals *>(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer());
auto releaseHelper = getReleaseHelper();
if (releaseHelper) {