refactor: add SetMaxBVHLevels debug flag
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
251ced0e4b
commit
2c77d2c698
|
@ -902,6 +902,11 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne
|
|||
if (releaseHelper && releaseHelper->isRayTracingSupported()) {
|
||||
rtProperties->flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY;
|
||||
rtProperties->maxBVHLevels = NEO::RayTracingHelper::maxBvhLevels;
|
||||
|
||||
if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) {
|
||||
rtProperties->maxBVHLevels = static_cast<uint32_t>(NEO::debugManager.flags.SetMaxBVHLevels.get());
|
||||
}
|
||||
|
||||
} else {
|
||||
rtProperties->flags = 0;
|
||||
rtProperties->maxBVHLevels = 0;
|
||||
|
|
|
@ -1199,6 +1199,11 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
|||
|
||||
if (this->usesRayTracing()) {
|
||||
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
|
||||
|
||||
if (NEO::debugManager.flags.SetMaxBVHLevels.get() != -1) {
|
||||
bvhLevels = static_cast<uint32_t>(NEO::debugManager.flags.SetMaxBVHLevels.get());
|
||||
}
|
||||
|
||||
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
|
||||
neoDevice->initializeRayTracing(bvhLevels);
|
||||
|
||||
|
|
|
@ -1205,6 +1205,33 @@ HWTEST2_F(DeviceTest, whenPassingRaytracingExpStructToGetPropertiesThenPropertie
|
|||
EXPECT_EQ(expectedMaxBVHLevels, rayTracingProperties.maxBVHLevels);
|
||||
}
|
||||
|
||||
// Checks that the SetMaxBVHLevels debug flag overrides the maxBVHLevels value
// returned through ze_device_raytracing_ext_properties_t by getKernelProperties().
HWTEST2_F(DeviceTest, givenSetMaxBVHLevelsWhenPassingRaytracingExpStructToGetPropertiesThenPropertiesWithCorrectFlagIsReturned, MatchAny) {
    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.SetMaxBVHLevels.set(7);

    // Seed the extension struct with sentinel values so an untouched struct would be detected.
    ze_device_raytracing_ext_properties_t rayTracingProperties = {};
    rayTracingProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES;
    rayTracingProperties.flags = ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32;
    rayTracingProperties.maxBVHLevels = 37u;

    // Chain the ray tracing extension struct behind the kernel properties query.
    ze_device_module_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    kernelProperties.pNext = &rayTracingProperties;

    EXPECT_EQ(ZE_RESULT_SUCCESS, device->getKernelProperties(&kernelProperties));
    EXPECT_NE(ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32, rayTracingProperties.flags);

    // The override is only expected when ray tracing is supported; otherwise 0 is reported.
    auto releaseHelper = this->neoDevice->getReleaseHelper();
    const bool rayTracingSupported = releaseHelper && releaseHelper->isRayTracingSupported();
    const uint32_t expectedMaxBVHLevels = rayTracingSupported ? 7u : 0u;
    EXPECT_EQ(expectedMaxBVHLevels, rayTracingProperties.maxBVHLevels);
}
|
||||
|
||||
TEST_F(DeviceTest, givenKernelPropertiesStructureWhenKernelPropertiesCalledThenAllPropertiesAreAssigned) {
|
||||
const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
|
||||
|
||||
|
|
|
@ -304,6 +304,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferStateInitSubmissionToFirstRegularUsage, -1,
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KMD, > 0: Override timestamp width used for profiling. Requires XeKMD kernel.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SetMaxBVHLevels, -1, "-1: default, > 0: Override maxBVHLevels used for ray tracing (reported device property and RTDispatchGlobals).")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels")
|
||||
|
|
|
@ -1134,18 +1134,24 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
|||
break;
|
||||
}
|
||||
|
||||
struct RTDispatchGlobals dispatchGlobals = {0};
|
||||
RTDispatchGlobals dispatchGlobals = {0};
|
||||
|
||||
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
|
||||
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
|
||||
dispatchGlobals.numDSSRTStacks = RayTracingHelper::getNumRtStacksPerDss(*this);
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
|
||||
auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this);
|
||||
dispatchGlobals.numDSSRTStacks = rtStacksPerDss;
|
||||
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
||||
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
||||
dispatchGlobalsAsArray[7] = 1;
|
||||
|
||||
if (releaseHelper) {
|
||||
bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled();
|
||||
releaseHelper->adjustRTDispatchGlobals(static_cast<void *>(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels);
|
||||
}
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation),
|
||||
*this,
|
||||
dispatchGlobalsArrayAllocation,
|
||||
|
|
|
@ -57,6 +57,7 @@ class ReleaseHelper {
|
|||
virtual uint32_t getAdditionalFp16Caps() const = 0;
|
||||
virtual uint32_t getAdditionalExtraCaps() const = 0;
|
||||
virtual uint32_t getStackSizePerRay() const = 0;
|
||||
// Hook that lets a release-specific helper modify the ray tracing dispatch globals in place.
// rtDispatchGlobals is passed type-erased as void *; rtStacksPerDss, heaplessEnabled and
// maxBvhLevels are supplied by the caller as inputs for the adjustment.
virtual void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const = 0;
|
||||
virtual bool isLocalOnlyAllowed() const = 0;
|
||||
virtual bool isDummyBlitWaRequired() const = 0;
|
||||
virtual bool isDirectSubmissionLightSupported() const = 0;
|
||||
|
@ -100,6 +101,7 @@ class ReleaseHelperHw : public ReleaseHelper {
|
|||
uint32_t getAdditionalFp16Caps() const override;
|
||||
uint32_t getAdditionalExtraCaps() const override;
|
||||
uint32_t getStackSizePerRay() const override;
|
||||
// Per-release override of ReleaseHelper::adjustRTDispatchGlobals.
void adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const override;
|
||||
bool isLocalOnlyAllowed() const override;
|
||||
bool isDummyBlitWaRequired() const override;
|
||||
bool isDirectSubmissionLightSupported() const override;
|
||||
|
|
|
@ -137,6 +137,10 @@ uint32_t ReleaseHelperHw<releaseType>::getStackSizePerRay() const {
|
|||
return 0u;
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
void ReleaseHelperHw<releaseType>::adjustRTDispatchGlobals(void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels) const {
|
||||
}
|
||||
|
||||
template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::isLocalOnlyAllowed() const {
|
||||
return true;
|
||||
|
|
|
@ -42,6 +42,7 @@ class MockReleaseHelper : public ReleaseHelper {
|
|||
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
|
||||
// Mock entry for ReleaseHelper::adjustRTDispatchGlobals.
// NOTE(review): presumably expands to a const override with no return value - confirm against the ADDMETHOD macro definitions.
ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels));
|
||||
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
|
||||
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};
|
||||
|
|
|
@ -663,6 +663,7 @@ PipelinedEuThreadArbitration = -1
|
|||
ExperimentalUSMAllocationReuseCleaner = -1
|
||||
DummyPageBackingEnabled = 0
|
||||
EnableDeferBacking = 0
|
||||
SetMaxBVHLevels = -1
|
||||
GetSipBinaryFromExternalLib = -1
|
||||
LogUsmReuse = 0
|
||||
# Please don't edit below this line
|
||||
|
|
|
@ -253,7 +253,7 @@ TEST_F(DeviceTest, whenAllocateRTDispatchGlobalsIsCalledThenStackSizePerRayIsSet
|
|||
pDevice->initializeRayTracing(5);
|
||||
pDevice->allocateRTDispatchGlobals(3);
|
||||
EXPECT_NE(nullptr, pDevice->getRTDispatchGlobals(3));
|
||||
struct RTDispatchGlobals dispatchGlobals = *reinterpret_cast<struct RTDispatchGlobals *>(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer());
|
||||
RTDispatchGlobals dispatchGlobals = *reinterpret_cast<struct RTDispatchGlobals *>(pDevice->getRTDispatchGlobals(3)->rtDispatchGlobalsArray->getUnderlyingBuffer());
|
||||
|
||||
auto releaseHelper = getReleaseHelper();
|
||||
if (releaseHelper) {
|
||||
|
|
Loading…
Reference in New Issue