diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index ef64193323..02cf6b0340 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -903,22 +903,28 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { if (this->usesRayTracing()) { uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels; - neoDevice->initializeRayTracing(bvhLevels); - auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels); - if (rtDispatchGlobalsInfo == nullptr) { - return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; + auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals; + if (arg.pointerSize == 0) { + // kernel is allocating its own RTDispatchGlobals manually + neoDevice->initializeRayTracing(0); + } else { + neoDevice->initializeRayTracing(bvhLevels); + auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels); + if (rtDispatchGlobalsInfo == nullptr) { + return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) { + this->residencyContainer.push_back(rtDispatchGlobals); + } + + auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch(); + NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), + arg, + static_cast(address)); + + this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer()); } - - for (auto rtDispatchGlobals : rtDispatchGlobalsInfo->rtDispatchGlobals) { - this->residencyContainer.push_back(rtDispatchGlobals); - } - - auto address = rtDispatchGlobalsInfo->rtDispatchGlobals[0]->getGpuAddressToPatch(); - NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), - this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals, - static_cast(address)); - - this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer()); } return ZE_RESULT_SUCCESS; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 48798a3175..3cd6652a8c 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -734,6 +734,49 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized EXPECT_NE(nullptr, rtDispatchGlobals); } +TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZeroThenRayTracingIsInitialized) { + static_cast(device->getNEODevice()->getMemoryManager())->turnOnFakingBigAllocations(); + + KernelDescriptor mockDescriptor = {}; + mockDescriptor.kernelAttributes.flags.hasRTCalls = true; + mockDescriptor.kernelMetadata.kernelName = "rt_test"; + for (auto i = 0u; i < 3u; i++) { + mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0; + } + + std::unique_ptr mockKernelImmutableData = + std::make_unique(32u); + mockKernelImmutableData->kernelDescriptor = &mockDescriptor; + mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 0; + + ModuleBuildLog *moduleBuildLog = nullptr; + module = std::make_unique(device, + moduleBuildLog, + ModuleType::User, + 32u, + mockKernelImmutableData.get()); + module->maxGroupSize = 10; + + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t kernelDesc = {}; + kernelDesc.pKernelName = "rt_test"; + + auto immDataVector = + const_cast> *>(&module->getKernelImmutableDataVector()); + + immDataVector->push_back(std::move(mockKernelImmutableData)); + + auto result = kernel->initialize(&kernelDesc); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer()); + + // Application is expected to allocate its own RTDispatchGlobals manually in this case. + auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels); + EXPECT_EQ(nullptr, rtDispatchGlobals); +} + HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) { KernelDescriptor mockDescriptor = {}; mockDescriptor.kernelAttributes.flags.hasRTCalls = true;