diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index e962f5faba..ac12a076e2 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -238,7 +238,7 @@ KernelImp::~KernelImp() { alignedFree(perThreadDataForWholeThreadGroup); } if (printfBuffer != nullptr) { - //not allowed to call virtual function on destructor, so calling printOutput directly + // not allowed to call virtual function on destructor, so calling printOutput directly PrintfHandler::printOutput(kernelImmData, this->printfBuffer, module->getDevice()); module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(printfBuffer); } @@ -901,7 +901,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || kernelDescriptor.kernelAttributes.hasNonKernelArgStore || - kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; + kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || + getImmutableData()->getKernelInfo()->hasIndirectStatelessAccess; if (this->usesRayTracing()) { if (this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize > 0) { @@ -1028,9 +1029,9 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module, } bool KernelImp::hasIndirectAllocationsAllowed() const { - return (unifiedMemoryControls.indirectDeviceAllocationsAllowed || - unifiedMemoryControls.indirectHostAllocationsAllowed || - unifiedMemoryControls.indirectSharedAllocationsAllowed); + return this->kernelHasIndirectAccess && (unifiedMemoryControls.indirectDeviceAllocationsAllowed || + unifiedMemoryControls.indirectHostAllocationsAllowed || + unifiedMemoryControls.indirectSharedAllocationsAllowed); } uint32_t KernelImp::getSlmTotalSize() const { diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 356ca585f1..6d41728008 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -148,7 +148,7 @@ struct KernelImp : Kernel { ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) override; - bool hasIndirectAccess() { + bool hasIndirectAccess() const { return kernelHasIndirectAccess; } diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index 1db4339be5..0920e87a94 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -46,6 +46,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp { using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; using ::L0::KernelImp::groupSize; + using ::L0::KernelImp::kernelHasIndirectAccess; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using ::L0::KernelImp::module; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index c5bf2af04f..ef97cc390d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -29,6 +29,7 @@ using CommandListAppendLaunchKernel = Test; HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { createKernel(); + kernel->kernelHasIndirectAccess = true; kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true; diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp index 0aeb89c5f8..7af1ba16fa 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp @@ -378,6 +378,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL ASSERT_NE(nullptr, gpuAlloc); createKernel(); + kernel->kernelHasIndirectAccess = true; kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); @@ -442,6 +443,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio ASSERT_NE(nullptr, gpuAlloc); createKernel(); + kernel->kernelHasIndirectAccess = true; kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); @@ -505,6 +507,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir ASSERT_NE(nullptr, gpuAlloc); createKernel(); + kernel->kernelHasIndirectAccess = true; kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); @@ -569,6 +572,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir ASSERT_NE(nullptr, gpuAlloc); createKernel(); + kernel->kernelHasIndirectAccess = true; kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index dc4ab2e6b2..1d45ced082 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -258,7 +258,8 @@ cl_int Kernel::initialize() { this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore || - kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; + kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || + kernelInfo.hasIndirectStatelessAccess; provideInitializationHints(); // resolve the new kernel info to account for kernel handlers @@ -1251,9 +1252,9 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { gtpinNotifyMakeResident(this, &commandStreamReceiver); - if (unifiedMemoryControls.indirectDeviceAllocationsAllowed || - unifiedMemoryControls.indirectHostAllocationsAllowed || - unifiedMemoryControls.indirectSharedAllocationsAllowed) { + if (this->kernelHasIndirectAccess && (unifiedMemoryControls.indirectDeviceAllocationsAllowed || + unifiedMemoryControls.indirectHostAllocationsAllowed || + unifiedMemoryControls.indirectSharedAllocationsAllowed)) { this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask()); } } diff --git a/shared/source/device_binary_format/zebin_decoder.cpp b/shared/source/device_binary_format/zebin_decoder.cpp index 1afafb921a..58ce77faa0 100644 --- a/shared/source/device_binary_format/zebin_decoder.cpp +++ b/shared/source/device_binary_format/zebin_decoder.cpp @@ -326,6 +326,9 @@ DecodeError readZeInfoExperimentalProperties(const NEO::Yaml::YamlParser &parser ConstStringRef context, std::string &outErrReason, std::string &outWarning) { bool validExperimentalProperty = true; + outExperimentalProperties.hasNonKernelArgLoad = true; + outExperimentalProperties.hasNonKernelArgStore = true; + outExperimentalProperties.hasNonKernelArgAtomic = true; for (const auto &experimentalPropertyNd : parser.createChildrenRange(node)) { for (const auto &experimentalPropertyMemberNd : parser.createChildrenRange(experimentalPropertyNd)) { auto key = parser.readKey(experimentalPropertyMemberNd); diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 09ea804c74..3a33da0c38 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -151,9 +151,9 @@ struct KernelDescriptor { uint16_t numArgsToPatch = 0U; uint16_t numGrfRequired = 0U; uint8_t barrierCount = 0u; - bool hasNonKernelArgLoad = true; - bool hasNonKernelArgStore = true; - bool hasNonKernelArgAtomic = true; + bool hasNonKernelArgLoad = false; + bool hasNonKernelArgStore = false; + bool hasNonKernelArgAtomic = false; AddressingMode bufferAddressingMode = BindfulAndStateless; AddressingMode imageAddressingMode = Bindful;