diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 5df718bd63..fc2f818689 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -924,9 +924,15 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(), kernelImmData->getResidencyContainer().end()); - kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || - kernelDescriptor.kernelAttributes.hasNonKernelArgStore || - kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; + if (kernelAttributes.binaryFormat != NEO::DeviceBinaryFormat::Zebin) { + kernelHasIndirectAccess = true; + } else { + kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || + kernelDescriptor.kernelAttributes.hasNonKernelArgStore || + kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || + kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess || + NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor); + } if (this->usesRayTracing()) { uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels; @@ -1060,9 +1066,9 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module, } bool KernelImp::hasIndirectAllocationsAllowed() const { - return (unifiedMemoryControls.indirectDeviceAllocationsAllowed || - unifiedMemoryControls.indirectHostAllocationsAllowed || - unifiedMemoryControls.indirectSharedAllocationsAllowed); + return this->kernelHasIndirectAccess && (unifiedMemoryControls.indirectDeviceAllocationsAllowed || + unifiedMemoryControls.indirectHostAllocationsAllowed || + unifiedMemoryControls.indirectSharedAllocationsAllowed); } uint32_t KernelImp::getSlmTotalSize() const { diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 4c7991b774..dd937e29cf 100644 --- 
a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -225,7 +225,7 @@ struct KernelImp : Kernel { ze_cache_config_flags_t cacheConfigFlags = 0u; - bool kernelHasIndirectAccess = true; + bool kernelHasIndirectAccess = false; std::unique_ptr pImplicitArgs; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 96152f06cc..0a8272974f 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -90,6 +90,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture { using KernelImp::requiredWorkgroupOrder; using KernelImp::surfaceStateHeapData; using KernelImp::surfaceStateHeapDataSize; + using KernelImp::unifiedMemoryControls; MockKernel(MockModule *mockModule) : WhiteBox(mockModule) { } diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index 83da58845e..4470a5cd3c 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -48,6 +48,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp { using ::L0::KernelImp::dynamicStateHeapData; using ::L0::KernelImp::dynamicStateHeapDataSize; using ::L0::KernelImp::groupSize; + using ::L0::KernelImp::kernelHasIndirectAccess; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using ::L0::KernelImp::module; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 8ee8c88f97..7175f80939 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -19,6 +19,7 @@ 
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/event/event.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" @@ -27,26 +28,50 @@ namespace L0 { namespace ult { -using CommandListAppendLaunchKernel = Test; - -HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { - createKernel(); - kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; - kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; +using CommandListAppendLaunchKernelMockModule = Test; +HWTEST_F(CommandListAppendLaunchKernelMockModule, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { + mockKernelImmData->kernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true; + kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; + kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false; kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true; - EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); + EXPECT_TRUE(kernel->hasIndirectAllocationsAllowed()); ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; - std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); CmdListKernelLaunchParams launchParams = {}; - auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + { + returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + 
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed()); + } - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - ASSERT_TRUE(commandList->hasIndirectAllocationsAllowed()); + { + returnValue = commandList->reset(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; + kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; + kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false; + + returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed()); + } + + { + returnValue = commandList->reset(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false; + kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false; + + returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed()); + } } +using CommandListAppendLaunchKernel = Test; HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; @@ -347,9 +372,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, 
returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr<::L0::Event>(::L0::Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -442,9 +467,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFla ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST}; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr<::L0::Event>(::L0::Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -559,9 +584,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST}; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr<::L0::Event>(::L0::Event::create(eventPool.get(), &eventDesc, device)); EXPECT_EQ(1u, event->getPacketsInUse()); ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -775,9 +800,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemapho ze_event_desc_t eventDesc = {}; eventDesc.index = 0; - std::unique_ptr 
eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - std::unique_ptr event(Event::create(eventPool.get(), &eventDesc, device)); + std::unique_ptr<::L0::Event> event(::L0::Event::create(eventPool.get(), &eventDesc, device)); ze_event_handle_t hEventHandle = event->toHandle(); ze_group_count_t groupCount{1, 1, 1}; @@ -827,10 +852,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenMultipleValidWaitEventsThenAddSemap ze_event_desc_t eventDesc2 = {}; eventDesc2.index = 1; - std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - std::unique_ptr event1(Event::create(eventPool.get(), &eventDesc1, device)); - std::unique_ptr event2(Event::create(eventPool.get(), &eventDesc2, device)); + std::unique_ptr<::L0::Event> event1(::L0::Event::create(eventPool.get(), &eventDesc1, device)); + std::unique_ptr<::L0::Event> event2(::L0::Event::create(eventPool.get(), &eventDesc2, device)); ze_event_handle_t hEventHandle1 = event1->toHandle(); ze_event_handle_t hEventHandle2 = event2->toHandle(); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp index 245cc24eff..e4bb689b68 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp @@ -424,6 +424,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + 
kernel->kernelHasIndirectAccess = true; + EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); ze_group_count_t groupCount{1, 1, 1}; @@ -486,6 +488,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->kernelHasIndirectAccess = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); ze_group_count_t groupCount{1, 1, 1}; @@ -548,6 +551,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->kernelHasIndirectAccess = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); static_cast(driverHandle->getMemoryManager())->overrideAllocateAsPackReturn = 1u; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 72cb80ac03..26a6783774 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1220,7 +1220,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests; -TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) { +TEST_F(KernelIndirectPropertiesFromIGCTests, givenZebinFormatWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); @@ -1229,6 +1229,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); + 
mockKernelImmData->mockKernelDescriptor->kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); @@ -1247,7 +1248,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL EXPECT_FALSE(kernel->hasIndirectAccess()); } -TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) { +TEST_F(KernelIndirectPropertiesFromIGCTests, givenNonZebinFormatWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); @@ -1256,6 +1257,69 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); + mockKernelImmData->mockKernelDescriptor->kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Unknown; + + createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); + + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + + kernel->initialize(&desc); + + EXPECT_TRUE(kernel->hasIndirectAccess()); +} + +TEST_F(KernelIndirectPropertiesFromIGCTests, givenZebinFormatAndPtrPassedByValueWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DisableIndirectAccess.set(0); + + uint32_t perHwThreadPrivateMemorySizeRequested = 32u; 
+ bool isInternal = false; + + std::unique_ptr mockKernelImmData = + std::make_unique(perHwThreadPrivateMemorySizeRequested); + mockKernelImmData->mockKernelDescriptor->kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; + auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue); + ArgDescValue::Element element{}; + element.isPtr = true; + ptrByValueArg.as().elements.push_back(element); + mockKernelImmData->mockKernelDescriptor->payloadMappings.explicitArgs.push_back(ptrByValueArg); + EXPECT_EQ(mockKernelImmData->mockKernelDescriptor->payloadMappings.explicitArgs.size(), 1u); + + createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); + + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + + kernel->initialize(&desc); + + EXPECT_TRUE(kernel->hasIndirectAccess()); +} + +TEST_F(KernelIndirectPropertiesFromIGCTests, givenZebinFormatwhenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DisableIndirectAccess.set(0); + + uint32_t perHwThreadPrivateMemorySizeRequested = 32u; + bool isInternal = false; + + std::unique_ptr mockKernelImmData = + std::make_unique(perHwThreadPrivateMemorySizeRequested); + mockKernelImmData->mockKernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin; createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index bcebaa9ff6..62b06b186d 100644 --- 
a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -148,7 +148,6 @@ void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, Grap } cl_int Kernel::initialize() { - this->kernelHasIndirectAccess = false; auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); reconfigureKernel(); @@ -281,10 +280,15 @@ cl_int Kernel::initialize() { slmSizes.resize(numArgs); this->setInlineSamplers(); - - this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || - kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore || - kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; + if (kernelDescriptor.kernelAttributes.binaryFormat != NEO::DeviceBinaryFormat::Zebin) { + this->kernelHasIndirectAccess = true; + } else { + this->kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || + kernelDescriptor.kernelAttributes.hasNonKernelArgStore || + kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || + kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess || + NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor); + } provideInitializationHints(); // resolve the new kernel info to account for kernel handlers diff --git a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp index 5d0a4f9d92..4d2ebba16a 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp @@ -432,6 +432,7 @@ XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenStatelessKernelWhen } XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenKernelExecInfoWhenItHasIndirectHostAccessThenDisableCompressionInSBA) { + 
EXPECT_TRUE(multiDeviceKernel->getHasIndirectAccess()); const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index d66c22331b..a7edc00fb6 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1704,12 +1704,13 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) { +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicAndZebinFormatThenKernelHasIndirectAccessIsSetToFalse) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; @@ -1729,12 +1730,71 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirectAccessIsSetToTrue) { +HWTEST_F(KernelResidencyTest, givenKernelWithPtrByValueArgumentAndZebinFormatThenKernelHasIndirectAccessIsSetToTrue) { + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + 
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; + + auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue); + ArgDescValue::Element element; + element.isPtr = true; + ptrByValueArg.as().elements.push_back(element); + pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(ptrByValueArg); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_TRUE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicAndNonZebinFormatThenKernelHasIndirectAccessIsSetToFalse) { + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.binaryFormat = 
NEO::DeviceBinaryFormat::Unknown; + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_TRUE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadAndZebinFormatThenKernelHasIndirectAccessIsSetToTrue) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; @@ -1754,12 +1814,13 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndirectAccessIsSetToTrue) { +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreAndZebinFormatThenKernelHasIndirectAccessIsSetToTrue) { auto pKernelInfo = std::make_unique(); 
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; @@ -1779,12 +1840,13 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndirectAccessIsSetToTrue) { +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicAndZebinFormatThenKernelHasIndirectAccessIsSetToTrue) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true; + pKernelInfo->kernelDescriptor.kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; diff --git a/shared/source/helpers/kernel_helpers.cpp b/shared/source/helpers/kernel_helpers.cpp index b68caca54e..d540f2bfae 100644 --- a/shared/source/helpers/kernel_helpers.cpp +++ b/shared/source/helpers/kernel_helpers.cpp @@ -85,4 +85,17 @@ KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(Ker return KernelHelper::ErrorCode::SUCCESS; } +bool KernelHelper::isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor) { + for (auto &argDescriptor : 
kernelDescriptor.payloadMappings.explicitArgs) { + if (argDescriptor.type == NEO::ArgDescriptor::ArgTValue) { + for (auto &element : argDescriptor.as().elements) { + if (element.isPtr) { + return true; + } + } + } + } + return false; +} + } // namespace NEO diff --git a/shared/source/helpers/kernel_helpers.h b/shared/source/helpers/kernel_helpers.h index 526b5c8d4d..cb5b61c605 100644 --- a/shared/source/helpers/kernel_helpers.h +++ b/shared/source/helpers/kernel_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -34,6 +34,8 @@ struct KernelHelper { return perHwThreadPrivateScratchSize * computeUnitsUsedForScratch; } static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device); + + static bool isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor); }; } // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 2341722c8a..319e0f9f8f 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -54,9 +54,9 @@ struct KernelDescriptor { uint16_t numArgsToPatch = 0U; uint16_t numGrfRequired = GrfConfig::DefaultGrfNumber; uint8_t barrierCount = 0u; - bool hasNonKernelArgLoad = true; - bool hasNonKernelArgStore = true; - bool hasNonKernelArgAtomic = true; + bool hasNonKernelArgLoad = false; + bool hasNonKernelArgStore = false; + bool hasNonKernelArgAtomic = false; bool hasIndirectStatelessAccess = false; AddressingMode bufferAddressingMode = BindfulAndStateless; diff --git a/shared/test/unit_test/helpers/kernel_helpers_tests.cpp b/shared/test/unit_test/helpers/kernel_helpers_tests.cpp index 88d8264c4a..970ad3ab85 100644 --- a/shared/test/unit_test/helpers/kernel_helpers_tests.cpp +++ b/shared/test/unit_test/helpers/kernel_helpers_tests.cpp @@ -162,4 +162,32 @@ TEST_F(KernelHelperTest, 
// One pointer argument plus one by-value argument whose only element is not a pointer:
// no pointer is passed by value, so the helper must return false.
TEST_F(KernelHelperTest, GivenNoPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenFalseIsReturned) {
    KernelDescriptor kernelDescriptor;
    auto pointerArg = ArgDescriptor(ArgDescriptor::ArgTPointer);

    auto valueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
    ArgDescValue::Element element{};
    element.isPtr = false;
    valueArg.as<ArgDescValue>().elements.push_back(element);

    kernelDescriptor.payloadMappings.explicitArgs.push_back(pointerArg);
    kernelDescriptor.payloadMappings.explicitArgs.push_back(valueArg);
    EXPECT_FALSE(KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor));
}

// One pointer argument plus one by-value argument whose element is flagged as a pointer:
// the helper must return true.
TEST_F(KernelHelperTest, GivenPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenTrueIsReturned) {
    KernelDescriptor kernelDescriptor;
    auto pointerArg = ArgDescriptor(ArgDescriptor::ArgTPointer);

    auto valueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
    ArgDescValue::Element element{};
    element.isPtr = true;
    valueArg.as<ArgDescValue>().elements.push_back(element);

    kernelDescriptor.payloadMappings.explicitArgs.push_back(pointerArg);
    kernelDescriptor.payloadMappings.explicitArgs.push_back(valueArg);
    EXPECT_TRUE(KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor));
}