diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 840dbbc8f1..945b0c1e53 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -430,7 +430,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount } ze_result_t KernelImp::setIndirectAccess(ze_kernel_indirect_access_flags_t flags) { - if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1 || this->kernelHasIndirectAccess == false) { + if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1) { return ZE_RESULT_SUCCESS; } @@ -924,9 +924,19 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(), kernelImmData->getResidencyContainer().end()); - kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || - kernelDescriptor.kernelAttributes.hasNonKernelArgStore || - kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; + bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor); + if (NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) { + detectIndirectAccessInKernel = NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() == 1; + } + if (detectIndirectAccessInKernel) { + kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || + kernelDescriptor.kernelAttributes.hasNonKernelArgStore || + kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || + kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess || + NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor); + } else { + kernelHasIndirectAccess = true; + } if (this->usesRayTracing()) { uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels; @@ -1061,9 +1071,9 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module, } bool KernelImp::hasIndirectAllocationsAllowed() const { - return (unifiedMemoryControls.indirectDeviceAllocationsAllowed || - unifiedMemoryControls.indirectHostAllocationsAllowed || - unifiedMemoryControls.indirectSharedAllocationsAllowed); + return this->kernelHasIndirectAccess && (unifiedMemoryControls.indirectDeviceAllocationsAllowed || + unifiedMemoryControls.indirectHostAllocationsAllowed || + unifiedMemoryControls.indirectSharedAllocationsAllowed); } uint32_t KernelImp::getSlmTotalSize() const { diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 96feb27c29..8c643c2a0a 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -226,7 +226,7 @@ struct KernelImp : Kernel { ze_cache_config_flags_t cacheConfigFlags = 0u; - bool kernelHasIndirectAccess = true; + bool kernelHasIndirectAccess = false; std::unique_ptr pImplicitArgs; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index d8e4f4d454..2c3fb9c4ca 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -90,6 +90,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture { using KernelImp::requiredWorkgroupOrder; using KernelImp::surfaceStateHeapData; using KernelImp::surfaceStateHeapDataSize; + using KernelImp::unifiedMemoryControls; MockKernel(MockModule *mockModule) : WhiteBox(mockModule) { } diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index bc0bc6e79c..8ca1be3456 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -48,6 +48,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp { using ::L0::KernelImp::dynamicStateHeapData; using ::L0::KernelImp::dynamicStateHeapDataSize; using ::L0::KernelImp::groupSize; + using ::L0::KernelImp::kernelHasIndirectAccess; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using ::L0::KernelImp::midThreadPreemptionDisallowedForRayTracingKernels; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 2f767447af..ffe1e12f10 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -19,6 +19,7 @@ #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/event/event.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" @@ -27,26 +28,52 @@ namespace L0 { namespace ult { -using CommandListAppendLaunchKernel = Test; - -HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { - createKernel(); - kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; - kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; +using CommandListAppendLaunchKernelMockModule = Test; +HWTEST_F(CommandListAppendLaunchKernelMockModule, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1); + mockKernelImmData->kernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true; + kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; + kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false; kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true; - EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); + EXPECT_TRUE(kernel->hasIndirectAllocationsAllowed()); ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; - std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); CmdListKernelLaunchParams launchParams = {}; - auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + { + returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed()); + } - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - ASSERT_TRUE(commandList->hasIndirectAllocationsAllowed()); + { + returnValue = commandList->reset(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; + kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; + kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false; + + returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed()); + } + + { + returnValue = commandList->reset(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false; + kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false; + + returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed()); + } } +using CommandListAppendLaunchKernel = Test; HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; @@ -356,9 +383,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr<::L0::Event>(::L0::Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -451,9 +478,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFla ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST}; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr<::L0::Event>(::L0::Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -568,9 +595,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST}; - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto event = std::unique_ptr<::L0::Event>(::L0::Event::create(eventPool.get(), &eventDesc, device)); EXPECT_EQ(1u, event->getPacketsInUse()); ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -784,9 +811,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemapho ze_event_desc_t eventDesc = {}; eventDesc.index = 0; - std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - std::unique_ptr event(Event::create(eventPool.get(), &eventDesc, device)); + std::unique_ptr<::L0::Event> event(::L0::Event::create(eventPool.get(), &eventDesc, device)); ze_event_handle_t hEventHandle = event->toHandle(); ze_group_count_t groupCount{1, 1, 1}; @@ -836,10 +863,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenMultipleValidWaitEventsThenAddSemap ze_event_desc_t eventDesc2 = {}; eventDesc2.index = 1; - std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - std::unique_ptr event1(Event::create(eventPool.get(), &eventDesc1, device)); - std::unique_ptr event2(Event::create(eventPool.get(), &eventDesc2, device)); + std::unique_ptr<::L0::Event> event1(::L0::Event::create(eventPool.get(), &eventDesc1, device)); + std::unique_ptr<::L0::Event> event2(::L0::Event::create(eventPool.get(), &eventDesc2, device)); ze_event_handle_t hEventHandle1 = event1->toHandle(); ze_event_handle_t hEventHandle2 = event2->toHandle(); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp index 6605b780c9..046f73c056 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp @@ -425,6 +425,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->kernelHasIndirectAccess = true; + EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); ze_group_count_t groupCount{1, 1, 1}; @@ -487,6 +489,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->kernelHasIndirectAccess = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); ze_group_count_t groupCount{1, 1, 1}; @@ -551,6 +554,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + kernel->kernelHasIndirectAccess = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); static_cast(driverHandle->getMemoryManager())->overrideAllocateAsPackReturn = 1u; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 50845614ff..d3584ab339 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1214,9 +1214,10 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests; -TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) { +TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicAndNoHasIndirectStatelessAccessThenHasIndirectAccessIsSetToFalse) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); + NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; @@ -1235,15 +1236,53 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; kernel->initialize(&desc); EXPECT_FALSE(kernel->hasIndirectAccess()); } -TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) { +TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledAndPtrPassedByValueWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicAndNoHasIndirectStatelessAccessThenHasIndirectAccessIsSetToTrue) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); + NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1); + + uint32_t perHwThreadPrivateMemorySizeRequested = 32u; + bool isInternal = false; + + std::unique_ptr mockKernelImmData = + std::make_unique(perHwThreadPrivateMemorySizeRequested); + mockKernelImmData->mockKernelDescriptor->kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin; + auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue); + ArgDescValue::Element element{}; + element.isPtr = true; + ptrByValueArg.as().elements.push_back(element); + mockKernelImmData->mockKernelDescriptor->payloadMappings.explicitArgs.push_back(ptrByValueArg); + EXPECT_EQ(mockKernelImmData->mockKernelDescriptor->payloadMappings.explicitArgs.size(), 1u); + + createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); + + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + + kernel->initialize(&desc); + + EXPECT_TRUE(kernel->hasIndirectAccess()); +} + +TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledWhenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DisableIndirectAccess.set(0); + NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; @@ -1263,6 +1302,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; kernel->initialize(&desc); @@ -1279,6 +1319,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; kernel->initialize(&desc); @@ -1295,6 +1336,24 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + + kernel->initialize(&desc); + + EXPECT_TRUE(kernel->hasIndirectAccess()); + } + + { + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true; kernel->initialize(&desc); @@ -1619,7 +1678,7 @@ TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThat EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed); } -TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsNotSet) { +TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsSet) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); kernel->kernelHasIndirectAccess = false; @@ -1636,9 +1695,9 @@ TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThat EXPECT_EQ(ZE_RESULT_SUCCESS, res); unifiedMemoryControls = kernel->getUnifiedMemoryControls(); - EXPECT_FALSE(unifiedMemoryControls.indirectDeviceAllocationsAllowed); - EXPECT_FALSE(unifiedMemoryControls.indirectHostAllocationsAllowed); - EXPECT_FALSE(unifiedMemoryControls.indirectSharedAllocationsAllowed); + EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed); + EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed); + EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed); } TEST_F(KernelPropertiesTests, givenValidKernelIndirectAccessFlagsSetThenExpectKernelIndirectAllocationsAllowedTrue) { diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index bcebaa9ff6..1154fbc202 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -148,13 +148,13 @@ void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, Grap } cl_int Kernel::initialize() { - this->kernelHasIndirectAccess = false; auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); reconfigureKernel(); auto &hwInfo = pClDevice->getHardwareInfo(); auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); auto &gfxCoreHelper = rootDeviceEnvironment.getHelper(); + auto &productHelper = rootDeviceEnvironment.getHelper(); auto &kernelDescriptor = kernelInfo.kernelDescriptor; const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs; const auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs; @@ -281,11 +281,19 @@ cl_int Kernel::initialize() { slmSizes.resize(numArgs); this->setInlineSamplers(); - - this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || - kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore || - kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; - + bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor); + if (DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) { + detectIndirectAccessInKernel = DebugManager.flags.DetectIndirectAccessInKernel.get() == 1; + } + if (detectIndirectAccessInKernel) { + this->kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || + kernelDescriptor.kernelAttributes.hasNonKernelArgStore || + kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || + kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess || + NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor); + } else { + this->kernelHasIndirectAccess = true; + } provideInitializationHints(); // resolve the new kernel info to account for kernel handlers // I think by this time we have decoded the binary and know the number of args etc. @@ -1311,7 +1319,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { } } - if (unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) { + if (getHasIndirectAccess() && unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) { pageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomain(this->getContext().getSVMAllocsManager()); } makeArgsResident(commandStreamReceiver); @@ -1323,9 +1331,9 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { gtpinNotifyMakeResident(this, &commandStreamReceiver); - if (unifiedMemoryControls.indirectDeviceAllocationsAllowed || - unifiedMemoryControls.indirectHostAllocationsAllowed || - unifiedMemoryControls.indirectSharedAllocationsAllowed) { + if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed || + unifiedMemoryControls.indirectHostAllocationsAllowed || + unifiedMemoryControls.indirectSharedAllocationsAllowed)) { this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask()); } } diff --git a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp index 5d0a4f9d92..4d2ebba16a 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp @@ -432,6 +432,7 @@ XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenStatelessKernelWhen } XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenKernelExecInfoWhenItHasIndirectHostAccessThenDisableCompressionInSBA) { + EXPECT_TRUE(multiDeviceKernel->getHasIndirectAccess()); const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 4e38f640a7..d39987e731 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1267,6 +1267,28 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeReside svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } +HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryButNotHasIndirectAccessWhenMakeResidentIsCalledThenOnlySharedAllocationsAreNotMadeResident) { + MockKernelWithInternals mockKernel(*this->pClDevice); + auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); + + auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); + auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); + auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); + auto unifiedSharedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex())); + auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties); + mockKernel.mockKernel->kernelHasIndirectAccess = false; + + mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); + EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); + mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true); + + mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); + EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); + + svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation); + svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); +} + HWTEST_F(KernelResidencyTest, givenDeviceUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsNotDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); @@ -1704,12 +1726,43 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) { +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicAndHasIndirectStatelessAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(1); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true; + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_TRUE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorIndirectStatelessAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToFalse) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(1); + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; @@ -1729,12 +1782,77 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirectAccessIsSetToTrue) { +HWTEST_F(KernelResidencyTest, givenKernelWithPtrByValueArgumentAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(1); + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; + + auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue); + ArgDescValue::Element element; + element.isPtr = true; + ptrByValueArg.as().elements.push_back(element); + pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(ptrByValueArg); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_TRUE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorHasIndirectStatelessAccessAndDetectIndirectAccessInKernelDisabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(0); + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_TRUE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(1); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; @@ -1754,12 +1872,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndirectAccessIsSetToTrue) { +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(1); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; @@ -1779,12 +1900,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } -HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndirectAccessIsSetToTrue) { +HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + DebugManager.flags.DetectIndirectAccessInKernel.set(1); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 1cf55d945c..39f6f4defc 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -299,6 +299,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideKernelSizeLimitForSmallDispatch, -1, "-1 DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0: disabled), 0: disabled, 1: enabled. It uses only busy loop to wait or busy loop with KMD wait function, when KMD fallback is enabled") DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: default , 0: disabled, 1: enabled. If enabled, instead of programming semaphores, dependencies are resolved using task levels") DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.") +DECLARE_DEBUG_VARIABLE(int32_t, DetectIndirectAccessInKernel, -1, "-1: default, 0:disabled, 1: enabled. If enabled and indirect accesses are not detected in kernel, indirect allocations will not be allowed even if set by API.") DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush") DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.") DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.") diff --git a/shared/source/helpers/kernel_helpers.cpp b/shared/source/helpers/kernel_helpers.cpp index b68caca54e..d540f2bfae 100644 --- a/shared/source/helpers/kernel_helpers.cpp +++ b/shared/source/helpers/kernel_helpers.cpp @@ -85,4 +85,17 @@ KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(Ker return KernelHelper::ErrorCode::SUCCESS; } +bool KernelHelper::isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor) { + for (auto &argDescriptor : kernelDescriptor.payloadMappings.explicitArgs) { + if (argDescriptor.type == NEO::ArgDescriptor::ArgTValue) { + for (auto &element : argDescriptor.as().elements) { + if (element.isPtr) { + return true; + } + } + } + } + return false; +} + } // namespace NEO diff --git a/shared/source/helpers/kernel_helpers.h b/shared/source/helpers/kernel_helpers.h index 77408e1cf3..cb5b61c605 100644 --- a/shared/source/helpers/kernel_helpers.h +++ b/shared/source/helpers/kernel_helpers.h @@ -34,6 +34,8 @@ struct KernelHelper { return perHwThreadPrivateScratchSize * computeUnitsUsedForScratch; } static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device); + + static bool isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor); }; } // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 2341722c8a..319e0f9f8f 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -54,9 +54,9 @@ struct KernelDescriptor { uint16_t numArgsToPatch = 0U; uint16_t numGrfRequired = GrfConfig::DefaultGrfNumber; uint8_t barrierCount = 0u; - bool hasNonKernelArgLoad = true; - bool hasNonKernelArgStore = true; - bool hasNonKernelArgAtomic = true; + bool hasNonKernelArgLoad = false; + bool hasNonKernelArgStore = false; + bool hasNonKernelArgAtomic = false; bool hasIndirectStatelessAccess = false; AddressingMode bufferAddressingMode = BindfulAndStateless; diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index cbc078eb3e..6c8fc2c242 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -25,6 +25,7 @@ class Device; enum class LocalMemoryAccessMode; struct FrontEndPropertiesSupport; struct HardwareInfo; +struct KernelDescriptor; struct PipelineSelectArgs; struct PipelineSelectPropertiesSupport; struct StateBaseAddressPropertiesSupport; @@ -166,6 +167,7 @@ class ProductHelper { virtual uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const = 0; virtual bool isTlbFlushRequired() const = 0; virtual bool isDummyBlitWaRequired() const = 0; + virtual bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const = 0; virtual bool getFrontEndPropertyScratchSizeSupport() const = 0; virtual bool getFrontEndPropertyPrivateScratchSizeSupport() const = 0; virtual bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const = 0; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index c37dbd6df3..890fd494d9 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -56,6 +56,12 @@ bool ProductHelperHw::isTlbFlushRequired() const { return true; } +template +bool ProductHelperHw::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const { + constexpr bool enabled = false; + return enabled; +} + template void ProductHelperHw::enableBlitterOperationsSupport(HardwareInfo *hwInfo) const { hwInfo->capabilityTable.blitterOperationsSupported = obtainBlitterPreference(*hwInfo); diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 3b8424f348..55d47d1faf 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -122,6 +122,7 @@ class ProductHelperHw : public ProductHelper { uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const override; bool isTlbFlushRequired() const override; bool isDummyBlitWaRequired() const override; + bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const override; bool getFrontEndPropertyScratchSizeSupport() const override; bool getFrontEndPropertyPrivateScratchSizeSupport() const override; bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const override; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index aca77a87b9..078e2c31f4 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -502,4 +502,5 @@ AppendAubStreamContextFlags = -1 DirectSubmissionRelaxedOrderingMinNumberOfClients = -1 UseDeprecatedClDeviceIpVersion = 0 ExperimentalCopyThroughLockWaitlistSizeThreshold= -1 -ForceDummyBlitWa = 0 \ No newline at end of file +ForceDummyBlitWa = 0 +DetectIndirectAccessInKernel = -1 diff --git a/shared/test/unit_test/helpers/kernel_helpers_tests.cpp b/shared/test/unit_test/helpers/kernel_helpers_tests.cpp index 88d8264c4a..970ad3ab85 100644 --- a/shared/test/unit_test/helpers/kernel_helpers_tests.cpp +++ b/shared/test/unit_test/helpers/kernel_helpers_tests.cpp @@ -162,4 +162,32 @@ TEST_F(KernelHelperTest, GivenScratchEqualsZeroAndPrivetGreaterThanZeroWhenCheck attributes.perThreadScratchSize[0] = 0; attributes.perThreadScratchSize[1] = 0; EXPECT_EQ(KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice), KernelHelper::ErrorCode::SUCCESS); +} + +TEST_F(KernelHelperTest, GivenNoPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenFalseIsReturned) { + KernelDescriptor kernelDescriptor; + auto pointerArg = ArgDescriptor(ArgDescriptor::ArgTPointer); + + auto valueArg = ArgDescriptor(ArgDescriptor::ArgTValue); + ArgDescValue::Element element; + element.isPtr = false; + valueArg.as().elements.push_back(element); + + kernelDescriptor.payloadMappings.explicitArgs.push_back(pointerArg); + kernelDescriptor.payloadMappings.explicitArgs.push_back(valueArg); + EXPECT_FALSE(KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor)); +} + +TEST_F(KernelHelperTest, GivenPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenTrueIsReturned) { + KernelDescriptor kernelDescriptor; + auto pointerArg = ArgDescriptor(ArgDescriptor::ArgTPointer); + + auto valueArg = ArgDescriptor(ArgDescriptor::ArgTValue); + ArgDescValue::Element element; + element.isPtr = true; + valueArg.as().elements.push_back(element); + + kernelDescriptor.payloadMappings.explicitArgs.push_back(pointerArg); + kernelDescriptor.payloadMappings.explicitArgs.push_back(valueArg); + EXPECT_TRUE(KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor)); } \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/hw_info_config_tests.cpp b/shared/test/unit_test/os_interface/hw_info_config_tests.cpp index 4dad78746d..6c693187e4 100644 --- a/shared/test/unit_test/os_interface/hw_info_config_tests.cpp +++ b/shared/test/unit_test/os_interface/hw_info_config_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_memory_access_modes.h" +#include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/unified_memory/usm_memory_support.h" #include "shared/test/common/fixtures/device_fixture.h" @@ -686,4 +687,14 @@ HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckingIsUnlockingLockedPtrNe HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckDummyBlitWaRequiredThenReturnFalse) { EXPECT_FALSE(productHelper->isDummyBlitWaRequired()); +} + +HWTEST_F(ProductHelperTest, givenProductHelperAndKernelBinaryFormatsWhenCheckingIsDetectIndirectAccessInKernelSupportedThenCorrectValueIsReturned) { + KernelDescriptor kernelDescriptor; + + kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); + + kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Zebin; + EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(kernelDescriptor)); } \ No newline at end of file