feature: check indirect access for kernel

Do not make indirect allocations resident if the kernel does not use
indirect access.
This applies to both Level Zero and OpenCL.
The feature is currently disabled by default; enable it with the debug flag
DetectIndirectAccessInKernel.

Related-To: NEO-7712

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-03-07 15:22:10 +00:00
committed by Compute-Runtime-Automation
parent c7311bc140
commit 69a16fd3ed
20 changed files with 354 additions and 54 deletions

View File

@@ -430,7 +430,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
}
ze_result_t KernelImp::setIndirectAccess(ze_kernel_indirect_access_flags_t flags) {
if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1 || this->kernelHasIndirectAccess == false) {
if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1) {
return ZE_RESULT_SUCCESS;
}
@@ -924,9 +924,19 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(),
kernelImmData->getResidencyContainer().end());
kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor);
if (NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) {
detectIndirectAccessInKernel = NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() == 1;
}
if (detectIndirectAccessInKernel) {
kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess ||
NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor);
} else {
kernelHasIndirectAccess = true;
}
if (this->usesRayTracing()) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
@@ -1061,9 +1071,9 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module,
}
bool KernelImp::hasIndirectAllocationsAllowed() const {
return (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
unifiedMemoryControls.indirectSharedAllocationsAllowed);
return this->kernelHasIndirectAccess && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
unifiedMemoryControls.indirectSharedAllocationsAllowed);
}
uint32_t KernelImp::getSlmTotalSize() const {

View File

@@ -226,7 +226,7 @@ struct KernelImp : Kernel {
ze_cache_config_flags_t cacheConfigFlags = 0u;
bool kernelHasIndirectAccess = true;
bool kernelHasIndirectAccess = false;
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;

View File

@@ -90,6 +90,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
using KernelImp::requiredWorkgroupOrder;
using KernelImp::surfaceStateHeapData;
using KernelImp::surfaceStateHeapDataSize;
using KernelImp::unifiedMemoryControls;
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
}

View File

@@ -48,6 +48,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
using ::L0::KernelImp::dynamicStateHeapData;
using ::L0::KernelImp::dynamicStateHeapDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::kernelHasIndirectAccess;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
using ::L0::KernelImp::midThreadPreemptionDisallowedForRayTracingKernels;

View File

@@ -19,6 +19,7 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
@@ -27,26 +28,52 @@
namespace L0 {
namespace ult {
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
createKernel();
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
using CommandListAppendLaunchKernelMockModule = Test<ModuleMutableCommandListFixture>;
HWTEST_F(CommandListAppendLaunchKernelMockModule, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
DebugManagerStateRestore restorer;
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
mockKernelImmData->kernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true;
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
EXPECT_TRUE(kernel->hasIndirectAllocationsAllowed());
ze_group_count_t groupCount{1, 1, 1};
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
{
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed());
}
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_TRUE(commandList->hasIndirectAllocationsAllowed());
{
returnValue = commandList->reset();
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false;
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed());
}
{
returnValue = commandList->reset();
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false;
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed());
}
}
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
createKernel();
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
@@ -356,9 +383,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<::L0::Event>(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
@@ -451,9 +478,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFla
ZE_EVENT_SCOPE_FLAG_HOST,
ZE_EVENT_SCOPE_FLAG_HOST};
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<::L0::Event>(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
@@ -568,9 +595,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS
ZE_EVENT_SCOPE_FLAG_HOST,
ZE_EVENT_SCOPE_FLAG_HOST};
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto event = std::unique_ptr<::L0::Event>(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
EXPECT_EQ(1u, event->getPacketsInUse());
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
@@ -784,9 +811,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemapho
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
std::unique_ptr<Event> event(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
std::unique_ptr<::L0::Event> event(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_event_handle_t hEventHandle = event->toHandle();
ze_group_count_t groupCount{1, 1, 1};
@@ -836,10 +863,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenMultipleValidWaitEventsThenAddSemap
ze_event_desc_t eventDesc2 = {};
eventDesc2.index = 1;
std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
std::unique_ptr<Event> event1(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc1, device));
std::unique_ptr<Event> event2(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc2, device));
std::unique_ptr<::L0::Event> event1(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc1, device));
std::unique_ptr<::L0::Event> event2(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc2, device));
ze_event_handle_t hEventHandle1 = event1->toHandle();
ze_event_handle_t hEventHandle2 = event2->toHandle();

View File

@@ -425,6 +425,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
createKernel();
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
kernel->kernelHasIndirectAccess = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
ze_group_count_t groupCount{1, 1, 1};
@@ -487,6 +489,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
createKernel();
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
kernel->kernelHasIndirectAccess = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
ze_group_count_t groupCount{1, 1, 1};
@@ -551,6 +554,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
createKernel();
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
kernel->kernelHasIndirectAccess = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
static_cast<MockMemoryManager *>(driverHandle->getMemoryManager())->overrideAllocateAsPackReturn = 1u;

View File

@@ -1214,9 +1214,10 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) {
TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicAndNoHasIndirectStatelessAccessThenHasIndirectAccessIsSetToFalse) {
DebugManagerStateRestore restorer;
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
bool isInternal = false;
@@ -1235,15 +1236,53 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
EXPECT_FALSE(kernel->hasIndirectAccess());
}
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) {
// With DetectIndirectAccessInKernel forced to 1, a kernel whose four indirect-access
// attributes are all false but which takes a by-value argument containing a pointer
// element must still report indirect access after initialize().
TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledAndPtrPassedByValueWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicAndNoHasIndirectStatelessAccessThenHasIndirectAccessIsSetToTrue) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);

    uint32_t privateMemorySizePerHwThread = 32u;
    bool internalModule = false;

    auto immutableData = std::make_unique<MockImmutableData>(privateMemorySizePerHwThread);
    immutableData->mockKernelDescriptor->kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin;

    // Build a single by-value explicit argument whose only element is flagged as a pointer.
    ArgDescValue::Element pointerElement{};
    pointerElement.isPtr = true;
    ArgDescriptor pointerByValueArg(ArgDescriptor::ArgTValue);
    pointerByValueArg.as<ArgDescValue>().elements.push_back(pointerElement);

    auto &explicitArgs = immutableData->mockKernelDescriptor->payloadMappings.explicitArgs;
    explicitArgs.push_back(pointerByValueArg);
    EXPECT_EQ(explicitArgs.size(), 1u);

    createModuleFromMockBinary(privateMemorySizePerHwThread, internalModule, immutableData.get());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc{};
    kernelDesc.pKernelName = kernelName.c_str();

    // Clear every attribute-based source of indirect access so that only the
    // pointer-by-value argument remains.
    auto &attributes = module->mockKernelImmData->mockKernelDescriptor->kernelAttributes;
    attributes.hasNonKernelArgLoad = false;
    attributes.hasNonKernelArgStore = false;
    attributes.hasNonKernelArgAtomic = false;
    attributes.hasIndirectStatelessAccess = false;

    kernel->initialize(&kernelDesc);

    EXPECT_TRUE(kernel->hasIndirectAccess());
}
TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledWhenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) {
DebugManagerStateRestore restorer;
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
bool isInternal = false;
@@ -1263,6 +1302,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
@@ -1279,6 +1319,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
@@ -1295,6 +1336,24 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
EXPECT_TRUE(kernel->hasIndirectAccess());
}
{
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true;
kernel->initialize(&desc);
@@ -1619,7 +1678,7 @@ TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThat
EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsNotSet) {
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsSet) {
DebugManagerStateRestore restorer;
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
kernel->kernelHasIndirectAccess = false;
@@ -1636,9 +1695,9 @@ TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThat
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
unifiedMemoryControls = kernel->getUnifiedMemoryControls();
EXPECT_FALSE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
EXPECT_FALSE(unifiedMemoryControls.indirectHostAllocationsAllowed);
EXPECT_FALSE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed);
EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}
TEST_F(KernelPropertiesTests, givenValidKernelIndirectAccessFlagsSetThenExpectKernelIndirectAllocationsAllowedTrue) {

View File

@@ -148,13 +148,13 @@ void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, Grap
}
cl_int Kernel::initialize() {
this->kernelHasIndirectAccess = false;
auto pClDevice = &getDevice();
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
reconfigureKernel();
auto &hwInfo = pClDevice->getHardwareInfo();
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs;
const auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs;
@@ -281,11 +281,19 @@ cl_int Kernel::initialize() {
slmSizes.resize(numArgs);
this->setInlineSamplers();
this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor);
if (DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) {
detectIndirectAccessInKernel = DebugManager.flags.DetectIndirectAccessInKernel.get() == 1;
}
if (detectIndirectAccessInKernel) {
this->kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess ||
NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor);
} else {
this->kernelHasIndirectAccess = true;
}
provideInitializationHints();
// resolve the new kernel info to account for kernel handlers
// I think by this time we have decoded the binary and know the number of args etc.
@@ -1311,7 +1319,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
}
}
if (unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) {
if (getHasIndirectAccess() && unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) {
pageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomain(this->getContext().getSVMAllocsManager());
}
makeArgsResident(commandStreamReceiver);
@@ -1323,9 +1331,9 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
gtpinNotifyMakeResident(this, &commandStreamReceiver);
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
unifiedMemoryControls.indirectSharedAllocationsAllowed) {
if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
unifiedMemoryControls.indirectSharedAllocationsAllowed)) {
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
}
}

View File

@@ -432,6 +432,7 @@ XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenStatelessKernelWhen
}
XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenKernelExecInfoWhenItHasIndirectHostAccessThenDisableCompressionInSBA) {
EXPECT_TRUE(multiDeviceKernel->getHasIndirectAccess());
const size_t bufferSize = MemoryConstants::kiloByte;
uint8_t bufferData[bufferSize] = {};

View File

@@ -1267,6 +1267,28 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeReside
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}
// When kernelHasIndirectAccess is false, makeResident() must leave the command stream
// receiver's residency list empty, both before and after indirect shared access is
// enabled through setUnifiedMemoryProperty().
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryButNotHasIndirectAccessWhenMakeResidentIsCalledThenOnlySharedAllocationsAreNotMadeResident) {
    MockKernelWithInternals kernelWithInternals(*this->pClDevice);
    auto &context = *kernelWithInternals.mockContext;
    auto &kernel = *kernelWithInternals.mockKernel;
    auto &ultCsr = this->pDevice->getUltCommandStreamReceiver<FamilyType>();

    auto svmManager = context.getSVMAllocsManager();

    // One shared and one host unified-memory allocation exist while the kernel runs.
    auto sharedMemoryProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY,
                                                                            context.getRootDeviceIndices(),
                                                                            context.getDeviceBitfields());
    auto hostMemoryProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY,
                                                                          context.getRootDeviceIndices(),
                                                                          context.getDeviceBitfields());
    auto sharedAllocation = svmManager->createSharedUnifiedMemoryAllocation(4096u, sharedMemoryProperties,
                                                                            context.getSpecialQueue(pDevice->getRootDeviceIndex()));
    auto hostAllocation = svmManager->createUnifiedMemoryAllocation(4096u, hostMemoryProperties);

    kernel.kernelHasIndirectAccess = false;

    // Indirect shared access not requested yet.
    kernel.makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_EQ(0u, ultCsr.getResidencyAllocations().size());

    // Indirect shared access requested, but the kernel still has no indirect access.
    kernel.setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true);
    kernel.makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_EQ(0u, ultCsr.getResidencyAllocations().size());

    svmManager->freeSVMAlloc(sharedAllocation);
    svmManager->freeSVMAlloc(hostAllocation);
}
HWTEST_F(KernelResidencyTest, givenDeviceUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsNotDecommited) {
auto mockPageFaultManager = new MockPageFaultManager();
static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
@@ -1704,12 +1726,43 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) {
// With DetectIndirectAccessInKernel forced to 1, hasIndirectStatelessAccess alone
// (load/store/atomic attributes all false) must make the initialized kernel report
// indirect access.
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicAndHasIndirectStatelessAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore restorer;
    DebugManager.flags.DetectIndirectAccessInKernel.set(1);

    auto kernelInfo = std::make_unique<KernelInfo>();
    auto &attributes = kernelInfo->kernelDescriptor.kernelAttributes;
    attributes.simdSize = 1;
    attributes.hasNonKernelArgLoad = false;
    attributes.hasNonKernelArgStore = false;
    attributes.hasNonKernelArgAtomic = false;
    attributes.hasIndirectStatelessAccess = true; // the only indirect-access indicator set

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.storeMakeResidentAllocations = true;

    auto memoryManager = ultCsr.getMemoryManager();
    kernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(
        MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);
    program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation();

    auto kernel = std::make_unique<MockKernel>(&program, *kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());

    EXPECT_TRUE(kernel->getHasIndirectAccess());

    memoryManager->freeGraphicsMemory(kernelInfo->kernelAllocation);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorIndirectStatelessAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToFalse) {
DebugManagerStateRestore restorer;
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1729,12 +1782,77 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirectAccessIsSetToTrue) {
// With DetectIndirectAccessInKernel forced to 1 and all four indirect-access attributes
// false, a by-value explicit argument that contains a pointer element must make the
// initialized kernel report indirect access.
HWTEST_F(KernelResidencyTest, givenKernelWithPtrByValueArgumentAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore restorer;
    DebugManager.flags.DetectIndirectAccessInKernel.set(1);

    auto pKernelInfo = std::make_unique<KernelInfo>();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
    pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
    pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
    pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
    pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;

    auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
    // Value-initialize the element so every field other than isPtr has a defined value
    // before it is copied into the argument descriptor.
    ArgDescValue::Element element{};
    element.isPtr = true;
    ptrByValueArg.as<ArgDescValue>().elements.push_back(element);
    pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(ptrByValueArg);

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    auto memoryManager = commandStreamReceiver.getMemoryManager();
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);
    program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation();
    std::unique_ptr<MockKernel> kernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());

    // The pointer-by-value argument is the only indirect-access indicator in this setup.
    EXPECT_TRUE(kernel->getHasIndirectAccess());

    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
// With DetectIndirectAccessInKernel forced to 0, the initialized kernel must report
// indirect access even though every indirect-access attribute is false.
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorHasIndirectStatelessAccessAndDetectIndirectAccessInKernelDisabledThenKernelHasIndirectAccessIsSetToTrue) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore restorer;
    DebugManager.flags.DetectIndirectAccessInKernel.set(0);

    auto kernelInfo = std::make_unique<KernelInfo>();
    auto &attributes = kernelInfo->kernelDescriptor.kernelAttributes;
    attributes.simdSize = 1;
    // No attribute signals indirect access.
    attributes.hasNonKernelArgLoad = false;
    attributes.hasNonKernelArgStore = false;
    attributes.hasNonKernelArgAtomic = false;
    attributes.hasIndirectStatelessAccess = false;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.storeMakeResidentAllocations = true;

    auto memoryManager = ultCsr.getMemoryManager();
    kernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(
        MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);
    program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation();

    auto kernel = std::make_unique<MockKernel>(&program, *kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());

    EXPECT_TRUE(kernel->getHasIndirectAccess());

    memoryManager->freeGraphicsMemory(kernelInfo->kernelAllocation);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
DebugManagerStateRestore restorer;
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1754,12 +1872,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndirectAccessIsSetToTrue) {
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
DebugManagerStateRestore restorer;
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1779,12 +1900,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndirectAccessIsSetToTrue) {
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
DebugManagerStateRestore restorer;
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;

View File

@@ -299,6 +299,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideKernelSizeLimitForSmallDispatch, -1, "-1
DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0: disabled), 0: disabled, 1: enabled. It uses only busy loop to wait or busy loop with KMD wait function, when KMD fallback is enabled")
DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: default , 0: disabled, 1: enabled. If enabled, instead of programming semaphores, dependencies are resolved using task levels")
DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.")
DECLARE_DEBUG_VARIABLE(int32_t, DetectIndirectAccessInKernel, -1, "-1: default, 0:disabled, 1: enabled. If enabled and indirect accesses are not detected in kernel, indirect allocations will not be allowed even if set by API.")
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")

View File

@@ -85,4 +85,17 @@ KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(Ker
return KernelHelper::ErrorCode::SUCCESS;
}
// Scans the explicit kernel arguments and reports whether any by-value
// argument (ArgTValue) contains at least one element flagged as a pointer.
// Returns false when no such element is found, including when there are no
// explicit arguments at all.
bool KernelHelper::isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor) {
    for (const auto &explicitArg : kernelDescriptor.payloadMappings.explicitArgs) {
        // Skip anything that is not a by-value argument.
        if (explicitArg.type != NEO::ArgDescriptor::ArgTValue) {
            continue;
        }
        for (const auto &valueElement : explicitArg.as<NEO::ArgDescValue>().elements) {
            if (valueElement.isPtr) {
                return true;
            }
        }
    }
    return false;
}
} // namespace NEO

View File

@@ -34,6 +34,8 @@ struct KernelHelper {
return perHwThreadPrivateScratchSize * computeUnitsUsedForScratch;
}
static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device);
static bool isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor);
};
} // namespace NEO

View File

@@ -54,9 +54,9 @@ struct KernelDescriptor {
uint16_t numArgsToPatch = 0U;
uint16_t numGrfRequired = GrfConfig::DefaultGrfNumber;
uint8_t barrierCount = 0u;
bool hasNonKernelArgLoad = true;
bool hasNonKernelArgStore = true;
bool hasNonKernelArgAtomic = true;
bool hasNonKernelArgLoad = false;
bool hasNonKernelArgStore = false;
bool hasNonKernelArgAtomic = false;
bool hasIndirectStatelessAccess = false;
AddressingMode bufferAddressingMode = BindfulAndStateless;

View File

@@ -25,6 +25,7 @@ class Device;
enum class LocalMemoryAccessMode;
struct FrontEndPropertiesSupport;
struct HardwareInfo;
struct KernelDescriptor;
struct PipelineSelectArgs;
struct PipelineSelectPropertiesSupport;
struct StateBaseAddressPropertiesSupport;
@@ -166,6 +167,7 @@ class ProductHelper {
virtual uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const = 0;
virtual bool isTlbFlushRequired() const = 0;
virtual bool isDummyBlitWaRequired() const = 0;
virtual bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const = 0;
virtual bool getFrontEndPropertyScratchSizeSupport() const = 0;
virtual bool getFrontEndPropertyPrivateScratchSizeSupport() const = 0;
virtual bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const = 0;

View File

@@ -56,6 +56,12 @@ bool ProductHelperHw<gfxProduct>::isTlbFlushRequired() const {
return true;
}
// Generic template implementation: always reports that detecting indirect
// access in a kernel is not supported. The kernel descriptor is not inspected.
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const {
    return false;
}
template <PRODUCT_FAMILY gfxProduct>
void ProductHelperHw<gfxProduct>::enableBlitterOperationsSupport(HardwareInfo *hwInfo) const {
hwInfo->capabilityTable.blitterOperationsSupported = obtainBlitterPreference(*hwInfo);

View File

@@ -122,6 +122,7 @@ class ProductHelperHw : public ProductHelper {
uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const override;
bool isTlbFlushRequired() const override;
bool isDummyBlitWaRequired() const override;
bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const override;
bool getFrontEndPropertyScratchSizeSupport() const override;
bool getFrontEndPropertyPrivateScratchSizeSupport() const override;
bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const override;

View File

@@ -503,3 +503,4 @@ DirectSubmissionRelaxedOrderingMinNumberOfClients = -1
UseDeprecatedClDeviceIpVersion = 0
ExperimentalCopyThroughLockWaitlistSizeThreshold= -1
ForceDummyBlitWa = 0
DetectIndirectAccessInKernel = -1

View File

@@ -163,3 +163,31 @@ TEST_F(KernelHelperTest, GivenScratchEqualsZeroAndPrivetGreaterThanZeroWhenCheck
attributes.perThreadScratchSize[1] = 0;
EXPECT_EQ(KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice), KernelHelper::ErrorCode::SUCCESS);
}
TEST_F(KernelHelperTest, GivenNoPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenFalseIsReturned) {
    // By-value argument whose single element is not a pointer.
    ArgDescValue::Element nonPointerElement;
    nonPointerElement.isPtr = false;
    ArgDescriptor byValueArg(ArgDescriptor::ArgTValue);
    byValueArg.as<ArgDescValue>().elements.push_back(nonPointerElement);

    ArgDescriptor byPointerArg(ArgDescriptor::ArgTPointer);

    // Register the pointer argument first, then the by-value one.
    KernelDescriptor descriptor;
    auto &explicitArgs = descriptor.payloadMappings.explicitArgs;
    explicitArgs.push_back(byPointerArg);
    explicitArgs.push_back(byValueArg);

    EXPECT_FALSE(KernelHelper::isAnyArgumentPtrByValue(descriptor));
}
TEST_F(KernelHelperTest, GivenPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenTrueIsReturned) {
    // By-value argument whose single element is flagged as a pointer.
    ArgDescValue::Element pointerElement;
    pointerElement.isPtr = true;
    ArgDescriptor byValueArg(ArgDescriptor::ArgTValue);
    byValueArg.as<ArgDescValue>().elements.push_back(pointerElement);

    ArgDescriptor byPointerArg(ArgDescriptor::ArgTPointer);

    // Register the pointer argument first, then the by-value one.
    KernelDescriptor descriptor;
    auto &explicitArgs = descriptor.payloadMappings.explicitArgs;
    explicitArgs.push_back(byPointerArg);
    explicitArgs.push_back(byValueArg);

    EXPECT_TRUE(KernelHelper::isAnyArgumentPtrByValue(descriptor));
}

View File

@@ -10,6 +10,7 @@
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/unified_memory/usm_memory_support.h"
#include "shared/test/common/fixtures/device_fixture.h"
@@ -687,3 +688,13 @@ HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckingIsUnlockingLockedPtrNe
HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckDummyBlitWaRequiredThenReturnFalse) {
    // The product helper under test is expected to report that the dummy blit workaround is not required.
    const bool dummyBlitWaRequired = productHelper->isDummyBlitWaRequired();
    EXPECT_FALSE(dummyBlitWaRequired);
}
HWTEST_F(ProductHelperTest, givenProductHelperAndKernelBinaryFormatsWhenCheckingIsDetectIndirectAccessInKernelSupportedThenCorrectValueIsReturned) {
    // Patchtokens is checked first, then Zebin; "not supported" is expected for both.
    const DeviceBinaryFormat formatsUnderTest[] = {DeviceBinaryFormat::Patchtokens, DeviceBinaryFormat::Zebin};

    KernelDescriptor descriptor;
    for (const auto format : formatsUnderTest) {
        descriptor.kernelAttributes.binaryFormat = format;
        EXPECT_FALSE(productHelper->isDetectIndirectAccessInKernelSupported(descriptor));
    }
}