mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-06 19:32:25 +08:00
feature: check indirect access for kernel
Do not make indirect allocations resident if the kernel does not use indirect access. This applies to both Level Zero and OpenCL. The check is currently disabled by default; enable it with the debug flag DetectIndirectAccessInKernel. Related-To: NEO-7712. Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c7311bc140
commit
69a16fd3ed
@@ -430,7 +430,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::setIndirectAccess(ze_kernel_indirect_access_flags_t flags) {
|
||||
if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1 || this->kernelHasIndirectAccess == false) {
|
||||
if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -924,9 +924,19 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(),
|
||||
kernelImmData->getResidencyContainer().end());
|
||||
|
||||
kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
|
||||
bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor);
|
||||
if (NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) {
|
||||
detectIndirectAccessInKernel = NEO::DebugManager.flags.DetectIndirectAccessInKernel.get() == 1;
|
||||
}
|
||||
if (detectIndirectAccessInKernel) {
|
||||
kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
|
||||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess ||
|
||||
NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor);
|
||||
} else {
|
||||
kernelHasIndirectAccess = true;
|
||||
}
|
||||
|
||||
if (this->usesRayTracing()) {
|
||||
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
|
||||
@@ -1061,9 +1071,9 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module,
|
||||
}
|
||||
|
||||
// Reports whether indirect allocations are allowed for this kernel, based on the
// three unifiedMemoryControls flags (device, host, shared).
// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost, so two
// return statements appear back to back; read literally, the second one is unreachable.
bool KernelImp::hasIndirectAllocationsAllowed() const {
|
||||
// Presumably the pre-change form (removed lines normally precede added ones in a hunk):
// true when any of the three indirect-allocation flags is set.
return (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
// Presumably the post-change form: the same flag check, additionally gated on
// kernelHasIndirectAccess, so a kernel without indirect access never reports true.
return this->kernelHasIndirectAccess && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
}
|
||||
|
||||
uint32_t KernelImp::getSlmTotalSize() const {
|
||||
|
||||
@@ -226,7 +226,7 @@ struct KernelImp : Kernel {
|
||||
|
||||
ze_cache_config_flags_t cacheConfigFlags = 0u;
|
||||
|
||||
bool kernelHasIndirectAccess = true;
|
||||
bool kernelHasIndirectAccess = false;
|
||||
|
||||
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;
|
||||
|
||||
|
||||
@@ -90,6 +90,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
using KernelImp::requiredWorkgroupOrder;
|
||||
using KernelImp::surfaceStateHeapData;
|
||||
using KernelImp::surfaceStateHeapDataSize;
|
||||
using KernelImp::unifiedMemoryControls;
|
||||
|
||||
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
|
||||
}
|
||||
|
||||
@@ -48,6 +48,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
|
||||
using ::L0::KernelImp::dynamicStateHeapData;
|
||||
using ::L0::KernelImp::dynamicStateHeapDataSize;
|
||||
using ::L0::KernelImp::groupSize;
|
||||
using ::L0::KernelImp::kernelHasIndirectAccess;
|
||||
using ::L0::KernelImp::kernelImmData;
|
||||
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using ::L0::KernelImp::midThreadPreemptionDisallowedForRayTracingKernels;
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
@@ -27,26 +28,52 @@
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
|
||||
createKernel();
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
|
||||
using CommandListAppendLaunchKernelMockModule = Test<ModuleMutableCommandListFixture>;
|
||||
HWTEST_F(CommandListAppendLaunchKernelMockModule, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
|
||||
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
|
||||
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
|
||||
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
|
||||
|
||||
EXPECT_TRUE(kernel->hasIndirectAllocationsAllowed());
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
{
|
||||
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed());
|
||||
}
|
||||
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_TRUE(commandList->hasIndirectAllocationsAllowed());
|
||||
{
|
||||
returnValue = commandList->reset();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
|
||||
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
|
||||
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false;
|
||||
|
||||
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed());
|
||||
}
|
||||
|
||||
{
|
||||
returnValue = commandList->reset();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
|
||||
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false;
|
||||
|
||||
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
EXPECT_TRUE(commandList->hasIndirectAllocationsAllowed());
|
||||
}
|
||||
}
|
||||
|
||||
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
|
||||
createKernel();
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
|
||||
@@ -356,9 +383,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
|
||||
|
||||
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto event = std::unique_ptr<::L0::Event>(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
@@ -451,9 +478,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFla
|
||||
ZE_EVENT_SCOPE_FLAG_HOST,
|
||||
ZE_EVENT_SCOPE_FLAG_HOST};
|
||||
|
||||
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto event = std::unique_ptr<::L0::Event>(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
@@ -568,9 +595,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS
|
||||
ZE_EVENT_SCOPE_FLAG_HOST,
|
||||
ZE_EVENT_SCOPE_FLAG_HOST};
|
||||
|
||||
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
auto eventPool = std::unique_ptr<::L0::EventPool>(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto event = std::unique_ptr<::L0::Event>(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
@@ -784,9 +811,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemapho
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
|
||||
std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
std::unique_ptr<Event> event(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
std::unique_ptr<::L0::Event> event(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
@@ -836,10 +863,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenMultipleValidWaitEventsThenAddSemap
|
||||
ze_event_desc_t eventDesc2 = {};
|
||||
eventDesc2.index = 1;
|
||||
|
||||
std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
std::unique_ptr<::L0::EventPool> eventPool(::L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
std::unique_ptr<Event> event1(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc1, device));
|
||||
std::unique_ptr<Event> event2(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc2, device));
|
||||
std::unique_ptr<::L0::Event> event1(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc1, device));
|
||||
std::unique_ptr<::L0::Event> event2(::L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc2, device));
|
||||
ze_event_handle_t hEventHandle1 = event1->toHandle();
|
||||
ze_event_handle_t hEventHandle2 = event2->toHandle();
|
||||
|
||||
|
||||
@@ -425,6 +425,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
|
||||
|
||||
createKernel();
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
kernel->kernelHasIndirectAccess = true;
|
||||
|
||||
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
@@ -487,6 +489,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
|
||||
|
||||
createKernel();
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
kernel->kernelHasIndirectAccess = true;
|
||||
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
@@ -551,6 +554,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
|
||||
|
||||
createKernel();
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
kernel->kernelHasIndirectAccess = true;
|
||||
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
|
||||
|
||||
static_cast<MockMemoryManager *>(driverHandle->getMemoryManager())->overrideAllocateAsPackReturn = 1u;
|
||||
|
||||
@@ -1214,9 +1214,10 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
|
||||
|
||||
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
|
||||
|
||||
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) {
|
||||
TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicAndNoHasIndirectStatelessAccessThenHasIndirectAccessIsSetToFalse) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
|
||||
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
|
||||
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
|
||||
bool isInternal = false;
|
||||
@@ -1235,15 +1236,53 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
EXPECT_FALSE(kernel->hasIndirectAccess());
|
||||
}
|
||||
|
||||
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) {
|
||||
// Checks that a kernel whose descriptor carries a by-value argument containing a pointer
// element reports hasIndirectAccess() == true after initialize(), even though all four
// kernelAttributes indirect-access flags are cleared, when DetectIndirectAccessInKernel is 1.
TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledAndPtrPassedByValueWhenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicAndNoHasIndirectStatelessAccessThenHasIndirectAccessIsSetToTrue) {
|
||||
// Presumably restores the debug flags changed below when the test scope ends — confirm.
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
|
||||
// Force the detection path on regardless of the product default.
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
|
||||
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
|
||||
bool isInternal = false;
|
||||
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData =
|
||||
std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
|
||||
mockKernelImmData->mockKernelDescriptor->kernelAttributes.binaryFormat = NEO::DeviceBinaryFormat::Zebin;
|
||||
// Build a by-value explicit argument with a single element flagged as a pointer.
auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
|
||||
ArgDescValue::Element element{};
|
||||
element.isPtr = true;
|
||||
ptrByValueArg.as<ArgDescValue>().elements.push_back(element);
|
||||
mockKernelImmData->mockKernelDescriptor->payloadMappings.explicitArgs.push_back(ptrByValueArg);
|
||||
// Sanity check: the descriptor now holds exactly the one argument added above.
EXPECT_EQ(mockKernelImmData->mockKernelDescriptor->payloadMappings.explicitArgs.size(), 1u);
|
||||
|
||||
createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
// Clear every attribute-based indirect-access indicator so that only the
// pointer-by-value argument can make the kernel report indirect access.
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
EXPECT_TRUE(kernel->hasIndirectAccess());
|
||||
}
|
||||
|
||||
TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEnabledWhenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
|
||||
NEO::DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
|
||||
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
|
||||
bool isInternal = false;
|
||||
@@ -1263,6 +1302,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
@@ -1279,6 +1319,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
@@ -1295,6 +1336,24 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
EXPECT_TRUE(kernel->hasIndirectAccess());
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
@@ -1619,7 +1678,7 @@ TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThat
|
||||
EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
}
|
||||
|
||||
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsNotSet) {
|
||||
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsSet) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
|
||||
kernel->kernelHasIndirectAccess = false;
|
||||
@@ -1636,9 +1695,9 @@ TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThat
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
|
||||
unifiedMemoryControls = kernel->getUnifiedMemoryControls();
|
||||
EXPECT_FALSE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
|
||||
EXPECT_FALSE(unifiedMemoryControls.indirectHostAllocationsAllowed);
|
||||
EXPECT_FALSE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
|
||||
EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed);
|
||||
EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
}
|
||||
|
||||
TEST_F(KernelPropertiesTests, givenValidKernelIndirectAccessFlagsSetThenExpectKernelIndirectAllocationsAllowedTrue) {
|
||||
|
||||
@@ -148,13 +148,13 @@ void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, Grap
|
||||
}
|
||||
|
||||
cl_int Kernel::initialize() {
|
||||
this->kernelHasIndirectAccess = false;
|
||||
auto pClDevice = &getDevice();
|
||||
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
|
||||
reconfigureKernel();
|
||||
auto &hwInfo = pClDevice->getHardwareInfo();
|
||||
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs;
|
||||
const auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs;
|
||||
@@ -281,11 +281,19 @@ cl_int Kernel::initialize() {
|
||||
slmSizes.resize(numArgs);
|
||||
|
||||
this->setInlineSamplers();
|
||||
|
||||
this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
|
||||
|
||||
bool detectIndirectAccessInKernel = productHelper.isDetectIndirectAccessInKernelSupported(kernelDescriptor);
|
||||
if (DebugManager.flags.DetectIndirectAccessInKernel.get() != -1) {
|
||||
detectIndirectAccessInKernel = DebugManager.flags.DetectIndirectAccessInKernel.get() == 1;
|
||||
}
|
||||
if (detectIndirectAccessInKernel) {
|
||||
this->kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
|
||||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess ||
|
||||
NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor);
|
||||
} else {
|
||||
this->kernelHasIndirectAccess = true;
|
||||
}
|
||||
provideInitializationHints();
|
||||
// resolve the new kernel info to account for kernel handlers
|
||||
// I think by this time we have decoded the binary and know the number of args etc.
|
||||
@@ -1311,7 +1319,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
}
|
||||
}
|
||||
|
||||
if (unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) {
|
||||
if (getHasIndirectAccess() && unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) {
|
||||
pageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomain(this->getContext().getSVMAllocsManager());
|
||||
}
|
||||
makeArgsResident(commandStreamReceiver);
|
||||
@@ -1323,9 +1331,9 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
|
||||
gtpinNotifyMakeResident(this, &commandStreamReceiver);
|
||||
|
||||
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed) {
|
||||
if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed)) {
|
||||
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -432,6 +432,7 @@ XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenStatelessKernelWhen
|
||||
}
|
||||
|
||||
XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenKernelExecInfoWhenItHasIndirectHostAccessThenDisableCompressionInSBA) {
|
||||
EXPECT_TRUE(multiDeviceKernel->getHasIndirectAccess());
|
||||
const size_t bufferSize = MemoryConstants::kiloByte;
|
||||
uint8_t bufferData[bufferSize] = {};
|
||||
|
||||
|
||||
@@ -1267,6 +1267,28 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeReside
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||
}
|
||||
|
||||
// Verifies that makeResident() leaves the command stream receiver's residency list empty
// while kernelHasIndirectAccess is false, both before and after indirect shared access
// is enabled through setUnifiedMemoryProperty().
// NOTE(review): the test name says "OnlySharedAllocationsAreNotMadeResident", but the
// assertions check that the residency list is empty, i.e. that nothing is made resident.
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryButNotHasIndirectAccessWhenMakeResidentIsCalledThenOnlySharedAllocationsAreNotMadeResident) {
|
||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
// Create one shared and one host unified-memory allocation in the kernel's context.
auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
|
||||
auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields());
|
||||
auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields());
|
||||
auto unifiedSharedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()));
|
||||
auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties);
|
||||
// Mark the kernel as not using indirect access; this is the condition under test.
mockKernel.mockKernel->kernelHasIndirectAccess = false;
|
||||
|
||||
// First pass: indirect shared access has not been requested yet.
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
||||
EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size());
|
||||
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true);
|
||||
|
||||
// Second pass: indirect shared access is requested, yet the residency list must stay empty.
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
||||
EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size());
|
||||
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation);
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenDeviceUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsNotDecommited) {
|
||||
auto mockPageFaultManager = new MockPageFaultManager();
|
||||
static_cast<MockMemoryManager *>(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
|
||||
@@ -1704,12 +1726,43 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
|
||||
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) {
|
||||
// Verifies that, with DetectIndirectAccessInKernel forced to 1, a kernel whose only
// indirect-access indicator is hasIndirectStatelessAccess reports getHasIndirectAccess()
// == true after a successful initialize().
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicAndHasIndirectStatelessAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
// Presumably restores the debug flag changed below when the test scope ends — confirm.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
// Only hasIndirectStatelessAccess is set; the three hasNonKernelArg* flags are cleared.
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
// Allocate one page for the kernel allocation; it is released at the end of the test.
auto memoryManager = commandStreamReceiver.getMemoryManager();
|
||||
pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockContext ctx;
|
||||
program.setContext(&ctx);
|
||||
// NOTE(review): this raw allocation is not released in the test body; presumably the
// program owns and frees its globalSurface — confirm.
program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation();
|
||||
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
|
||||
ASSERT_EQ(CL_SUCCESS, kernel->initialize());
|
||||
|
||||
EXPECT_TRUE(kernel->getHasIndirectAccess());
|
||||
|
||||
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorIndirectStatelessAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToFalse) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
@@ -1729,12 +1782,77 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor
|
||||
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
// With detection forced on and every kernel attribute that signals indirect access cleared,
// a by-value argument containing a pointer element must still mark the kernel as using
// indirect access.
HWTEST_F(KernelResidencyTest, givenKernelWithPtrByValueArgumentAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DetectIndirectAccessInKernel.set(1);

    auto pKernelInfo = std::make_unique<KernelInfo>();
    auto &attributes = pKernelInfo->kernelDescriptor.kernelAttributes;
    attributes.simdSize = 1;
    attributes.hasNonKernelArgLoad = false;
    attributes.hasNonKernelArgStore = false;
    attributes.hasNonKernelArgAtomic = false;
    attributes.hasIndirectStatelessAccess = false;

    // The only indirect-access hint left: one by-value argument whose element is a pointer.
    ArgDescValue::Element pointerElement;
    pointerElement.isPtr = true;
    auto ptrByValueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
    ptrByValueArg.as<ArgDescValue>().elements.push_back(pointerElement);
    pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(ptrByValueArg);

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    const auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    auto memoryManager = commandStreamReceiver.getMemoryManager();
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);
    program.buildInfos[rootDeviceIndex].globalSurface = new MockGraphicsAllocation();

    auto kernel = std::make_unique<MockKernel>(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());

    EXPECT_TRUE(kernel->getHasIndirectAccess());

    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
|
||||
|
||||
// With detection forced off, the kernel is expected to report indirect access even though
// every attribute that signals it is cleared.
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorHasIndirectStatelessAccessAndDetectIndirectAccessInKernelDisabledThenKernelHasIndirectAccessIsSetToTrue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DetectIndirectAccessInKernel.set(0);

    auto pKernelInfo = std::make_unique<KernelInfo>();
    auto &attributes = pKernelInfo->kernelDescriptor.kernelAttributes;
    attributes.simdSize = 1;
    attributes.hasNonKernelArgLoad = false;
    attributes.hasNonKernelArgStore = false;
    attributes.hasNonKernelArgAtomic = false;
    attributes.hasIndirectStatelessAccess = false;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    const auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    auto memoryManager = commandStreamReceiver.getMemoryManager();
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);
    program.buildInfos[rootDeviceIndex].globalSurface = new MockGraphicsAllocation();

    auto kernel = std::make_unique<MockKernel>(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());

    EXPECT_TRUE(kernel->getHasIndirectAccess());

    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
@@ -1754,12 +1872,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec
|
||||
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
@@ -1779,12 +1900,15 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire
|
||||
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DetectIndirectAccessInKernel.set(1);
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
@@ -299,6 +299,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideKernelSizeLimitForSmallDispatch, -1, "-1
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0: disabled), 0: disabled, 1: enabled. It uses only busy loop to wait or busy loop with KMD wait function, when KMD fallback is enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: default , 0: disabled, 1: enabled. If enabled, instead of programming semaphores, dependencies are resolved using task levels")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DetectIndirectAccessInKernel, -1, "-1: default, 0:disabled, 1: enabled. If enabled and indirect accesses are not detected in kernel, indirect allocations will not be allowed even if set by API.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
|
||||
|
||||
@@ -85,4 +85,17 @@ KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(Ker
|
||||
return KernelHelper::ErrorCode::SUCCESS;
|
||||
}
|
||||
|
||||
bool KernelHelper::isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor) {
|
||||
for (auto &argDescriptor : kernelDescriptor.payloadMappings.explicitArgs) {
|
||||
if (argDescriptor.type == NEO::ArgDescriptor::ArgTValue) {
|
||||
for (auto &element : argDescriptor.as<NEO::ArgDescValue>().elements) {
|
||||
if (element.isPtr) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -34,6 +34,8 @@ struct KernelHelper {
|
||||
return perHwThreadPrivateScratchSize * computeUnitsUsedForScratch;
|
||||
}
|
||||
static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device);
|
||||
|
||||
static bool isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor);
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -54,9 +54,9 @@ struct KernelDescriptor {
|
||||
uint16_t numArgsToPatch = 0U;
|
||||
uint16_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
uint8_t barrierCount = 0u;
|
||||
bool hasNonKernelArgLoad = true;
|
||||
bool hasNonKernelArgStore = true;
|
||||
bool hasNonKernelArgAtomic = true;
|
||||
bool hasNonKernelArgLoad = false;
|
||||
bool hasNonKernelArgStore = false;
|
||||
bool hasNonKernelArgAtomic = false;
|
||||
bool hasIndirectStatelessAccess = false;
|
||||
|
||||
AddressingMode bufferAddressingMode = BindfulAndStateless;
|
||||
|
||||
@@ -25,6 +25,7 @@ class Device;
|
||||
enum class LocalMemoryAccessMode;
|
||||
struct FrontEndPropertiesSupport;
|
||||
struct HardwareInfo;
|
||||
struct KernelDescriptor;
|
||||
struct PipelineSelectArgs;
|
||||
struct PipelineSelectPropertiesSupport;
|
||||
struct StateBaseAddressPropertiesSupport;
|
||||
@@ -166,6 +167,7 @@ class ProductHelper {
|
||||
virtual uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const = 0;
|
||||
virtual bool isTlbFlushRequired() const = 0;
|
||||
virtual bool isDummyBlitWaRequired() const = 0;
|
||||
virtual bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const = 0;
|
||||
virtual bool getFrontEndPropertyScratchSizeSupport() const = 0;
|
||||
virtual bool getFrontEndPropertyPrivateScratchSizeSupport() const = 0;
|
||||
virtual bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const = 0;
|
||||
|
||||
@@ -56,6 +56,12 @@ bool ProductHelperHw<gfxProduct>::isTlbFlushRequired() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const {
|
||||
constexpr bool enabled = false;
|
||||
return enabled;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
void ProductHelperHw<gfxProduct>::enableBlitterOperationsSupport(HardwareInfo *hwInfo) const {
|
||||
hwInfo->capabilityTable.blitterOperationsSupported = obtainBlitterPreference(*hwInfo);
|
||||
|
||||
@@ -122,6 +122,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const override;
|
||||
bool isTlbFlushRequired() const override;
|
||||
bool isDummyBlitWaRequired() const override;
|
||||
bool isDetectIndirectAccessInKernelSupported(const KernelDescriptor &kernelDescriptor) const override;
|
||||
bool getFrontEndPropertyScratchSizeSupport() const override;
|
||||
bool getFrontEndPropertyPrivateScratchSizeSupport() const override;
|
||||
bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const override;
|
||||
|
||||
@@ -503,3 +503,4 @@ DirectSubmissionRelaxedOrderingMinNumberOfClients = -1
|
||||
UseDeprecatedClDeviceIpVersion = 0
|
||||
ExperimentalCopyThroughLockWaitlistSizeThreshold= -1
|
||||
ForceDummyBlitWa = 0
|
||||
DetectIndirectAccessInKernel = -1
|
||||
|
||||
@@ -163,3 +163,31 @@ TEST_F(KernelHelperTest, GivenScratchEqualsZeroAndPrivetGreaterThanZeroWhenCheck
|
||||
attributes.perThreadScratchSize[1] = 0;
|
||||
EXPECT_EQ(KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(attributes, pDevice), KernelHelper::ErrorCode::SUCCESS);
|
||||
}
|
||||
|
||||
// A pointer-typed argument plus a by-value argument whose element is not a pointer
// must not be reported as "pointer passed by value".
TEST_F(KernelHelperTest, GivenNoPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenFalseIsReturned) {
    ArgDescValue::Element nonPointerElement;
    nonPointerElement.isPtr = false;

    auto byValueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
    byValueArg.as<ArgDescValue>().elements.push_back(nonPointerElement);
    auto byPointerArg = ArgDescriptor(ArgDescriptor::ArgTPointer);

    KernelDescriptor descriptor;
    auto &explicitArgs = descriptor.payloadMappings.explicitArgs;
    explicitArgs.push_back(byPointerArg);
    explicitArgs.push_back(byValueArg);

    EXPECT_FALSE(KernelHelper::isAnyArgumentPtrByValue(descriptor));
}
|
||||
|
||||
// A by-value argument carrying an element flagged as a pointer must be detected,
// even when it follows a regular pointer-typed argument.
TEST_F(KernelHelperTest, GivenPtrByValueWhenCheckingIsAnyArgumentPtrByValueThenTrueIsReturned) {
    ArgDescValue::Element pointerElement;
    pointerElement.isPtr = true;

    auto byValueArg = ArgDescriptor(ArgDescriptor::ArgTValue);
    byValueArg.as<ArgDescValue>().elements.push_back(pointerElement);
    auto byPointerArg = ArgDescriptor(ArgDescriptor::ArgTPointer);

    KernelDescriptor descriptor;
    auto &explicitArgs = descriptor.payloadMappings.explicitArgs;
    explicitArgs.push_back(byPointerArg);
    explicitArgs.push_back(byValueArg);

    EXPECT_TRUE(KernelHelper::isAnyArgumentPtrByValue(descriptor));
}
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/unified_memory/usm_memory_support.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
@@ -687,3 +688,13 @@ HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckingIsUnlockingLockedPtrNe
|
||||
HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckDummyBlitWaRequiredThenReturnFalse) {
|
||||
EXPECT_FALSE(productHelper->isDummyBlitWaRequired());
|
||||
}
|
||||
|
||||
// The product helper is expected to report detection as unsupported for both the
// Patchtokens and the Zebin binary formats.
HWTEST_F(ProductHelperTest, givenProductHelperAndKernelBinaryFormatsWhenCheckingIsDetectIndirectAccessInKernelSupportedThenCorrectValueIsReturned) {
    KernelDescriptor descriptor;
    auto &binaryFormat = descriptor.kernelAttributes.binaryFormat;

    binaryFormat = DeviceBinaryFormat::Patchtokens;
    const bool supportedForPatchtokens = productHelper->isDetectIndirectAccessInKernelSupported(descriptor);
    EXPECT_FALSE(supportedForPatchtokens);

    binaryFormat = DeviceBinaryFormat::Zebin;
    const bool supportedForZebin = productHelper->isDetectIndirectAccessInKernelSupported(descriptor);
    EXPECT_FALSE(supportedForZebin);
}
|
||||
Reference in New Issue
Block a user