Check IndirectStatelessCount from igc

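If the kernel has no stateless indirect accesses, don't set the
kernelHasIndirectAccess flag.
Don't make indirect allocations resident or migrate them if the kernel
has no indirect accesses.
Changed the initial values of hasNonKernelArgLoad, hasNonKernelArgStore
and hasNonKernelArgAtomic in KernelAttributes from true to false.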

Related-To: NEO-6597

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2022-01-25 11:45:33 +00:00
committed by Compute-Runtime-Automation
parent 8a0a556d16
commit 63f406a58c
12 changed files with 78 additions and 21 deletions

View File

@@ -851,7 +851,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
getImmutableData()->getKernelInfo()->hasIndirectStatelessAccess;
if (this->usesRayTracing()) {
if (this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize > 0) {
@@ -978,9 +979,7 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module,
}
bool KernelImp::hasIndirectAllocationsAllowed() const {
return (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
unifiedMemoryControls.indirectSharedAllocationsAllowed);
return kernelHasIndirectAccess && unifiedMemoryControls.anyIndirectAllocationsAllowed();
}
uint32_t KernelImp::getSlmTotalSize() const {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -139,7 +139,7 @@ struct KernelImp : Kernel {
return ZE_RESULT_SUCCESS;
}
bool hasIndirectAccess() {
bool hasIndirectAccess() const {
return kernelHasIndirectAccess;
}

View File

@@ -224,13 +224,14 @@ struct ModuleFixture : public DeviceFixture {
module.reset(Module::create(device, &moduleDesc, moduleBuildLog, type));
}
void createKernel() {
void createKernel(bool kernelHasIndirectAccess = false) {
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
kernel = std::make_unique<WhiteBox<::L0::Kernel>>();
kernel->module = module.get();
kernel->initialize(&desc);
kernel->kernelHasIndirectAccess = kernelHasIndirectAccess;
}
void TearDown() {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
using ::L0::KernelImp::crossThreadData;
using ::L0::KernelImp::crossThreadDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::kernelHasIndirectAccess;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
using ::L0::KernelImp::module;

View File

@@ -26,7 +26,7 @@ namespace ult {
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
createKernel();
createKernel(true);
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true;

View File

@@ -784,7 +784,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, gpuAlloc);
createKernel();
createKernel(true);
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
@@ -843,7 +843,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, gpuAlloc);
createKernel();
createKernel(true);
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);

View File

@@ -827,6 +827,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
@@ -855,6 +856,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
@@ -871,6 +873,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
@@ -887,6 +890,24 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true;
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
kernel->initialize(&desc);
EXPECT_TRUE(kernel->hasIndirectAccess());
}
{
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = true;
kernel->initialize(&desc);

View File

@@ -259,7 +259,8 @@ cl_int Kernel::initialize() {
this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
kernelInfo.hasIndirectStatelessAccess;
provideInitializationHints();
// resolve the new kernel info to account for kernel handlers
@@ -1232,10 +1233,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
}
gtpinNotifyMakeResident(this, &commandStreamReceiver);
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
unifiedMemoryControls.indirectSharedAllocationsAllowed) {
if (kernelHasIndirectAccess && unifiedMemoryControls.anyIndirectAllocationsAllowed()) {
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
}
}

View File

@@ -1542,12 +1542,13 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
}
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) {
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorHasIndirectStatelessAccessThenKernelHasIndirectAccessIsSetToFalse) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1573,6 +1574,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1598,6 +1600,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1623,6 +1626,33 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndir
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true;
pKernelInfo->hasIndirectStatelessAccess = false;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
auto memoryManager = commandStreamReceiver.getMemoryManager();
pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
MockProgram program(toClDeviceVector(*pClDevice));
MockContext ctx;
program.setContext(&ctx);
program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation();
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_TRUE(pKernel->getHasIndirectAccess());
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
HWTEST_F(KernelResidencyTest, givenKernelWithhasIndirectStatelessAccessThenKernelHasIndirectAccessIsSetToTrue) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
pKernelInfo->hasIndirectStatelessAccess = true;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;

View File

@@ -317,6 +317,9 @@ DecodeError readZeInfoExperimentalProperties(const NEO::Yaml::YamlParser &parser
ConstStringRef context,
std::string &outErrReason, std::string &outWarning) {
bool validExperimentalProperty = true;
outExperimentalProperties.hasNonKernelArgLoad = true;
outExperimentalProperties.hasNonKernelArgStore = true;
outExperimentalProperties.hasNonKernelArgAtomic = true;
for (const auto &experimentalPropertyNd : parser.createChildrenRange(node)) {
for (const auto &experimentalPropertyMemberNd : parser.createChildrenRange(experimentalPropertyNd)) {
auto key = parser.readKey(experimentalPropertyMemberNd);

View File

@@ -156,9 +156,9 @@ struct KernelDescriptor {
uint16_t numArgsToPatch = 0U;
uint16_t numGrfRequired = 0U;
uint8_t barrierCount = 0u;
bool hasNonKernelArgLoad = true;
bool hasNonKernelArgStore = true;
bool hasNonKernelArgAtomic = true;
bool hasNonKernelArgLoad = false;
bool hasNonKernelArgStore = false;
bool hasNonKernelArgAtomic = false;
AddressingMode bufferAddressingMode = BindfulAndStateless;
AddressingMode imageAddressingMode = Bindful;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -21,4 +21,8 @@ struct UnifiedMemoryControls {
bool indirectDeviceAllocationsAllowed = false;
bool indirectHostAllocationsAllowed = false;
bool indirectSharedAllocationsAllowed = false;
// Returns true when at least one of the device, host or shared
// indirect-allocation flags is set; false when all three are clear.
bool anyIndirectAllocationsAllowed() const {
    if (indirectDeviceAllocationsAllowed) {
        return true;
    }
    if (indirectHostAllocationsAllowed) {
        return true;
    }
    return indirectSharedAllocationsAllowed;
}
};