mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Check IndirectStatelessCount from igc
If a kernel has no stateless indirect accesses, don't set the kernelHasIndirectAccess flag. Don't make indirect allocations resident or migrate them if the kernel has no indirect accesses. Changed initial values in KernelAttributes. Related-To: NEO-6597 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8a0a556d16
commit
63f406a58c
@@ -851,7 +851,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
|
||||
kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
|
||||
kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
|
||||
getImmutableData()->getKernelInfo()->hasIndirectStatelessAccess;
|
||||
|
||||
if (this->usesRayTracing()) {
|
||||
if (this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize > 0) {
|
||||
@@ -978,9 +979,7 @@ Kernel *Kernel::create(uint32_t productFamily, Module *module,
|
||||
}
|
||||
|
||||
bool KernelImp::hasIndirectAllocationsAllowed() const {
|
||||
return (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
return kernelHasIndirectAccess && unifiedMemoryControls.anyIndirectAllocationsAllowed();
|
||||
}
|
||||
|
||||
uint32_t KernelImp::getSlmTotalSize() const {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -139,7 +139,7 @@ struct KernelImp : Kernel {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
bool hasIndirectAccess() {
|
||||
bool hasIndirectAccess() const {
|
||||
return kernelHasIndirectAccess;
|
||||
}
|
||||
|
||||
|
||||
@@ -224,13 +224,14 @@ struct ModuleFixture : public DeviceFixture {
|
||||
module.reset(Module::create(device, &moduleDesc, moduleBuildLog, type));
|
||||
}
|
||||
|
||||
void createKernel() {
|
||||
void createKernel(bool kernelHasIndirectAccess = false) {
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
kernel = std::make_unique<WhiteBox<::L0::Kernel>>();
|
||||
kernel->module = module.get();
|
||||
kernel->initialize(&desc);
|
||||
kernel->kernelHasIndirectAccess = kernelHasIndirectAccess;
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -45,6 +45,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
|
||||
using ::L0::KernelImp::crossThreadData;
|
||||
using ::L0::KernelImp::crossThreadDataSize;
|
||||
using ::L0::KernelImp::groupSize;
|
||||
using ::L0::KernelImp::kernelHasIndirectAccess;
|
||||
using ::L0::KernelImp::kernelImmData;
|
||||
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using ::L0::KernelImp::module;
|
||||
|
||||
@@ -26,7 +26,7 @@ namespace ult {
|
||||
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) {
|
||||
createKernel();
|
||||
createKernel(true);
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
|
||||
kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
|
||||
|
||||
@@ -784,7 +784,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL
|
||||
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
ASSERT_NE(nullptr, gpuAlloc);
|
||||
|
||||
createKernel();
|
||||
createKernel(true);
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
|
||||
|
||||
@@ -843,7 +843,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
|
||||
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
ASSERT_NE(nullptr, gpuAlloc);
|
||||
|
||||
createKernel();
|
||||
createKernel(true);
|
||||
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
|
||||
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
|
||||
|
||||
|
||||
@@ -827,6 +827,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelL
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
@@ -855,6 +856,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
@@ -871,6 +873,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
@@ -887,6 +890,24 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoa
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true;
|
||||
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
EXPECT_TRUE(kernel->hasIndirectAccess());
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
|
||||
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
module->mockKernelImmData->mockKernelInfo->hasIndirectStatelessAccess = true;
|
||||
|
||||
kernel->initialize(&desc);
|
||||
|
||||
|
||||
@@ -259,7 +259,8 @@ cl_int Kernel::initialize() {
|
||||
|
||||
this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic ||
|
||||
kernelInfo.hasIndirectStatelessAccess;
|
||||
|
||||
provideInitializationHints();
|
||||
// resolve the new kernel info to account for kernel handlers
|
||||
@@ -1232,10 +1233,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
}
|
||||
|
||||
gtpinNotifyMakeResident(this, &commandStreamReceiver);
|
||||
|
||||
if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed) {
|
||||
if (kernelHasIndirectAccess && unifiedMemoryControls.anyIndirectAllocationsAllowed()) {
|
||||
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1542,12 +1542,13 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
|
||||
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) {
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicNorHasIndirectStatelessAccessThenKernelHasIndirectAccessIsSetToFalse) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
@@ -1573,6 +1574,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
@@ -1598,6 +1600,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
@@ -1623,6 +1626,33 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndir
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true;
|
||||
pKernelInfo->hasIndirectStatelessAccess = false;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
auto memoryManager = commandStreamReceiver.getMemoryManager();
|
||||
pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
|
||||
MockProgram program(toClDeviceVector(*pClDevice));
|
||||
MockContext ctx;
|
||||
program.setContext(&ctx);
|
||||
program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation();
|
||||
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
|
||||
EXPECT_TRUE(pKernel->getHasIndirectAccess());
|
||||
|
||||
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWithhasIndirectStatelessAccessThenKernelHasIndirectAccessIsSetToTrue) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false;
|
||||
pKernelInfo->hasIndirectStatelessAccess = true;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
@@ -317,6 +317,9 @@ DecodeError readZeInfoExperimentalProperties(const NEO::Yaml::YamlParser &parser
|
||||
ConstStringRef context,
|
||||
std::string &outErrReason, std::string &outWarning) {
|
||||
bool validExperimentalProperty = true;
|
||||
outExperimentalProperties.hasNonKernelArgLoad = true;
|
||||
outExperimentalProperties.hasNonKernelArgStore = true;
|
||||
outExperimentalProperties.hasNonKernelArgAtomic = true;
|
||||
for (const auto &experimentalPropertyNd : parser.createChildrenRange(node)) {
|
||||
for (const auto &experimentalPropertyMemberNd : parser.createChildrenRange(experimentalPropertyNd)) {
|
||||
auto key = parser.readKey(experimentalPropertyMemberNd);
|
||||
|
||||
@@ -156,9 +156,9 @@ struct KernelDescriptor {
|
||||
uint16_t numArgsToPatch = 0U;
|
||||
uint16_t numGrfRequired = 0U;
|
||||
uint8_t barrierCount = 0u;
|
||||
bool hasNonKernelArgLoad = true;
|
||||
bool hasNonKernelArgStore = true;
|
||||
bool hasNonKernelArgAtomic = true;
|
||||
bool hasNonKernelArgLoad = false;
|
||||
bool hasNonKernelArgStore = false;
|
||||
bool hasNonKernelArgAtomic = false;
|
||||
|
||||
AddressingMode bufferAddressingMode = BindfulAndStateless;
|
||||
AddressingMode imageAddressingMode = Bindful;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -21,4 +21,8 @@ struct UnifiedMemoryControls {
|
||||
bool indirectDeviceAllocationsAllowed = false;
|
||||
bool indirectHostAllocationsAllowed = false;
|
||||
bool indirectSharedAllocationsAllowed = false;
|
||||
|
||||
bool anyIndirectAllocationsAllowed() const {
|
||||
return indirectDeviceAllocationsAllowed || indirectHostAllocationsAllowed || indirectSharedAllocationsAllowed;
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user