feat(zebin): Add support for indirect stateless count

Add support for the newly added indirect stateless count entry in zeinfo;
populate the related field in kernelInfo.
- Move the hasIndirectStatelessAccess flag from KernelInfo to
  KernelDescriptor.

Related-To: NEO-7428
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
Author: Kacper Nowak
Date: 2022-10-18 14:37:41 +00:00
Committed by: Compute-Runtime-Automation
Commit: f06df021b5 (parent: 2b963c7359)
12 changed files with 28 additions and 15 deletions
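
Below is a minimal, self-contained sketch of the mapping this commit introduces: a non-zero indirect_stateless_count read from a kernel's zeinfo execution environment marks the kernel descriptor as having indirect stateless access, matching the (execEnv.indirectStatelessCount > 0) check added to populateKernelDescriptor in the diff. The struct and function names here are simplified, hypothetical stand-ins, not the real NEO types.

// Hypothetical, simplified stand-ins for the real NEO structures.
#include <cassert>
#include <cstdint>

struct ExecutionEnv {
    int32_t indirectStatelessCount = 0; // parsed from "indirect_stateless_count" in zeinfo
};

struct KernelAttributes {
    bool hasIndirectStatelessAccess = false; // now lives in KernelDescriptor, not KernelInfo
};

// Any count greater than zero sets the flag; zero (the default) leaves it unset.
void applyIndirectStatelessCount(const ExecutionEnv &execEnv, KernelAttributes &attrs) {
    attrs.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
}

int main() {
    ExecutionEnv execEnv{};
    KernelAttributes attrs{};

    applyIndirectStatelessCount(execEnv, attrs);
    assert(!attrs.hasIndirectStatelessAccess); // default count of 0: flag stays false

    execEnv.indirectStatelessCount = 2; // e.g. "indirect_stateless_count: 2" in zeinfo
    applyIndirectStatelessCount(execEnv, attrs);
    assert(attrs.hasIndirectStatelessAccess);
    return 0;
}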


@ -1991,7 +1991,7 @@ bool Kernel::hasDirectStatelessAccessToHostMemory() const {
}
bool Kernel::hasIndirectStatelessAccessToHostMemory() const {
if (!kernelInfo.hasIndirectStatelessAccess) {
if (!kernelInfo.kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess) {
return false;
}


@ -262,7 +262,7 @@ struct UmStatelessCompressionInSBA : public KernelAUBFixture<StatelessKernelWith
DebugManager.flags.NodeOrdinal.set(GetParam());
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Builtin));
KernelAUBFixture<StatelessKernelWithIndirectAccessFixture>::setUp();
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().hasIndirectStatelessAccess);
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
}
void TearDown() override {


@ -271,7 +271,7 @@ struct XeHpgCoreUmStatelessCompressionInSBA : public KernelAUBFixture<StatelessK
if (!device->getHardwareInfo().featureTable.flags.ftrLocalMemory) {
GTEST_SKIP();
}
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().hasIndirectStatelessAccess);
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
}
void TearDown() override {


@ -339,7 +339,7 @@ class StatelessKernelWithIndirectAccessFixture : public ProgramFixture {
ASSERT_NE(nullptr, multiDeviceKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(multiDeviceKernel->getKernel(device->getRootDeviceIndex())->getKernelInfo().hasIndirectStatelessAccess);
EXPECT_TRUE(multiDeviceKernel->getKernel(device->getRootDeviceIndex())->getKernelInfo().kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
}
void tearDown() {


@ -430,11 +430,12 @@ TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToH
TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
KernelInfo kernelInfo;
EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess);
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
EXPECT_FALSE(kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice);
EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory());
kernelInfo.hasIndirectStatelessAccess = true;
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice);
EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory());
@ -458,7 +459,8 @@ TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirec
TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
KernelInfo kernelInfo;
kernelInfo.hasIndirectStatelessAccess = true;
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
MockKernel mockKernel(pProgram, kernelInfo, *pClDevice);
EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed);
@ -612,7 +614,7 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectS
DebugManagerStateRestore debugRestorer;
DebugManager.flags.EnableStatelessCompression.set(1);
pKernelInfo->hasIndirectStatelessAccess = false;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
MockGraphicsAllocation gfxAllocation;
gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY);
@ -632,7 +634,7 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectSta
DebugManagerStateRestore debugRestorer;
DebugManager.flags.EnableStatelessCompression.set(1);
pKernelInfo->hasIndirectStatelessAccess = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
const auto allocationTypes = {AllocationType::BUFFER,
AllocationType::BUFFER_HOST_MEMORY};
@ -669,7 +671,7 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectSta
DebugManagerStateRestore debugRestorer;
DebugManager.flags.EnableStatelessCompression.set(1);
pKernelInfo->hasIndirectStatelessAccess = true;
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
constexpr std::array<AllocationTypeHelper, 4> allocationTypes = {{{AllocationType::BUFFER, false},
{AllocationType::BUFFER, true},


@ -262,7 +262,7 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
EXPECT_FALSE(pKernelInfo->hasIndirectStatelessAccess);
EXPECT_FALSE(pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
}
TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect) {
@ -294,7 +294,7 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
EXPECT_TRUE(pKernelInfo->hasIndirectStatelessAccess);
EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
EXPECT_EQ(KernelDescriptor::BindfulAndStateless, pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode);
}


@ -162,6 +162,7 @@ constexpr ConstStringRef ageBased("age_based");
constexpr ConstStringRef roundRobin("round_robin");
constexpr ConstStringRef roundRobinStall("round_robin_stall");
} // namespace ThreadSchedulingMode
constexpr ConstStringRef indirectStatelessCount("indirect_stateless_count");
} // namespace ExecutionEnv
namespace Attributes {
@ -409,6 +410,7 @@ using SlmSizeT = int32_t;
using SubgroupIndependentForwardProgressT = bool;
using WorkgroupWalkOrderDimensionsT = int32_t[3];
using ThreadSchedulingModeT = ThreadSchedulingMode;
using IndirectStatelessCountT = int32_t;
namespace Defaults {
constexpr BarrierCountT barrierCount = 0;
@ -434,6 +436,7 @@ constexpr SlmSizeT slmSize = 0;
constexpr SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = false;
constexpr WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions = {0, 1, 2};
constexpr ThreadSchedulingModeT threadSchedulingMode = ThreadSchedulingModeUnknown;
constexpr IndirectStatelessCountT indirectStatelessCount = 0;
} // namespace Defaults
constexpr ConstStringRef required[] = {
@ -463,6 +466,7 @@ struct ExecutionEnvBaseT {
SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = Defaults::subgroupIndependentForwardProgress;
WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions{Defaults::workgroupWalkOrderDimensions[0], Defaults::workgroupWalkOrderDimensions[1], Defaults::workgroupWalkOrderDimensions[2]};
ThreadSchedulingModeT threadSchedulingMode = Defaults::threadSchedulingMode;
IndirectStatelessCountT indirectStatelessCount = Defaults::indirectStatelessCount;
};
struct ExperimentalPropertiesBaseT {


@ -406,6 +406,8 @@ DecodeError readZeInfoExecutionEnvironment(const NEO::Yaml::YamlParser &parser,
validExecEnv = validExecEnv & readZeInfoValueCollectionChecked(outExecEnv.workgroupWalkOrderDimensions, parser, execEnvMetadataNd, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::threadSchedulingMode == key) {
validExecEnv &= readZeInfoEnumChecked(parser, execEnvMetadataNd, outExecEnv.threadSchedulingMode, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::indirectStatelessCount == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.indirectStatelessCount, context, outErrReason);
} else {
outWarning.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unknown entry \"" + key.str() + "\" in context of " + context.str() + "\n");
}
@ -1281,6 +1283,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<n
kernelDescriptor.kernelAttributes.workgroupWalkOrder[0] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[0]);
kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[1]);
kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[2]);
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
kernelDescriptor.kernelMetadata.requiredSubGroupSize = execEnv.requiredSubGroupSize;
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;


@ -157,6 +157,7 @@ struct KernelDescriptor {
bool hasNonKernelArgLoad = true;
bool hasNonKernelArgStore = true;
bool hasNonKernelArgAtomic = true;
bool hasIndirectStatelessAccess = false;
AddressingMode bufferAddressingMode = BindfulAndStateless;
AddressingMode imageAddressingMode = Bindful;


@ -114,7 +114,6 @@ struct KernelInfo {
const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
uint32_t systemKernelOffset = 0;
uint64_t kernelId = 0;
bool hasIndirectStatelessAccess = false;
bool isKernelHeapSubstituted = false;
GraphicsAllocation *kernelAllocation = nullptr;
DebugData debugData;


@ -33,7 +33,7 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
dst.heapInfo.pSsh = src.heaps.surfaceState.begin();
if (src.tokens.executionEnvironment != nullptr) {
dst.hasIndirectStatelessAccess = (src.tokens.executionEnvironment->IndirectStatelessCount > 0);
dst.kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (src.tokens.executionEnvironment->IndirectStatelessCount > 0);
}
dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U;


@ -1004,6 +1004,7 @@ kernels:
- 1
- 2
thread_scheduling_mode: age_based
indirect_stateless_count: 2
...
)===";
@ -1046,6 +1047,7 @@ kernels:
EXPECT_EQ(2, execEnv.workgroupWalkOrderDimensions[2]);
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;
EXPECT_EQ(ThreadSchedulingMode::ThreadSchedulingModeAgeBased, execEnv.threadSchedulingMode);
EXPECT_EQ(2, execEnv.indirectStatelessCount);
}
TEST(ReadZeInfoExecutionEnvironment, GivenUnknownEntryThenEmmitsWarning) {
@ -3784,7 +3786,8 @@ kernels:
work_group_walk_order_dimensions:
- 0
- 1
- 2
- 2
indirect_stateless_count : 2
)===";
NEO::ProgramInfo programInfo;
ZebinTestData::ValidEmptyProgram zebin;
@ -3831,6 +3834,7 @@ kernels:
EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[0]);
EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[1]);
EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
}
TEST(PopulateArgDescriptorPerThreadPayload, GivenArgTypeLocalIdWhenOffsetIsNonZeroThenFail) {