mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
feat(zebin): Add support for indirect stateless count
Add support for newly added indirect stateless count check; populate related field in kernelInfo. - Move hasIndirectStatelessAccess check from KernelInfo to KernelDescriptor. Related-To: NEO-7428 Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
2b963c7359
commit
f06df021b5
@ -1991,7 +1991,7 @@ bool Kernel::hasDirectStatelessAccessToHostMemory() const {
|
||||
}
|
||||
|
||||
bool Kernel::hasIndirectStatelessAccessToHostMemory() const {
|
||||
if (!kernelInfo.hasIndirectStatelessAccess) {
|
||||
if (!kernelInfo.kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -262,7 +262,7 @@ struct UmStatelessCompressionInSBA : public KernelAUBFixture<StatelessKernelWith
|
||||
DebugManager.flags.NodeOrdinal.set(GetParam());
|
||||
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Builtin));
|
||||
KernelAUBFixture<StatelessKernelWithIndirectAccessFixture>::setUp();
|
||||
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().hasIndirectStatelessAccess);
|
||||
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
@ -271,7 +271,7 @@ struct XeHpgCoreUmStatelessCompressionInSBA : public KernelAUBFixture<StatelessK
|
||||
if (!device->getHardwareInfo().featureTable.flags.ftrLocalMemory) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().hasIndirectStatelessAccess);
|
||||
EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
@ -339,7 +339,7 @@ class StatelessKernelWithIndirectAccessFixture : public ProgramFixture {
|
||||
ASSERT_NE(nullptr, multiDeviceKernel);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_TRUE(multiDeviceKernel->getKernel(device->getRootDeviceIndex())->getKernelInfo().hasIndirectStatelessAccess);
|
||||
EXPECT_TRUE(multiDeviceKernel->getKernel(device->getRootDeviceIndex())->getKernelInfo().kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
}
|
||||
|
||||
void tearDown() {
|
||||
|
@ -430,11 +430,12 @@ TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToH
|
||||
|
||||
TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
|
||||
KernelInfo kernelInfo;
|
||||
EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess);
|
||||
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
EXPECT_FALSE(kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
|
||||
MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice);
|
||||
EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory());
|
||||
kernelInfo.hasIndirectStatelessAccess = true;
|
||||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
|
||||
MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice);
|
||||
EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory());
|
||||
@ -458,7 +459,8 @@ TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirec
|
||||
|
||||
TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.hasIndirectStatelessAccess = true;
|
||||
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
|
||||
MockKernel mockKernel(pProgram, kernelInfo, *pClDevice);
|
||||
EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed);
|
||||
@ -612,7 +614,7 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectS
|
||||
DebugManagerStateRestore debugRestorer;
|
||||
DebugManager.flags.EnableStatelessCompression.set(1);
|
||||
|
||||
pKernelInfo->hasIndirectStatelessAccess = false;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false;
|
||||
|
||||
MockGraphicsAllocation gfxAllocation;
|
||||
gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY);
|
||||
@ -632,7 +634,7 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectSta
|
||||
DebugManagerStateRestore debugRestorer;
|
||||
DebugManager.flags.EnableStatelessCompression.set(1);
|
||||
|
||||
pKernelInfo->hasIndirectStatelessAccess = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
|
||||
const auto allocationTypes = {AllocationType::BUFFER,
|
||||
AllocationType::BUFFER_HOST_MEMORY};
|
||||
@ -669,7 +671,7 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectSta
|
||||
DebugManagerStateRestore debugRestorer;
|
||||
DebugManager.flags.EnableStatelessCompression.set(1);
|
||||
|
||||
pKernelInfo->hasIndirectStatelessAccess = true;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = true;
|
||||
|
||||
constexpr std::array<AllocationTypeHelper, 4> allocationTypes = {{{AllocationType::BUFFER, false},
|
||||
{AllocationType::BUFFER, true},
|
||||
|
@ -262,7 +262,7 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
EXPECT_FALSE(pKernelInfo->hasIndirectStatelessAccess);
|
||||
EXPECT_FALSE(pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
}
|
||||
|
||||
TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect) {
|
||||
@ -294,7 +294,7 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
|
||||
EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
EXPECT_TRUE(pKernelInfo->hasIndirectStatelessAccess);
|
||||
EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
EXPECT_EQ(KernelDescriptor::BindfulAndStateless, pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode);
|
||||
}
|
||||
|
||||
|
@ -162,6 +162,7 @@ constexpr ConstStringRef ageBased("age_based");
|
||||
constexpr ConstStringRef roundRobin("round_robin");
|
||||
constexpr ConstStringRef roundRobinStall("round_robin_stall");
|
||||
} // namespace ThreadSchedulingMode
|
||||
constexpr ConstStringRef indirectStatelessCount("indirect_stateless_count");
|
||||
} // namespace ExecutionEnv
|
||||
|
||||
namespace Attributes {
|
||||
@ -409,6 +410,7 @@ using SlmSizeT = int32_t;
|
||||
using SubgroupIndependentForwardProgressT = bool;
|
||||
using WorkgroupWalkOrderDimensionsT = int32_t[3];
|
||||
using ThreadSchedulingModeT = ThreadSchedulingMode;
|
||||
using IndirectStatelessCountT = int32_t;
|
||||
|
||||
namespace Defaults {
|
||||
constexpr BarrierCountT barrierCount = 0;
|
||||
@ -434,6 +436,7 @@ constexpr SlmSizeT slmSize = 0;
|
||||
constexpr SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = false;
|
||||
constexpr WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions = {0, 1, 2};
|
||||
constexpr ThreadSchedulingModeT threadSchedulingMode = ThreadSchedulingModeUnknown;
|
||||
constexpr IndirectStatelessCountT indirectStatelessCount = 0;
|
||||
} // namespace Defaults
|
||||
|
||||
constexpr ConstStringRef required[] = {
|
||||
@ -463,6 +466,7 @@ struct ExecutionEnvBaseT {
|
||||
SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = Defaults::subgroupIndependentForwardProgress;
|
||||
WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions{Defaults::workgroupWalkOrderDimensions[0], Defaults::workgroupWalkOrderDimensions[1], Defaults::workgroupWalkOrderDimensions[2]};
|
||||
ThreadSchedulingModeT threadSchedulingMode = Defaults::threadSchedulingMode;
|
||||
IndirectStatelessCountT indirectStatelessCount = Defaults::indirectStatelessCount;
|
||||
};
|
||||
|
||||
struct ExperimentalPropertiesBaseT {
|
||||
|
@ -406,6 +406,8 @@ DecodeError readZeInfoExecutionEnvironment(const NEO::Yaml::YamlParser &parser,
|
||||
validExecEnv = validExecEnv & readZeInfoValueCollectionChecked(outExecEnv.workgroupWalkOrderDimensions, parser, execEnvMetadataNd, context, outErrReason);
|
||||
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::threadSchedulingMode == key) {
|
||||
validExecEnv &= readZeInfoEnumChecked(parser, execEnvMetadataNd, outExecEnv.threadSchedulingMode, context, outErrReason);
|
||||
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::indirectStatelessCount == key) {
|
||||
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.indirectStatelessCount, context, outErrReason);
|
||||
} else {
|
||||
outWarning.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unknown entry \"" + key.str() + "\" in context of " + context.str() + "\n");
|
||||
}
|
||||
@ -1281,6 +1283,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<n
|
||||
kernelDescriptor.kernelAttributes.workgroupWalkOrder[0] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[0]);
|
||||
kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[1]);
|
||||
kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[2]);
|
||||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
|
||||
kernelDescriptor.kernelMetadata.requiredSubGroupSize = execEnv.requiredSubGroupSize;
|
||||
|
||||
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;
|
||||
|
@ -157,6 +157,7 @@ struct KernelDescriptor {
|
||||
bool hasNonKernelArgLoad = true;
|
||||
bool hasNonKernelArgStore = true;
|
||||
bool hasNonKernelArgAtomic = true;
|
||||
bool hasIndirectStatelessAccess = false;
|
||||
|
||||
AddressingMode bufferAddressingMode = BindfulAndStateless;
|
||||
AddressingMode imageAddressingMode = Bindful;
|
||||
|
@ -114,7 +114,6 @@ struct KernelInfo {
|
||||
const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
|
||||
uint32_t systemKernelOffset = 0;
|
||||
uint64_t kernelId = 0;
|
||||
bool hasIndirectStatelessAccess = false;
|
||||
bool isKernelHeapSubstituted = false;
|
||||
GraphicsAllocation *kernelAllocation = nullptr;
|
||||
DebugData debugData;
|
||||
|
@ -33,7 +33,7 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
|
||||
dst.heapInfo.pSsh = src.heaps.surfaceState.begin();
|
||||
|
||||
if (src.tokens.executionEnvironment != nullptr) {
|
||||
dst.hasIndirectStatelessAccess = (src.tokens.executionEnvironment->IndirectStatelessCount > 0);
|
||||
dst.kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (src.tokens.executionEnvironment->IndirectStatelessCount > 0);
|
||||
}
|
||||
|
||||
dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U;
|
||||
|
@ -1004,6 +1004,7 @@ kernels:
|
||||
- 1
|
||||
- 2
|
||||
thread_scheduling_mode: age_based
|
||||
indirect_stateless_count: 2
|
||||
...
|
||||
)===";
|
||||
|
||||
@ -1046,6 +1047,7 @@ kernels:
|
||||
EXPECT_EQ(2, execEnv.workgroupWalkOrderDimensions[2]);
|
||||
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;
|
||||
EXPECT_EQ(ThreadSchedulingMode::ThreadSchedulingModeAgeBased, execEnv.threadSchedulingMode);
|
||||
EXPECT_EQ(2, execEnv.indirectStatelessCount);
|
||||
}
|
||||
|
||||
TEST(ReadZeInfoExecutionEnvironment, GivenUnknownEntryThenEmmitsWarning) {
|
||||
@ -3784,7 +3786,8 @@ kernels:
|
||||
work_group_walk_order_dimensions:
|
||||
- 0
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
indirect_stateless_count : 2
|
||||
)===";
|
||||
NEO::ProgramInfo programInfo;
|
||||
ZebinTestData::ValidEmptyProgram zebin;
|
||||
@ -3831,6 +3834,7 @@ kernels:
|
||||
EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[0]);
|
||||
EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[1]);
|
||||
EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]);
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess);
|
||||
}
|
||||
|
||||
TEST(PopulateArgDescriptorPerThreadPayload, GivenArgTypeLocalIdWhenOffsetIsNonZeroThenFail) {
|
||||
|
Reference in New Issue
Block a user