Align per thread data size to GRF size

Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
Krystian Chmielewski 2021-07-13 15:29:58 +00:00 committed by Compute-Runtime-Automation
parent d18172c00e
commit 1b2cfbbb1f
7 changed files with 52 additions and 7 deletions

View File

@ -200,6 +200,7 @@ bool ModuleTranslationUnit::processUnpackedBinary() {
auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize);
NEO::SingleDeviceBinary binary = {};
binary.deviceBinary = blob;
binary.targetDevice.grfSize = device->getHwInfo().capabilityTable.grfSize;
std::string decodeErrors;
std::string decodeWarnings;

View File

@ -146,6 +146,7 @@ cl_int Program::processGenBinary(const ClDevice &clDevice) {
auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);
SingleDeviceBinary binary = {};
binary.deviceBinary = blob;
binary.targetDevice.grfSize = clDevice.getDevice().getHardwareInfo().capabilityTable.grfSize;
std::string decodeErrors;
std::string decodeWarnings;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -58,6 +58,7 @@ struct TargetDevice {
PRODUCT_FAMILY productFamily = IGFX_UNKNOWN;
uint32_t stepping = 0U;
uint32_t maxPointerSizeInBytes = 4U;
uint32_t grfSize = 32U;
};
struct SingleDeviceBinary {

View File

@ -605,7 +605,7 @@ bool setVecArgIndicesBasedOnSize(CrossThreadDataOffset (&vec)[Len], size_t vecSi
return true;
}
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst,
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, uint32_t grfSize,
std::string &outErrReason, std::string &outWarning) {
switch (src.argType) {
default:
@ -620,6 +620,8 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
uint32_t singleChannelIndicesCount = (dst.kernelAttributes.simdSize == 32 ? 32 : 16);
uint32_t singleChannelBytes = singleChannelIndicesCount * sizeof(LocalIdT);
UNRECOVERABLE_IF(0 == grfSize);
singleChannelBytes = alignUp(singleChannelBytes, grfSize);
auto tupleSize = (src.size / singleChannelBytes);
switch (tupleSize) {
default:
@ -634,8 +636,9 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
break;
}
dst.kernelAttributes.perThreadDataSize = dst.kernelAttributes.simdSize;
dst.kernelAttributes.perThreadDataSize *= dst.kernelAttributes.numLocalIdChannels;
dst.kernelAttributes.perThreadDataSize *= sizeof(LocalIdT);
dst.kernelAttributes.perThreadDataSize = alignUp(dst.kernelAttributes.perThreadDataSize, grfSize);
dst.kernelAttributes.perThreadDataSize *= dst.kernelAttributes.numLocalIdChannels;
break;
}
case NEO::Elf::ZebinKernelMetadata::Types::Kernel::ArgTypePackedLocalIds: {
@ -956,7 +959,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
}
for (const auto &arg : perThreadPayloadArguments) {
auto decodeErr = populateArgDescriptor(arg, kernelDescriptor, outErrReason, outWarning);
auto decodeErr = populateArgDescriptor(arg, kernelDescriptor, dst.grfSize, outErrReason, outWarning);
if (DecodeError::Success != decodeErr) {
return decodeErr;
}
@ -1130,6 +1133,7 @@ DecodeError decodeSingleDeviceBinary<NEO::DeviceBinaryFormat::Zebin>(ProgramInfo
}
dst.decodedElf = elf;
dst.grfSize = src.targetDevice.grfSize;
if (false == zebinSections.globalDataSections.empty()) {
dst.globalVariables.initData = zebinSections.globalDataSections[0]->data.begin();

View File

@ -95,7 +95,7 @@ DecodeError readZeInfoPerThreadMemoryBuffers(const NEO::Yaml::YamlParser &parser
ConstStringRef context,
std::string &outErrReason, std::string &outWarning);
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst,
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, const uint32_t grfSize,
std::string &outErrReason, std::string &outWarning);
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PayloadArgument::PayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, uint32_t &crossThreadDataSize,

View File

@ -41,6 +41,7 @@ struct ProgramInfo {
std::vector<KernelInfo *> kernelInfos;
Elf::Elf<Elf::EI_CLASS_64> decodedElf;
uint32_t grfSize = 32U;
};
size_t getMaxInlineSlmNeeded(const ProgramInfo &programInfo);

View File

@ -2520,9 +2520,9 @@ TEST(PopulateKernelDescriptor, GivenInvalidPerThreadArgThenFails) {
NEO::ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
execution_env:
simd_size: 8
per_thread_payload_arguments:
per_thread_payload_arguments:
- arg_type: local_size
offset: 0
size: 8
@ -2544,6 +2544,43 @@ kernels:
EXPECT_STREQ("DeviceBinaryFormat::Zebin : Invalid arg type in per-thread data section in context of : some_kernel.\n", decodeErrors.c_str());
}
TEST(PopulateKernelDescriptor, GivenValidLocalIdThenAlignUpChannelSizeToGrfSize) {
NEO::ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 16
per_thread_payload_arguments:
- arg_type: local_id
offset: 0
size: 192
)===";
NEO::ProgramInfo programInfo;
programInfo.grfSize = 64;
ZebinTestData::ValidEmptyProgram zebin;
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {});
std::string errors, warnings;
auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings);
ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings;
NEO::Yaml::YamlParser parser;
bool parseSuccess = parser.parse(zeinfo, errors, warnings);
ASSERT_TRUE(parseSuccess) << errors << " " << warnings;
NEO::ZebinSections zebinSections;
auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings);
ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings;
auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin();
auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings);
EXPECT_EQ(NEO::DecodeError::Success, err);
EXPECT_TRUE(errors.empty()) << errors;
EXPECT_TRUE(warnings.empty()) << warnings;
ASSERT_EQ(1U, programInfo.kernelInfos.size());
EXPECT_EQ(3U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.numLocalIdChannels);
EXPECT_EQ(192U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadDataSize);
}
TEST(PopulateKernelDescriptor, GivenValidPerThreadArgThenPopulatesKernelDescriptor) {
NEO::ConstStringRef zeinfo = R"===(
kernels: