Align per thread data size to GRF size
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
commit 1b2cfbbb1f (parent d18172c00e)

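The diff below threads the device GRF size through TargetDevice and ProgramInfo into the zebin per-thread payload decoding, so the local-id portion of the per-thread data is sized per channel and aligned up to the GRF size instead of being a flat simdSize * numLocalIdChannels * sizeof(LocalIdT) product. A minimal standalone sketch of the arithmetic (not NEO code; the alignUp helper and the 2-byte local-id entry size are assumptions for illustration, chosen to match the expectations of the new test at the end of this diff):

// Standalone sketch: old vs. new per-thread local-id payload size.
// Assumes 16-bit local-id entries and a power-of-two GRF size (illustrative values only).
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr uint32_t alignUp(uint32_t value, uint32_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1); // alignment must be a power of two
}

int main() {
    const uint32_t simdSize = 16;
    const uint32_t numLocalIdChannels = 3; // x, y, z
    const uint32_t sizeofLocalId = 2;      // assumed sizeof(LocalIdT)
    const uint32_t grfSize = 64;           // bytes per GRF register

    // Old computation: simd * channels * sizeof(id), no GRF alignment.
    const uint32_t oldSize = simdSize * numLocalIdChannels * sizeofLocalId; // 96

    // New computation: align each channel up to the GRF size, then multiply by the channel count.
    const uint32_t perChannelSize = alignUp(simdSize * sizeofLocalId, grfSize); // alignUp(32, 64) == 64
    const uint32_t newSize = perChannelSize * numLocalIdChannels;              // 192

    assert(oldSize == 96 && newSize == 192);
    std::printf("old: %u bytes, new: %u bytes\n", static_cast<unsigned>(oldSize), static_cast<unsigned>(newSize));
    return 0;
}
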
@@ -200,6 +200,7 @@ bool ModuleTranslationUnit::processUnpackedBinary() {
     auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize);
     NEO::SingleDeviceBinary binary = {};
     binary.deviceBinary = blob;
+    binary.targetDevice.grfSize = device->getHwInfo().capabilityTable.grfSize;
     std::string decodeErrors;
     std::string decodeWarnings;

@@ -146,6 +146,7 @@ cl_int Program::processGenBinary(const ClDevice &clDevice) {
     auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);
     SingleDeviceBinary binary = {};
     binary.deviceBinary = blob;
+    binary.targetDevice.grfSize = clDevice.getDevice().getHardwareInfo().capabilityTable.grfSize;
     std::string decodeErrors;
     std::string decodeWarnings;

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020 Intel Corporation
+ * Copyright (C) 2020-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *

@@ -58,6 +58,7 @@ struct TargetDevice {
     PRODUCT_FAMILY productFamily = IGFX_UNKNOWN;
     uint32_t stepping = 0U;
     uint32_t maxPointerSizeInBytes = 4U;
+    uint32_t grfSize = 32U;
 };
 
 struct SingleDeviceBinary {

@@ -605,7 +605,7 @@ bool setVecArgIndicesBasedOnSize(CrossThreadDataOffset (&vec)[Len], size_t vecSi
     return true;
 }
 
-NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst,
+NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, uint32_t grfSize,
                                        std::string &outErrReason, std::string &outWarning) {
     switch (src.argType) {
     default:

@@ -620,6 +620,8 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
 
         uint32_t singleChannelIndicesCount = (dst.kernelAttributes.simdSize == 32 ? 32 : 16);
         uint32_t singleChannelBytes = singleChannelIndicesCount * sizeof(LocalIdT);
+        UNRECOVERABLE_IF(0 == grfSize);
+        singleChannelBytes = alignUp(singleChannelBytes, grfSize);
         auto tupleSize = (src.size / singleChannelBytes);
         switch (tupleSize) {
         default:

@@ -634,8 +636,9 @@
             break;
         }
         dst.kernelAttributes.perThreadDataSize = dst.kernelAttributes.simdSize;
-        dst.kernelAttributes.perThreadDataSize *= dst.kernelAttributes.numLocalIdChannels;
         dst.kernelAttributes.perThreadDataSize *= sizeof(LocalIdT);
+        dst.kernelAttributes.perThreadDataSize = alignUp(dst.kernelAttributes.perThreadDataSize, grfSize);
+        dst.kernelAttributes.perThreadDataSize *= dst.kernelAttributes.numLocalIdChannels;
         break;
     }
     case NEO::Elf::ZebinKernelMetadata::Types::Kernel::ArgTypePackedLocalIds: {

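In the two hunks above, the per-channel byte count (singleChannelIndicesCount * sizeof(LocalIdT)) is aligned up to the GRF size before the declared argument size is divided by it, so the derived channel count (tupleSize) assumes each local-id channel occupies a whole number of GRFs; perThreadDataSize is then rebuilt the same way, as the GRF-aligned per-channel size multiplied by the number of channels. UNRECOVERABLE_IF(0 == grfSize) guards the alignment and the division against a zero GRF size.
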
@@ -956,7 +959,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
     }
 
     for (const auto &arg : perThreadPayloadArguments) {
-        auto decodeErr = populateArgDescriptor(arg, kernelDescriptor, outErrReason, outWarning);
+        auto decodeErr = populateArgDescriptor(arg, kernelDescriptor, dst.grfSize, outErrReason, outWarning);
         if (DecodeError::Success != decodeErr) {
             return decodeErr;
         }

@@ -1130,6 +1133,7 @@ DecodeError decodeSingleDeviceBinary<NEO::DeviceBinaryFormat::Zebin>(ProgramInfo
     }
 
     dst.decodedElf = elf;
+    dst.grfSize = src.targetDevice.grfSize;
 
     if (false == zebinSections.globalDataSections.empty()) {
         dst.globalVariables.initData = zebinSections.globalDataSections[0]->data.begin();

@@ -95,7 +95,7 @@ DecodeError readZeInfoPerThreadMemoryBuffers(const NEO::Yaml::YamlParser &parser
                                              ConstStringRef context,
                                              std::string &outErrReason, std::string &outWarning);
 
-NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst,
+NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, const uint32_t grfSize,
                                        std::string &outErrReason, std::string &outWarning);
 
 NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PayloadArgument::PayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, uint32_t &crossThreadDataSize,

@@ -41,6 +41,7 @@ struct ProgramInfo {
 
     std::vector<KernelInfo *> kernelInfos;
     Elf::Elf<Elf::EI_CLASS_64> decodedElf;
+    uint32_t grfSize = 32U;
 };
 
 size_t getMaxInlineSlmNeeded(const ProgramInfo &programInfo);

@@ -2520,9 +2520,9 @@ TEST(PopulateKernelDescriptor, GivenInvalidPerThreadArgThenFails) {
     NEO::ConstStringRef zeinfo = R"===(
 kernels:
     - name : some_kernel
-      execution_env:
+      execution_env:
         simd_size: 8
-      per_thread_payload_arguments:
+      per_thread_payload_arguments:
         - arg_type: local_size
           offset: 0
           size: 8

@@ -2544,6 +2544,43 @@
     EXPECT_STREQ("DeviceBinaryFormat::Zebin : Invalid arg type in per-thread data section in context of : some_kernel.\n", decodeErrors.c_str());
 }
 
+TEST(PopulateKernelDescriptor, GivenValidLocalIdThenAlignUpChannelSizeToGrfSize) {
+    NEO::ConstStringRef zeinfo = R"===(
+kernels:
+    - name : some_kernel
+      execution_env:
+        simd_size: 16
+      per_thread_payload_arguments:
+        - arg_type: local_id
+          offset: 0
+          size: 192
+)===";
+    NEO::ProgramInfo programInfo;
+    programInfo.grfSize = 64;
+    ZebinTestData::ValidEmptyProgram zebin;
+    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {});
+    std::string errors, warnings;
+    auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings);
+    ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings;
+
+    NEO::Yaml::YamlParser parser;
+    bool parseSuccess = parser.parse(zeinfo, errors, warnings);
+    ASSERT_TRUE(parseSuccess) << errors << " " << warnings;
+
+    NEO::ZebinSections zebinSections;
+    auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings);
+    ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings;
+
+    auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin();
+    auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings);
+    EXPECT_EQ(NEO::DecodeError::Success, err);
+    EXPECT_TRUE(errors.empty()) << errors;
+    EXPECT_TRUE(warnings.empty()) << warnings;
+    ASSERT_EQ(1U, programInfo.kernelInfos.size());
+    EXPECT_EQ(3U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.numLocalIdChannels);
+    EXPECT_EQ(192U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadDataSize);
+}
+
 TEST(PopulateKernelDescriptor, GivenValidPerThreadArgThenPopulatesKernelDescriptor) {
     NEO::ConstStringRef zeinfo = R"===(
 kernels:

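The new GivenValidLocalIdThenAlignUpChannelSizeToGrfSize test sets grfSize to 64 rather than the default 32 so the alignment is actually observable: assuming 16-bit local-id entries, a SIMD16 channel takes 32 bytes and aligns up to 64, so the declared size of 192 decodes to 192 / 64 = 3 channels and a perThreadDataSize of 3 * 64 = 192, whereas with the default 32-byte GRF a SIMD16 channel is already GRF-aligned and the old and new computations would both give 96 bytes for three channels.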