diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 5dcdd7c2e4..da97a84bcc 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -1506,3 +1506,13 @@ HWTEST2_F(HwHelperTest, GivenModifiedGtSystemInfoAndXeHpOrXeHpgCoreWhenCallingCa EXPECT_EQ(expectedThreadCount, result); } } + +HWTEST2_F(HwHelperTest, givenAtMostGen12lpPlatformWhenGettingMinimalScratchSpaceSizeThen1024IsReturned, IsAtMostGen12lp) { + const auto &hwHelper = HwHelper::get(renderCoreFamily); + EXPECT_EQ(1024U, hwHelper.getMinimalScratchSpaceSize()); +} + +HWTEST2_F(HwHelperTest, givenAtLeastXeHpPlatformWhenGettingMinimalScratchSpaceSizeThen64IsReturned, IsAtLeastXeHpCore) { + const auto &hwHelper = HwHelper::get(renderCoreFamily); + EXPECT_EQ(64U, hwHelper.getMinimalScratchSpaceSize()); +} diff --git a/shared/source/device_binary_format/device_binary_formats.cpp b/shared/source/device_binary_format/device_binary_formats.cpp index 349b8f24b0..1c5945c9d0 100644 --- a/shared/source/device_binary_format/device_binary_formats.cpp +++ b/shared/source/device_binary_format/device_binary_formats.cpp @@ -7,6 +7,7 @@ #include "shared/source/device_binary_format/device_binary_formats.h" +#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" @@ -29,6 +30,7 @@ TargetDevice targetDeviceFromHwInfo(const HardwareInfo &hwInfo) { targetDevice.stepping = hwInfo.platform.usRevId; targetDevice.maxPointerSizeInBytes = sizeof(uintptr_t); targetDevice.grfSize = hwInfo.capabilityTable.grfSize; + targetDevice.minScratchSpaceSize = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getMinimalScratchSpaceSize(); return targetDevice; } } // namespace NEO diff --git a/shared/source/device_binary_format/device_binary_formats.h b/shared/source/device_binary_format/device_binary_formats.h index 069030e5dc..51b948d266 
100644 --- a/shared/source/device_binary_format/device_binary_formats.h +++ b/shared/source/device_binary_format/device_binary_formats.h @@ -61,6 +61,7 @@ struct TargetDevice { uint32_t stepping = 0U; uint32_t maxPointerSizeInBytes = 4U; uint32_t grfSize = 32U; + uint32_t minScratchSpaceSize = 0U; }; TargetDevice targetDeviceFromHwInfo(const NEO::HardwareInfo &hwInfo); diff --git a/shared/source/device_binary_format/zebin_decoder.cpp b/shared/source/device_binary_format/zebin_decoder.cpp index 2b1ade9a10..c9e1939e4a 100644 --- a/shared/source/device_binary_format/zebin_decoder.cpp +++ b/shared/source/device_binary_format/zebin_decoder.cpp @@ -15,6 +15,7 @@ #include "shared/source/device_binary_format/elf/zebin_elf.h" #include "shared/source/device_binary_format/elf/zeinfo_enum_lookup.h" #include "shared/source/device_binary_format/yaml/yaml_parser.h" +#include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h" #include "shared/source/program/kernel_info.h" @@ -1018,11 +1019,16 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type return DecodeError::Success; } -NEO::DecodeError populateKernelDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::PerThreadMemoryBufferBaseT &src, NEO::KernelDescriptor &dst, +NEO::DecodeError populateKernelDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::PerThreadMemoryBufferBaseT &src, NEO::KernelDescriptor &dst, uint32_t minScratchSpaceSize, std::string &outErrReason, std::string &outWarning) { using namespace NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer; using namespace NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer::AllocationType; using namespace NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer::MemoryUsage; + if (src.size <= 0) { + 
outErrReason.append("DeviceBinaryFormat::Zebin : Invalid per-thread memory buffer allocation size (size must be greater than 0) in context of : " + dst.kernelMetadata.kernelName + ".\n"); + return DecodeError::InvalidBinary; + } + auto size = src.size; if (src.isSimtThread) { size *= dst.kernelAttributes.simdSize; @@ -1048,7 +1054,9 @@ NEO::DecodeError populateKernelDescriptor(const NEO::Elf::ZebinKernelMetadata::T outErrReason.append("DeviceBinaryFormat::Zebin : Invalid duplicated scratch buffer entry " + std::to_string(src.slot) + " in context of : " + dst.kernelMetadata.kernelName + ".\n"); return DecodeError::InvalidBinary; } - dst.kernelAttributes.perThreadScratchSize[src.slot] = size; + uint32_t scratchSpaceSize = std::max(static_cast<uint32_t>(size), minScratchSpaceSize); + scratchSpaceSize = Math::isPow2(scratchSpaceSize) ? scratchSpaceSize : Math::nextPowerOfTwo(scratchSpaceSize); + dst.kernelAttributes.perThreadScratchSize[src.slot] = scratchSpaceSize; break; } return DecodeError::Success; @@ -1206,7 +1214,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf(ProgramInfo dst.decodedElf = elf; dst.grfSize = src.targetDevice.grfSize; + dst.minScratchSpaceSize = src.targetDevice.minScratchSpaceSize; if (false == zebinSections.globalDataSections.empty()) { dst.globalVariables.initData = zebinSections.globalDataSections[0]->data.begin(); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 805b274921..6e8736c209 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -157,6 +157,7 @@ class HwHelper { virtual const void *getBatchBufferEndReference() const = 0; virtual bool isPlatformFlushTaskEnabled(const NEO::HardwareInfo &hwInfo) const = 0; virtual bool isPatIndexFallbackWaRequired() const = 0; + virtual uint32_t getMinimalScratchSpaceSize() const = 0; protected: HwHelper() = default; @@ -397,6 +398,7 @@ class HwHelperHw : public HwHelper { const void 
*getBatchBufferEndReference() const override; bool isPlatformFlushTaskEnabled(const NEO::HardwareInfo &hwInfo) const override; bool isPatIndexFallbackWaRequired() const override; + uint32_t getMinimalScratchSpaceSize() const override; protected: static const AuxTranslationMode defaultAuxTranslationMode; diff --git a/shared/source/helpers/hw_helper_bdw_and_later.inl b/shared/source/helpers/hw_helper_bdw_and_later.inl index 26a3e7a6c3..808db2c5cc 100644 --- a/shared/source/helpers/hw_helper_bdw_and_later.inl +++ b/shared/source/helpers/hw_helper_bdw_and_later.inl @@ -159,4 +159,9 @@ inline bool HwHelperHw<GfxFamily>::preferInternalBcsEngine() const { return false; } +template <typename GfxFamily> +uint32_t HwHelperHw<GfxFamily>::getMinimalScratchSpaceSize() const { + return 1024U; +} + } // namespace NEO diff --git a/shared/source/helpers/hw_helper_xehp_and_later.inl b/shared/source/helpers/hw_helper_xehp_and_later.inl index 5182e24597..450ce5989d 100644 --- a/shared/source/helpers/hw_helper_xehp_and_later.inl +++ b/shared/source/helpers/hw_helper_xehp_and_later.inl @@ -220,4 +220,9 @@ inline bool HwHelperHw<GfxFamily>::preferInternalBcsEngine() const { return preferInternalBcsEngine; } +template <typename GfxFamily> +uint32_t HwHelperHw<GfxFamily>::getMinimalScratchSpaceSize() const { + return 64U; +} + } // namespace NEO diff --git a/shared/source/program/program_info.h b/shared/source/program/program_info.h index 5922489ab4..da23c2f8c1 100644 --- a/shared/source/program/program_info.h +++ b/shared/source/program/program_info.h @@ -47,6 +47,7 @@ struct ProgramInfo { std::vector kernelInfos; Elf::Elf decodedElf; uint32_t grfSize = 32U; + uint32_t minScratchSpaceSize = 0U; }; size_t getMaxInlineSlmNeeded(const ProgramInfo &programInfo); diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index ed662c5ed0..a8e21128f9 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ 
b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -3497,7 +3497,7 @@ kernels: EXPECT_EQ(256U * 8, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); } -TEST(PopulateKernelDescriptor, GivenPerThreadMemoryBufferWhenTypeIsScratchThenSetsProperFieldsInDescriptor) { +TEST(PopulateKernelDescriptor, GivenPerThreadMemoryBufferOfSizeSmallerThanMinimalWhenTypeIsScratchThenSetsProperFieldsInDescriptor) { NEO::ConstStringRef zeinfo = R"===( kernels: - name : some_kernel @@ -3509,6 +3509,8 @@ kernels: size: 512 )==="; NEO::ProgramInfo programInfo; + programInfo.minScratchSpaceSize = 1024U; + ZebinTestData::ValidEmptyProgram zebin; zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {}); std::string errors, warnings; @@ -3529,7 +3531,45 @@ kernels: EXPECT_TRUE(errors.empty()) << errors; EXPECT_TRUE(warnings.empty()) << warnings; ASSERT_EQ(1U, programInfo.kernelInfos.size()); - EXPECT_EQ(512U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); + EXPECT_EQ(1024U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); + EXPECT_EQ(0U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); +} + +TEST(PopulateKernelDescriptor, GivenPerThreadMemoryBufferOfSizeBiggerThanMinimalWhenTypeIsScratchThenSetsProperFieldsInDescriptor) { + NEO::ConstStringRef zeinfo = R"===( +kernels: + - name : some_kernel + execution_env: + simd_size: 8 + per_thread_memory_buffers: + - type: scratch + usage: private_space + size: 1540 +)==="; + NEO::ProgramInfo programInfo; + programInfo.minScratchSpaceSize = 1024U; + + ZebinTestData::ValidEmptyProgram zebin; + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {}); + std::string errors, warnings; + auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings); + ASSERT_NE(nullptr, 
elf.elfFileHeader) << errors << " " << warnings; + + NEO::Yaml::YamlParser parser; + bool parseSuccess = parser.parse(zeinfo, errors, warnings); + ASSERT_TRUE(parseSuccess) << errors << " " << warnings; + + NEO::ZebinSections zebinSections; + auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings); + ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings; + + auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin(); + auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings); + EXPECT_EQ(NEO::DecodeError::Success, err); + EXPECT_TRUE(errors.empty()) << errors; + EXPECT_TRUE(warnings.empty()) << warnings; + ASSERT_EQ(1U, programInfo.kernelInfos.size()); + EXPECT_EQ(2048U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); EXPECT_EQ(0U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); } @@ -3542,7 +3582,7 @@ kernels: per_thread_memory_buffers: - type: scratch usage: private_space - size: 512 + size: 1024 slot : 1 )==="; NEO::ProgramInfo programInfo; @@ -3567,7 +3607,7 @@ kernels: EXPECT_TRUE(warnings.empty()) << warnings; ASSERT_EQ(1U, programInfo.kernelInfos.size()); EXPECT_EQ(0U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); - EXPECT_EQ(512U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); + EXPECT_EQ(1024U, programInfo.kernelInfos[0]->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); } TEST(PopulateKernelDescriptor, GivenPerThreadMemoryBufferWhenSlotIsInvalidThenFails) { @@ -3604,6 +3644,39 @@ kernels: EXPECT_TRUE(warnings.empty()) << warnings; } +TEST(PopulateKernelDescriptor, GivenPerThreadMemoryBufferWithInvalidSizeThenErrorIsReturned) { + NEO::ConstStringRef zeinfo = R"===( +kernels: + - name : some_kernel + execution_env: + simd_size: 8 + 
per_thread_memory_buffers: + - type: scratch + usage: private_space + size: 0 +)==="; + NEO::ProgramInfo programInfo; + ZebinTestData::ValidEmptyProgram zebin; + zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {}); + std::string errors, warnings; + auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings); + ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings; + + NEO::Yaml::YamlParser parser; + bool parseSuccess = parser.parse(zeinfo, errors, warnings); + ASSERT_TRUE(parseSuccess) << errors << " " << warnings; + + NEO::ZebinSections zebinSections; + auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings); + ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings; + + auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin(); + auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings); + EXPECT_EQ(NEO::DecodeError::InvalidBinary, err); + EXPECT_STREQ("DeviceBinaryFormat::Zebin : Invalid per-thread memory buffer allocation size (size must be greater than 0) in context of : some_kernel.\n", errors.c_str()); + EXPECT_TRUE(warnings.empty()) << warnings; +} + TEST(PopulateKernelDescriptor, GivenPerThreadMemoryBufferWithMultipleScratchEntriesForTheSameSlotThenFails) { NEO::ConstStringRef zeinfo = R"===( kernels: