diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index 1663fef162..592f1ba845 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -382,12 +382,16 @@ ze_result_t ModuleTranslationUnit::processUnpackedBinary() { } auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); - if (programInfo.globalConstants.size != 0) { - this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalConstants.size, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData); + auto globalConstDataSize = programInfo.globalConstants.size + programInfo.globalConstants.zeroInitSize; + if (globalConstDataSize != 0) { + this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), globalConstDataSize, + programInfo.globalConstants.zeroInitSize, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData); } - if (programInfo.globalVariables.size != 0) { - this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalVariables.size, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData); + auto globalVariablesDataSize = programInfo.globalVariables.size + programInfo.globalVariables.zeroInitSize; + if (globalVariablesDataSize != 0) { + this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), globalVariablesDataSize, + programInfo.globalVariables.zeroInitSize, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData); } for (auto &kernelInfo : this->programInfo.kernelInfos) { diff --git a/opencl/source/program/process_device_binary.cpp b/opencl/source/program/process_device_binary.cpp index 5e1de849fe..694138088e 100644 --- a/opencl/source/program/process_device_binary.cpp +++ b/opencl/source/program/process_device_binary.cpp 
@@ -255,14 +255,15 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) { kernelInfoArray = std::move(src.kernelInfos); auto svmAllocsManager = context ? context->getSVMAllocsManager() : nullptr; - if (src.globalConstants.size != 0) { - buildInfos[rootDeviceIndex].constantSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), src.globalConstants.size, true, linkerInput, src.globalConstants.initData); + auto globalConstDataSize = src.globalConstants.size + src.globalConstants.zeroInitSize; + if (globalConstDataSize != 0) { + buildInfos[rootDeviceIndex].constantSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalConstDataSize, src.globalConstants.zeroInitSize, true, linkerInput, src.globalConstants.initData); } - buildInfos[rootDeviceIndex].globalVarTotalSize = src.globalVariables.size; - - if (src.globalVariables.size != 0) { - buildInfos[rootDeviceIndex].globalSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), src.globalVariables.size, false, linkerInput, src.globalVariables.initData); + auto globalVariablesDataSize = src.globalVariables.size + src.globalVariables.zeroInitSize; + buildInfos[rootDeviceIndex].globalVarTotalSize = globalVariablesDataSize; + if (globalVariablesDataSize != 0) { + buildInfos[rootDeviceIndex].globalSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalVariablesDataSize, src.globalVariables.zeroInitSize, false, linkerInput, src.globalVariables.initData); if (clDevice.areOcl21FeaturesEnabled() == false) { buildInfos[rootDeviceIndex].globalVarTotalSize = 0u; } diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index df427ad24f..1fe11594bc 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -2755,13 +2755,13 @@ TEST(MemoryManagerTest, 
whenMemoryManagerReturnsNullptrThenAllocateGlobalsSurfac linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; memoryManager->recentlyPassedDeviceBitfield = {}; - GraphicsAllocation *allocation = allocateGlobalsSurface(nullptr, device.getDevice(), 1024, false, &linkerInput, nullptr); + GraphicsAllocation *allocation = allocateGlobalsSurface(nullptr, device.getDevice(), 1024, 0u, false, &linkerInput, nullptr); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); auto svmAllocsManager = std::make_unique(device.getMemoryManager(), false); memoryManager->recentlyPassedDeviceBitfield = {}; - allocation = allocateGlobalsSurface(svmAllocsManager.get(), device.getDevice(), 1024, false, &linkerInput, nullptr); + allocation = allocateGlobalsSurface(svmAllocsManager.get(), device.getDevice(), 1024, 0u, false, &linkerInput, nullptr); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); } @@ -2798,7 +2798,7 @@ TEST_F(MemoryManagerMultiRootDeviceTests, WhenAllocatingGlobalSurfaceThenItHasCo WhiteBox linkerInput; linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; - GraphicsAllocation *allocation = allocateGlobalsSurface(context->svmAllocsManager, device1->getDevice(), initData.size(), false, &linkerInput, initData.data()); + GraphicsAllocation *allocation = allocateGlobalsSurface(context->svmAllocsManager, device1->getDevice(), initData.size(), 0u, false, &linkerInput, initData.data()); ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectedRootDeviceIndex, allocation->getRootDeviceIndex()); diff --git a/opencl/test/unit_test/program/program_data_tests.cpp b/opencl/test/unit_test/program/program_data_tests.cpp index 947ea46bc1..9b47589619 100644 --- a/opencl/test/unit_test/program/program_data_tests.cpp +++ b/opencl/test/unit_test/program/program_data_tests.cpp @@ -550,20 +550,6 @@ 
TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeGlobalPoi prog->setGlobalSurface(nullptr); } -TEST_F(ProgramDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) { - SPatchFunctionTableInfo token; - token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; - token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); - token.NumEntries = 0; - - pProgramPatchList = &token; - programPatchListSize = token.Size; - - buildAndDecodeProgramPatchList(); - - EXPECT_NE(nullptr, pProgram->getLinkerInput(pContext->getDevice(0)->getRootDeviceIndex())); -} - TEST(ProgramLinkBinaryTest, whenLinkerInputEmptyThenLinkSuccessful) { auto linkerInput = std::make_unique>(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); diff --git a/shared/source/compiler_interface/linker.cpp b/shared/source/compiler_interface/linker.cpp index 778d025e71..f40f8f5b7b 100644 --- a/shared/source/compiler_interface/linker.cpp +++ b/shared/source/compiler_interface/linker.cpp @@ -38,6 +38,10 @@ SegmentType LinkerInput::getSegmentForSection(ConstStringRef name) { return NEO::SegmentType::GlobalStrings; } else if (name.startsWith(NEO::Elf::SpecialSectionNames::text.data())) { return NEO::SegmentType::Instructions; + } else if (name == NEO::Elf::SectionsNamesZebin::dataConstZeroInit) { + return NEO::SegmentType::GlobalConstantsZeroInit; + } else if (name == NEO::Elf::SectionsNamesZebin::dataGlobalZeroInit) { + return NEO::SegmentType::GlobalVariablesZeroInit; } return NEO::SegmentType::Unknown; } @@ -286,7 +290,7 @@ LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const Segm ExternalFunctionsT &externalFunctions) { bool success = data.isValid(); auto initialUnresolvedExternalsCount = outUnresolvedExternals.size(); - success = success && processRelocations(globalVariablesSegInfo, globalConstantsSegInfo, exportedFunctionsSegInfo, globalStringsSegInfo, instructionsSegments); + success = success && 
processRelocations(globalVariablesSegInfo, globalConstantsSegInfo, exportedFunctionsSegInfo, globalStringsSegInfo, instructionsSegments, constantsInitDataSize, variablesInitDataSize); if (!success) { return LinkingStatus::Error; } @@ -306,11 +310,12 @@ LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const Segm } bool Linker::processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings, - const PatchableSegments &instructionsSegments) { + const PatchableSegments &instructionsSegments, size_t globalConstantsInitDataSize, size_t globalVariablesInitDataSize) { relocatedSymbols.reserve(data.getSymbols().size()); - for (auto &symbol : data.getSymbols()) { + for (const auto &[symbolName, symbolInfo] : data.getSymbols()) { const SegmentInfo *seg = nullptr; - switch (symbol.second.segment) { + uintptr_t gpuAddress = symbolInfo.offset; + switch (symbolInfo.segment) { default: DEBUG_BREAK_IF(true); return false; @@ -326,13 +331,21 @@ bool Linker::processRelocations(const SegmentInfo &globalVariables, const Segmen case SegmentType::Instructions: seg = &exportedFunctions; break; + case SegmentType::GlobalConstantsZeroInit: + seg = &globalConstants; + gpuAddress += globalConstantsInitDataSize; + break; + case SegmentType::GlobalVariablesZeroInit: + seg = &globalVariables; + gpuAddress += globalVariablesInitDataSize; + break; } - uintptr_t gpuAddress = seg->gpuAddress + symbol.second.offset; - if (symbol.second.offset + symbol.second.size > seg->segmentSize) { + if (gpuAddress + symbolInfo.size > seg->segmentSize) { DEBUG_BREAK_IF(true); return false; } - relocatedSymbols[symbol.first] = {symbol.second, gpuAddress}; + gpuAddress += seg->gpuAddress; + relocatedSymbols[symbolName] = {symbolInfo, gpuAddress}; } localRelocatedSymbols.reserve(data.getLocalSymbols().size()); for (auto &localSymbol : data.getLocalSymbols()) { @@ -425,10 +438,10 @@ void 
Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, std::vector &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize) { - std::vector constantsInitDataCopy(constantsInitDataSize); - memcpy_s(constantsInitDataCopy.data(), constantsInitDataCopy.size(), constantsInitData, constantsInitDataSize); - std::vector variablesInitDataCopy(variablesInitDataSize); - memcpy_s(variablesInitDataCopy.data(), variablesInitDataCopy.size(), variablesInitData, variablesInitDataSize); + std::vector constantsData(globalConstantsSegInfo.segmentSize, 0u); + memcpy_s(constantsData.data(), constantsData.size(), constantsInitData, constantsInitDataSize); + std::vector variablesData(globalVariablesSegInfo.segmentSize, 0u); + memcpy_s(variablesData.data(), variablesData.size(), variablesInitData, variablesInitDataSize); bool isAnySymbolRelocated = false; for (const auto &relocation : data.getDataRelocations()) { @@ -439,22 +452,26 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const } uint64_t srcGpuAddressAs64Bit = symbolIt->second.gpuAddress; - std::vector *dst = nullptr; + ArrayRef dst{}; const void *initData = nullptr; if (SegmentType::GlobalConstants == relocation.relocationSegment) { - dst = &constantsInitDataCopy; + dst = {constantsData.data(), constantsInitDataSize}; initData = constantsInitData; + } else if (SegmentType::GlobalConstantsZeroInit == relocation.relocationSegment) { + dst = {constantsData.data() + constantsInitDataSize, constantsData.size() - constantsInitDataSize}; } else if (SegmentType::GlobalVariables == relocation.relocationSegment) { - dst = &variablesInitDataCopy; + dst = {variablesData.data(), variablesInitDataSize}; initData = variablesInitData; + } else if (SegmentType::GlobalVariablesZeroInit == 
relocation.relocationSegment) { + dst = {variablesData.data() + variablesInitDataSize, variablesData.size() - variablesInitDataSize}; } else { outUnresolvedExternals.push_back(UnresolvedExternal{relocation}); continue; } - UNRECOVERABLE_IF(nullptr == dst); + UNRECOVERABLE_IF(dst.empty()); auto relocType = (LinkerInput::Traits::PointerSize::Ptr32bit == data.getTraits().pointerSize) ? RelocationInfo::Type::AddressLow : relocation.type; - bool invalidOffset = relocation.offset + addressSizeInBytes(relocType) > dst->size(); + bool invalidOffset = relocation.offset + addressSizeInBytes(relocType) > dst.size(); DEBUG_BREAK_IF(invalidOffset); if (invalidOffset) { outUnresolvedExternals.push_back(UnresolvedExternal{relocation}); @@ -466,15 +483,15 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const switch (relocType) { default: UNRECOVERABLE_IF(RelocationInfo::Type::Address != relocType); - patchIncrement(dst->data(), static_cast(relocation.offset), initData, incrementValue); + patchIncrement(dst.begin(), static_cast(relocation.offset), initData, incrementValue); break; case RelocationInfo::Type::AddressLow: incrementValue = incrementValue & 0xffffffff; - patchIncrement(dst->data(), static_cast(relocation.offset), initData, incrementValue); + patchIncrement(dst.begin(), static_cast(relocation.offset), initData, incrementValue); break; case RelocationInfo::Type::AddressHigh: incrementValue = (incrementValue >> 32) & 0xffffffff; - patchIncrement(dst->data(), static_cast(relocation.offset), initData, incrementValue); + patchIncrement(dst.begin(), static_cast(relocation.offset), initData, incrementValue); break; } } @@ -484,11 +501,11 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const auto &productHelper = pDevice->getProductHelper(); if (globalConstantsSeg) { bool useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *globalConstantsSeg); - 
MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalConstantsSeg, 0, constantsInitDataCopy.data(), constantsInitDataCopy.size()); + MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalConstantsSeg, 0, constantsData.data(), constantsData.size()); } if (globalVariablesSeg) { bool useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *globalVariablesSeg); - MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalVariablesSeg, 0, variablesInitDataCopy.data(), variablesInitDataCopy.size()); + MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalVariablesSeg, 0, variablesData.data(), variablesData.size()); } } } @@ -633,6 +650,10 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved template void Linker::patchIncrement(void *dstBegin, size_t relocationOffset, const void *initData, uint64_t incrementValue) { + if (nullptr == initData) { + *(reinterpret_cast(dstBegin) + relocationOffset) = static_cast(incrementValue); + return; + } auto initValue = ptrOffset(initData, relocationOffset); PatchSizeT value = 0; memcpy_s(&value, sizeof(PatchSizeT), initValue, sizeof(PatchSizeT)); diff --git a/shared/source/compiler_interface/linker.h b/shared/source/compiler_interface/linker.h index b5b9e604b5..f2e20081b1 100644 --- a/shared/source/compiler_interface/linker.h +++ b/shared/source/compiler_interface/linker.h @@ -28,8 +28,10 @@ struct ProgramInfo; enum class SegmentType : uint32_t { Unknown, GlobalConstants, + GlobalConstantsZeroInit, GlobalStrings, GlobalVariables, + GlobalVariablesZeroInit, Instructions, }; @@ -193,7 +195,7 @@ struct Linker { struct SegmentInfo { uintptr_t gpuAddress = std::numeric_limits::max(); - size_t segmentSize = std::numeric_limits::max(); + size_t segmentSize = 0u; }; struct PatchableSegment { @@ -247,7 +249,7 @@ struct Linker { RelocatedSymbolsMap relocatedSymbols; LocalsRelocatedSymbolsMap 
localRelocatedSymbols; - bool processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings, const PatchableSegments &instructionsSegments); + bool processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings, const PatchableSegments &instructionsSegments, size_t globalConstantsInitDataSize, size_t globalVariablesInitDataSize); void patchInstructionsSegments(const std::vector &instructionsSegments, std::vector &outUnresolvedExternals, const KernelDescriptorsT &kernelDescriptors); diff --git a/shared/source/device_binary_format/elf/zebin_elf.h b/shared/source/device_binary_format/elf/zebin_elf.h index 9bf5f3c84d..339e641f93 100644 --- a/shared/source/device_binary_format/elf/zebin_elf.h +++ b/shared/source/device_binary_format/elf/zebin_elf.h @@ -43,8 +43,10 @@ namespace SectionsNamesZebin { inline constexpr ConstStringRef textPrefix = ".text."; inline constexpr ConstStringRef functions = ".text.Intel_Symbol_Table_Void_Program"; inline constexpr ConstStringRef dataConst = ".data.const"; +inline constexpr ConstStringRef dataConstZeroInit = ".bss.const"; inline constexpr ConstStringRef dataGlobalConst = ".data.global_const"; inline constexpr ConstStringRef dataGlobal = ".data.global"; +inline constexpr ConstStringRef dataGlobalZeroInit = ".bss.global"; inline constexpr ConstStringRef dataConstString = ".data.const.string"; inline constexpr ConstStringRef symtab = ".symtab"; inline constexpr ConstStringRef relTablePrefix = ".rel."; diff --git a/shared/source/device_binary_format/zebin_decoder.cpp b/shared/source/device_binary_format/zebin_decoder.cpp index 1509795724..0f657dd3c0 100644 --- a/shared/source/device_binary_format/zebin_decoder.cpp +++ b/shared/source/device_binary_format/zebin_decoder.cpp @@ -278,6 +278,15 @@ DecodeError extractZebinSections(NEO::Elf::Elf 
&elf, ZebinSections DecodeError validateZebinSectionsCount(const ZebinSections &sections, std::string &outErrReason, std::string &outWarning) { bool valid = validateZebinSectionsCountAtMost(sections.zeInfoSections, NEO::Elf::SectionsNamesZebin::zeInfo, 1U, outErrReason, outWarning); valid &= validateZebinSectionsCountAtMost(sections.globalDataSections, NEO::Elf::SectionsNamesZebin::dataGlobal, 1U, outErrReason, outWarning); + valid &= validateZebinSectionsCountAtMost(sections.globalZeroInitDataSections, NEO::Elf::SectionsNamesZebin::dataGlobalZeroInit, 1U, outErrReason, outWarning); valid &= validateZebinSectionsCountAtMost(sections.constDataSections, NEO::Elf::SectionsNamesZebin::dataConst, 1U, outErrReason, outWarning); + valid &= validateZebinSectionsCountAtMost(sections.constZeroInitDataSections, NEO::Elf::SectionsNamesZebin::dataConstZeroInit, 1U, outErrReason, outWarning); valid &= validateZebinSectionsCountAtMost(sections.constDataStringSections, NEO::Elf::SectionsNamesZebin::dataConstString, 1U, outErrReason, outWarning); valid &= validateZebinSectionsCountAtMost(sections.symtabSections, NEO::Elf::SectionsNamesZebin::symtab, 1U, outErrReason, outWarning); valid &= validateZebinSectionsCountAtMost(sections.spirvSections, NEO::Elf::SectionsNamesZebin::spv, 1U, outErrReason, outWarning); @@ -706,11 +717,19 @@ DecodeError decodeZebin(ProgramInfo &dst, NEO::Elf::Elf &elf, std::stri dst.globalVariables.size = zebinSections.globalDataSections[0]->data.size(); } + if (false == zebinSections.globalZeroInitDataSections.empty()) { + dst.globalVariables.zeroInitSize = static_cast(zebinSections.globalZeroInitDataSections[0]->header->size); + } + if (false == zebinSections.constDataSections.empty()) { dst.globalConstants.initData = zebinSections.constDataSections[0]->data.begin(); dst.globalConstants.size = zebinSections.constDataSections[0]->data.size(); } + if (false == zebinSections.constZeroInitDataSections.empty()) { + dst.globalConstants.zeroInitSize = 
static_cast(zebinSections.constZeroInitDataSections[0]->header->size); + } + if (false == zebinSections.constDataStringSections.empty()) { dst.globalStrings.initData = zebinSections.constDataStringSections[0]->data.begin(); dst.globalStrings.size = zebinSections.constDataStringSections[0]->data.size(); diff --git a/shared/source/device_binary_format/zebin_decoder.h b/shared/source/device_binary_format/zebin_decoder.h index f31a67d2e1..ee0c880e8e 100644 --- a/shared/source/device_binary_format/zebin_decoder.h +++ b/shared/source/device_binary_format/zebin_decoder.h @@ -30,7 +30,9 @@ struct ZebinSections { StackVec textKernelSections; StackVec zeInfoSections; StackVec globalDataSections; + StackVec globalZeroInitDataSections; StackVec constDataSections; + StackVec constZeroInitDataSections; StackVec constDataStringSections; StackVec symtabSections; StackVec spirvSections; diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 2e0d31f57e..a80384495b 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -212,6 +212,11 @@ class GraphicsAllocation : public IDNode { type == AllocationType::DEBUG_SBA_TRACKING_BUFFER; } + static bool isConstantOrGlobalSurfaceAllocationType(AllocationType type) { + return type == AllocationType::CONSTANT_SURFACE || + type == AllocationType::GLOBAL_SURFACE; + } + static uint32_t getNumHandlesForKmdSharedAllocation(uint32_t numBanks); void *getReservedAddressPtr() const { diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 086ca36137..ec5a794339 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -491,7 +491,8 @@ bool 
MemoryManager::getAllocationData(AllocationData &allocationData, const Allo allocationData.flags.multiOsContextCapable = properties.flags.multiOsContextCapable; allocationData.usmInitialPlacement = properties.usmInitialPlacement; - if (GraphicsAllocation::isDebugSurfaceAllocationType(properties.allocationType)) { + if (GraphicsAllocation::isDebugSurfaceAllocationType(properties.allocationType) || + GraphicsAllocation::isConstantOrGlobalSurfaceAllocationType(properties.allocationType)) { allocationData.flags.zeroMemory = 1; } diff --git a/shared/source/program/program_info.h b/shared/source/program/program_info.h index 17abc430cd..e997426240 100644 --- a/shared/source/program/program_info.h +++ b/shared/source/program/program_info.h @@ -31,6 +31,7 @@ struct ProgramInfo { struct GlobalSurfaceInfo { const void *initData = nullptr; size_t size = 0U; + size_t zeroInitSize = 0U; }; void prepareLinkerInputStorage(); diff --git a/shared/source/program/program_initialization.cpp b/shared/source/program/program_initialization.cpp index 138b5dcb78..207b5c6fee 100644 --- a/shared/source/program/program_initialization.cpp +++ b/shared/source/program/program_initialization.cpp @@ -19,7 +19,7 @@ namespace NEO { -GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAllocManager, NEO::Device &device, size_t size, bool constant, +GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAllocManager, NEO::Device &device, size_t totalSize, size_t zeroInitSize, bool constant, LinkerInput *const linkerInput, const void *initData) { bool globalsAreExported = false; GraphicsAllocation *gpuAllocation = nullptr; @@ -37,7 +37,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc subDeviceBitfields.insert({rootDeviceIndex, deviceBitfield}); NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, subDeviceBitfields); unifiedMemoryProperties.device 
= &device; - auto ptr = svmAllocManager->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); + auto ptr = svmAllocManager->createUnifiedMemoryAllocation(totalSize, unifiedMemoryProperties); DEBUG_BREAK_IF(ptr == nullptr); if (ptr == nullptr) { return nullptr; @@ -49,7 +49,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc auto allocationType = constant ? AllocationType::CONSTANT_SURFACE : AllocationType::GLOBAL_SURFACE; gpuAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, true, // allocateMemory - size, allocationType, + totalSize, allocationType, false, // isMultiStorageAllocation deviceBitfield}); } @@ -61,11 +61,13 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc auto &rootDeviceEnvironment = device.getRootDeviceEnvironment(); auto &productHelper = device.getProductHelper(); - auto success = MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation), - device, gpuAllocation, 0, initData, size); - - UNRECOVERABLE_IF(!success); - + bool isOnlyBssData = (totalSize == zeroInitSize); + if (false == isOnlyBssData) { + auto initSize = totalSize - zeroInitSize; + auto success = MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation), + device, gpuAllocation, 0, initData, initSize); + UNRECOVERABLE_IF(!success); + } return gpuAllocation; } diff --git a/shared/source/program/program_initialization.h b/shared/source/program/program_initialization.h index ab8d5d7032..fb3ecccacf 100644 --- a/shared/source/program/program_initialization.h +++ b/shared/source/program/program_initialization.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -17,7 +17,7 @@ class SVMAllocsManager; struct LinkerInput; 
GraphicsAllocation *allocateGlobalsSurface(SVMAllocsManager *const svmAllocManager, Device &device, - size_t size, bool constant, + size_t totalSize, size_t zeroInitSize, bool constant, LinkerInput *const linkerInput, const void *initData); } // namespace NEO diff --git a/shared/test/unit_test/compiler_interface/linker_tests.cpp b/shared/test/unit_test/compiler_interface/linker_tests.cpp index cb402eca31..559f611e57 100644 --- a/shared/test/unit_test/compiler_interface/linker_tests.cpp +++ b/shared/test/unit_test/compiler_interface/linker_tests.cpp @@ -287,6 +287,8 @@ TEST(LinkerInputTests, WhenGettingSegmentForSectionNameThenCorrectSegmentIsRetur auto segmentConstString = NEO::LinkerInput::getSegmentForSection(NEO::Elf::SectionsNamesZebin::dataConstString.str()); auto segmentInstructions = NEO::LinkerInput::getSegmentForSection(NEO::Elf::SectionsNamesZebin::textPrefix.str()); auto segmentInstructions2 = NEO::LinkerInput::getSegmentForSection(".text.abc"); + auto segmentGlobalZeroInit = NEO::LinkerInput::getSegmentForSection(NEO::Elf::SectionsNamesZebin::dataGlobalZeroInit.str()); + auto segmentGlobalConstZeroInit = NEO::LinkerInput::getSegmentForSection(NEO::Elf::SectionsNamesZebin::dataConstZeroInit.str()); EXPECT_EQ(NEO::SegmentType::GlobalConstants, segmentConst); EXPECT_EQ(NEO::SegmentType::GlobalConstants, segmentGlobalConst); @@ -294,6 +296,8 @@ TEST(LinkerInputTests, WhenGettingSegmentForSectionNameThenCorrectSegmentIsRetur EXPECT_EQ(NEO::SegmentType::GlobalStrings, segmentConstString); EXPECT_EQ(NEO::SegmentType::Instructions, segmentInstructions); EXPECT_EQ(NEO::SegmentType::Instructions, segmentInstructions2); + EXPECT_EQ(NEO::SegmentType::GlobalVariablesZeroInit, segmentGlobalZeroInit); + EXPECT_EQ(NEO::SegmentType::GlobalConstantsZeroInit, segmentGlobalConstZeroInit); } TEST(LinkerInputTests, WhenGettingSegmentForUnknownSectionNameThenUnknownSegmentIsReturned) { @@ -1693,6 +1697,146 @@ TEST(LinkerTests, 
givenValidSymbolsAndRelocationsWhenPatchingDataSegmentsThenThe } } +TEST(LinkerTests, givenValidSymbolsAndRelocationsToBssDataSectionsWhenPatchingDataSegmentsThenTheyAreProperlyPatched) { + uint64_t initGlobalConstantData[] = {0x1234}; //<- const1 - initValue should be ignored + uint64_t initGlobalVariablesData[] = {0x4321}; // <- var1 - initValue should be ignored + + uint64_t constantsSegmentData[2]{0}; // size 2 * uint64_t - contains also bss at the end + uint64_t globalVariablesSegmentData[2]{0}; // size 2 * uint64_t - contains also bss at the end + + NEO::MockGraphicsAllocation globalConstantsPatchableSegment{constantsSegmentData, sizeof(constantsSegmentData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegment{globalVariablesSegmentData, sizeof(globalVariablesSegmentData)}; + globalConstantsPatchableSegment.gpuAddress = 0xA0000000; + globalVariablesPatchableSegment.gpuAddress = 0xB0000000; + + NEO::Linker::SegmentInfo globalConstantsSegmentInfo, globalVariablesSegmentInfo; + globalConstantsSegmentInfo.gpuAddress = static_cast(globalConstantsPatchableSegment.getGpuAddress()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + + globalVariablesSegmentInfo.gpuAddress = static_cast(globalVariablesPatchableSegment.getGpuAddress()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment.getUnderlyingBufferSize(); + + auto setUpInstructionSeg = [](std::vector &instrData, NEO::Linker::PatchableSegments &patchableInstrSeg) -> void { + uint64_t initData = 0x77777777; + instrData.resize(8, static_cast(initData)); + + auto &emplaced = patchableInstrSeg.emplace_back(); + emplaced.hostPointer = instrData.data(); + emplaced.segmentSize = instrData.size(); + }; + NEO::Linker::PatchableSegments patchableInstructionSegments; + std::vector instructionsData1, instructionsData2; + setUpInstructionSeg(instructionsData1, patchableInstructionSegments); + setUpInstructionSeg(instructionsData2, 
patchableInstructionSegments); + + WhiteBox linkerInput; + linkerInput.traits.requiresPatchingOfInstructionSegments = true; + + auto &var1 = linkerInput.symbols["var1"]; + var1.segment = SegmentType::GlobalVariables; + var1.offset = 0U; + var1.size = 8U; + + auto &bssVar = linkerInput.symbols["bssVar"]; + bssVar.segment = SegmentType::GlobalVariablesZeroInit; + bssVar.offset = 0U; + bssVar.size = 8U; + + auto &const1 = linkerInput.symbols["const1"]; + const1.segment = SegmentType::GlobalConstants; + const1.offset = 0U; + const1.size = 8U; + + auto &bssConst = linkerInput.symbols["bssConst"]; + bssConst.segment = SegmentType::GlobalConstantsZeroInit; + bssConst.offset = 0U; + bssConst.size = 8U; + + /* + Segments: + Const: + 0x00 0x1000 <- const 1 + 0x08 0x0 <- bss + + Var: + 0x00 0x4000 <- var 1 + 0x08 0x0 <- bss + + Instructions: + 0x0 0x0 <- will be patched with bss.const + 0x08 0x0 <- will be patched with bss.global + + 1. Patch bss data from const segment with var 1 + Patch bss data from global variables segment with const 1 + 2. Patch const 2 with symbol pointing to bss in const (patched in step 1). + Patch var 2 with symbol pointing to bss in variables (patched in step 1). + */ + + // Relocations for step 1. + // bssConst[0] = &var1 + { + NEO::LinkerInput::RelocationInfo relocation; + relocation.offset = 0U; + relocation.relocationSegment = NEO::SegmentType::GlobalConstantsZeroInit; + relocation.symbolName = "var1"; + relocation.type = NEO::LinkerInput::RelocationInfo::Type::Address; + linkerInput.dataRelocations.push_back(relocation); + } + // bssGlobal[0] = &const1 + { + NEO::LinkerInput::RelocationInfo relocation; + relocation.offset = 0U; + relocation.relocationSegment = NEO::SegmentType::GlobalVariablesZeroInit; + relocation.symbolName = "const1"; + relocation.type = NEO::LinkerInput::RelocationInfo::Type::Address; + linkerInput.dataRelocations.push_back(relocation); + } + // Relocation for step 2. 
+ // instructions[0] = &bssConst + { + NEO::LinkerInput::RelocationInfo relocation; + relocation.offset = 0U; + relocation.relocationSegment = NEO::SegmentType::Instructions; + relocation.symbolName = "bssConst"; + relocation.type = NEO::LinkerInput::RelocationInfo::Type::Address; + linkerInput.textRelocations.push_back({relocation}); + } + // instructions[1] = &bssVar + { + NEO::LinkerInput::RelocationInfo relocation; + relocation.offset = 0U; + relocation.relocationSegment = NEO::SegmentType::Instructions; + relocation.symbolName = "bssVar"; + relocation.type = NEO::LinkerInput::RelocationInfo::Type::Address; + linkerInput.textRelocations.push_back({relocation}); + } + + NEO::Linker linker(linkerInput); + auto device = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get())); + NEO::Linker::UnresolvedExternals unresolvedExternals; + NEO::Linker::KernelDescriptorsT kernelDescriptors; + NEO::Linker::ExternalFunctionsT externalFunctions; + auto linkResult = linker.link(globalVariablesSegmentInfo, globalConstantsSegmentInfo, {}, {}, + &globalVariablesPatchableSegment, &globalConstantsPatchableSegment, patchableInstructionSegments, + unresolvedExternals, device.get(), initGlobalConstantData, sizeof(initGlobalConstantData), + initGlobalVariablesData, sizeof(initGlobalVariablesData), kernelDescriptors, externalFunctions); + EXPECT_EQ(NEO::LinkingStatus::LinkedFully, linkResult); + EXPECT_EQ(0U, unresolvedExternals.size()); + + auto globalConstantsSegmentAddr = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); + auto globalVariableSegmentAddr = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); + + auto var1Addr = globalVariablesPatchableSegment.getGpuAddress(); + auto const1Addr = globalConstantsPatchableSegment.getGpuAddress(); + auto bssConstAddrr = globalConstantsPatchableSegment.getGpuAddress() + sizeof(initGlobalConstantData); + auto bssVarAddr = 
globalVariablesPatchableSegment.getGpuAddress() + sizeof(initGlobalVariablesData); + + EXPECT_EQ(var1Addr, *(globalConstantsSegmentAddr + 1)); + EXPECT_EQ(const1Addr, *(globalVariableSegmentAddr + 1)); + EXPECT_EQ(bssConstAddrr, *(reinterpret_cast(instructionsData1.data()))); + EXPECT_EQ(bssVarAddr, *(reinterpret_cast(instructionsData2.data()))); +} + TEST(LinkerTests, givenInvalidSymbolWhenPatchingDataSegmentsThenRelocationIsUnresolved) { uint64_t initGlobalConstantData[3] = {}; uint64_t initGlobalVariablesData[3] = {}; @@ -1785,7 +1929,10 @@ TEST(LinkerTests, givenInvalidRelocationSegmentWhenPatchingDataSegmentsThenReloc NEO::Linker::UnresolvedExternals unresolvedExternals; NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; - auto linkResult = linker.link({}, {}, {}, {}, + + NEO::Linker::SegmentInfo globalSegment; + globalSegment.segmentSize = 8u; + auto linkResult = linker.link(globalSegment, {}, {}, {}, nullptr, nullptr, {}, unresolvedExternals, device.get(), nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions); EXPECT_EQ(NEO::LinkingStatus::LinkedPartially, linkResult); @@ -2726,7 +2873,7 @@ TEST(LinkerTest, givenLocalFuncSymbolsWhenProcessingRelocationsThenLocalSymbolsA emplacedOther.gpuAddress = 0x2000; emplacedOther.kernelName = "other_kernel"; - auto res = linker.processRelocations(gVariables, gConstants, expFuncs, gStrings, insSegments); + auto res = linker.processRelocations(gVariables, gConstants, expFuncs, gStrings, insSegments, 0u, 0u); EXPECT_TRUE(res); EXPECT_EQ(1u, linker.localRelocatedSymbols.size()); const auto &localRelocatedSymbolInfo = linker.localRelocatedSymbols.at(kernelName); diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index af466a9b80..88b5a1afb5 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ 
b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -152,6 +152,8 @@ TEST(ExtractZebinSections, GivenKnownSectionsThenCapturesThemProperly) { elfEncoder.appendSection(NEO::Elf::SHT_ZEBIN_GTPIN_INFO, NEO::Elf::SectionsNamesZebin::gtpinInfo, std::string{}); elfEncoder.appendSection(NEO::Elf::SHT_ZEBIN_VISA_ASM, NEO::Elf::SectionsNamesZebin::vIsaAsmPrefix.str() + "someKernel", std::string{}); elfEncoder.appendSection(NEO::Elf::SHT_ZEBIN_MISC, NEO::Elf::SectionsNamesZebin::buildOptions, std::string{}); + elfEncoder.appendSection(NEO::Elf::SHT_NOBITS, NEO::Elf::SectionsNamesZebin::dataConstZeroInit.str(), std::string{}); + elfEncoder.appendSection(NEO::Elf::SHT_NOBITS, NEO::Elf::SectionsNamesZebin::dataGlobalZeroInit.str(), std::string{}); elfEncoder.appendSection(NEO::Elf::SHT_REL, NEO::Elf::SpecialSectionNames::relPrefix.str() + "someKernel", std::string{}); elfEncoder.appendSection(NEO::Elf::SHT_RELA, NEO::Elf::SpecialSectionNames::relaPrefix.str() + "someKernel", std::string{}); @@ -177,6 +179,8 @@ TEST(ExtractZebinSections, GivenKnownSectionsThenCapturesThemProperly) { ASSERT_EQ(1U, sections.symtabSections.size()); ASSERT_EQ(1U, sections.spirvSections.size()); ASSERT_EQ(1U, sections.buildOptionsSection.size()); + ASSERT_EQ(1U, sections.constZeroInitDataSections.size()); + ASSERT_EQ(1U, sections.globalZeroInitDataSections.size()); auto stringSection = decodedElf.sectionHeaders[decodedElf.elfFileHeader->shStrNdx]; const char *strings = stringSection.data.toArrayRef().begin(); @@ -188,6 +192,8 @@ TEST(ExtractZebinSections, GivenKnownSectionsThenCapturesThemProperly) { EXPECT_STREQ(NEO::Elf::SectionsNamesZebin::zeInfo.data(), strings + sections.zeInfoSections[0]->header->name); EXPECT_STREQ(NEO::Elf::SectionsNamesZebin::symtab.data(), strings + sections.symtabSections[0]->header->name); EXPECT_STREQ(NEO::Elf::SectionsNamesZebin::spv.data(), strings + sections.spirvSections[0]->header->name); + 
EXPECT_STREQ(NEO::Elf::SectionsNamesZebin::dataConstZeroInit.data(), strings + sections.constZeroInitDataSections[0]->header->name); + EXPECT_STREQ(NEO::Elf::SectionsNamesZebin::dataGlobalZeroInit.data(), strings + sections.globalZeroInitDataSections[0]->header->name); } TEST(ExtractZebinSections, GivenMispelledConstDataSectionThenAllowItButEmitError) { @@ -236,6 +242,25 @@ TEST(ExtractZebinSections, GivenUnknownMiscSectionThenEmitWarning) { EXPECT_STREQ(expectedWarning.c_str(), warnings.c_str()); } +TEST(ExtractZebinSections, GivenUnknownElfNobitsSectionThenEmitWarning) { + NEO::Elf::ElfEncoder<> elfEncoder; + ConstStringRef unknownNobitsSectionName = "unknown_bss_section"; + elfEncoder.appendSection(NEO::Elf::SHT_NOBITS, unknownNobitsSectionName, std::string{}); + auto encodedElf = elfEncoder.encode(); + std::string elferrors; + std::string elfwarnings; + auto decodedElf = NEO::Elf::decodeElf(encodedElf, elferrors, elfwarnings); + + NEO::ZebinSections sections; + std::string errors; + std::string warnings; + auto decodeError = NEO::extractZebinSections(decodedElf, sections, errors, warnings); + EXPECT_EQ(NEO::DecodeError::Success, decodeError); + EXPECT_TRUE(errors.empty()) << errors; + auto expectedWarning = "DeviceBinaryFormat::Zebin : unhandled SHT_NOBITS section : " + unknownNobitsSectionName.str() + " currently supports only : .bss.const and .bss.global.\n"; + EXPECT_STREQ(expectedWarning.c_str(), warnings.c_str()); +} + TEST(ValidateZebinSectionsCount, GivenEmptyZebinThenReturnSuccess) { NEO::ZebinSections sections; std::string errors; @@ -328,6 +353,28 @@ TEST(ValidateZebinSectionsCount, GivenTwoIntelGTNoteSectionsThenFail) { EXPECT_TRUE(warnings.empty()) << warnings; } +TEST(ValidateZebinSectionsCount, GivenMoreThanOneConstZeroInitDataSectionThenFail) { + NEO::ZebinSections sections; + std::string errors; + std::string warnings; + sections.constZeroInitDataSections.resize(2); + auto err = NEO::validateZebinSectionsCount(sections, errors, warnings); + 
EXPECT_EQ(NEO::DecodeError::InvalidBinary, err); + EXPECT_STREQ("DeviceBinaryFormat::Zebin : Expected at most 1 of .bss.const section, got : 2\n", errors.c_str()); + EXPECT_TRUE(warnings.empty()) << warnings; +} + +TEST(ValidateZebinSectionsCount, GivenMoreThanOneGlobalZeroInitDataSectionThenFail) { + NEO::ZebinSections sections; + std::string errors; + std::string warnings; + sections.globalZeroInitDataSections.resize(2); + auto err = NEO::validateZebinSectionsCount(sections, errors, warnings); + EXPECT_EQ(NEO::DecodeError::InvalidBinary, err); + EXPECT_STREQ("DeviceBinaryFormat::Zebin : Expected at most 1 of .bss.global section, got : 2\n", errors.c_str()); + EXPECT_TRUE(warnings.empty()) << warnings; +} + TEST(PopulateZeInfoVersion, GivenValidVersionFormatThenParsesItProperly) { { NEO::ConstStringRef yaml = R"===(--- @@ -2547,6 +2594,51 @@ TEST(DecodeSingleDeviceBinaryZebin, GivenConstDataSectionThenSetsUpInitDataAndSi EXPECT_EQ(nullptr, programInfo.globalVariables.initData); } +TEST(DecodeSingleDeviceBinaryZebin, GivenConstZeroInitDataSectionThenSetUpZeroInitSizeBasedOnHeaderData) { + NEO::MockExecutionEnvironment mockExecutionEnvironment{}; + auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); + ZebinTestData::ValidEmptyProgram zebin; + const uint8_t mockData[0x10]{0u}; // note that BSS section does not store any data in ELF + auto &bssConstHeader = zebin.appendSection(NEO::Elf::SHT_NOBITS, NEO::Elf::SectionsNamesZebin::dataConstZeroInit, mockData); + bssConstHeader.size = 16u; + + NEO::ProgramInfo programInfo; + NEO::SingleDeviceBinary singleBinary; + singleBinary.deviceBinary = zebin.storage; + std::string errors; + std::string warnings; + auto error = NEO::decodeSingleDeviceBinary(programInfo, singleBinary, errors, warnings, gfxCoreHelper); + EXPECT_EQ(NEO::DecodeError::Success, error); + EXPECT_TRUE(warnings.empty()) << warnings; + EXPECT_TRUE(errors.empty()) << errors; + EXPECT_EQ(16u, 
programInfo.globalConstants.zeroInitSize); + EXPECT_EQ(nullptr, programInfo.globalConstants.initData); + EXPECT_EQ(0u, programInfo.globalConstants.size); +} + +TEST(DecodeSingleDeviceBinaryZebin, GivenGlobalZeroInitDataSectionThenSetUpZeroInitSizeBasedOnHeaderData) { + NEO::MockExecutionEnvironment mockExecutionEnvironment{}; + auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); + ZebinTestData::ValidEmptyProgram zebin; + const uint8_t mockData[0x10]{0u}; // note that BSS section does not store any data in ELF + auto &bssGlobalHeader = zebin.appendSection(NEO::Elf::SHT_NOBITS, NEO::Elf::SectionsNamesZebin::dataGlobalZeroInit, mockData); + bssGlobalHeader.size = 16u; + + NEO::ProgramInfo programInfo; + NEO::SingleDeviceBinary singleBinary; + singleBinary.deviceBinary = zebin.storage; + std::string errors; + std::string warnings; + auto error = NEO::decodeSingleDeviceBinary(programInfo, singleBinary, errors, warnings, gfxCoreHelper); + EXPECT_EQ(NEO::DecodeError::Success, error); + EXPECT_TRUE(warnings.empty()) << warnings; + EXPECT_TRUE(errors.empty()) << errors; + + EXPECT_EQ(16u, programInfo.globalVariables.zeroInitSize); + EXPECT_EQ(nullptr, programInfo.globalVariables.initData); + EXPECT_EQ(0u, programInfo.globalVariables.size); +} + TEST(DecodeSingleDeviceBinaryZebin, GivenConstDataStringsSectionThenSetsUpInitDataAndSize) { NEO::MockExecutionEnvironment mockExecutionEnvironment{}; auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); diff --git a/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp index 382bd8bd78..b8a3d762f0 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp +++ b/shared/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.cpp @@ -1016,6 +1016,22 @@ TEST(MemoryManagerTest, 
givenDebugContextSaveAreaTypeWhenGetAllocationDataIsCall EXPECT_TRUE(allocData.flags.zeroMemory); } +TEST(MemoryManagerTest, givenAllocationTypeConstantOrGlobalSurfaceWhenGetAllocationDataIsCalledThenZeroMemoryFlagIsSet) { + MockMemoryManager mockMemoryManager; + AllocationProperties propertiesGlobal{mockRootDeviceIndex, 1, AllocationType::GLOBAL_SURFACE, mockDeviceBitfield}; + AllocationProperties propertiesConstant{mockRootDeviceIndex, 1, AllocationType::CONSTANT_SURFACE, mockDeviceBitfield}; + { + AllocationData allocData; + mockMemoryManager.getAllocationData(allocData, propertiesGlobal, nullptr, mockMemoryManager.createStorageInfoFromProperties(propertiesGlobal)); + EXPECT_TRUE(allocData.flags.zeroMemory); + } + { + AllocationData allocData; + mockMemoryManager.getAllocationData(allocData, propertiesConstant, nullptr, mockMemoryManager.createStorageInfoFromProperties(propertiesConstant)); + EXPECT_TRUE(allocData.flags.zeroMemory); + } +} + TEST(MemoryManagerTest, givenPropertiesWithOsContextWhenGetAllocationDataIsCalledThenOsContextIsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; diff --git a/shared/test/unit_test/program/program_initialization_tests.cpp b/shared/test/unit_test/program/program_initialization_tests.cpp index 18bad9dae7..e5b8382f46 100644 --- a/shared/test/unit_test/program/program_initialization_tests.cpp +++ b/shared/test/unit_test/program/program_initialization_tests.cpp @@ -31,7 +31,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh initData.resize(64, 7U); GraphicsAllocation *alloc = nullptr; - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), true /* constant */, nullptr /* linker input */, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, 
memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -39,7 +39,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh EXPECT_EQ(AllocationType::CONSTANT_SURFACE, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), false /* constant */, nullptr /* linker input */, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -47,7 +47,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh EXPECT_EQ(AllocationType::GLOBAL_SURFACE, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), true /* constant */, &emptyLinkerInput, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -55,7 +55,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh EXPECT_EQ(AllocationType::CONSTANT_SURFACE, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), false /* constant */, &emptyLinkerInput, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), 
alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -77,7 +77,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM initData.resize(64, 7U); GraphicsAllocation *alloc = nullptr; - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), true /* constant */, &linkerInputExportGlobalConstants, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(MemoryConstants::pageSize64k, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -86,21 +86,21 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM EXPECT_EQ(DEVICE_UNIFIED_MEMORY, svmAllocsManager.getSVMAlloc(reinterpret_cast(alloc->getGpuAddress()))->memoryType); svmAllocsManager.freeSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress()))); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), true /* constant */, &linkerInputExportGlobalVariables, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress())))); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), false /* constant */, &linkerInputExportGlobalConstants, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, 
&linkerInputExportGlobalConstants, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress())))); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), false /* constant */, &linkerInputExportGlobalVariables, initData.data()); + alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(MemoryConstants::pageSize64k, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -120,28 +120,28 @@ TEST(AllocateGlobalSurfaceTest, GivenNullSvmAllocsManagerWhenGlobalsAreExportedT initData.resize(64, 7U); GraphicsAllocation *alloc = nullptr; - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), true /* constant */, &linkerInputExportGlobalConstants, initData.data()); + alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(AllocationType::CONSTANT_SURFACE, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), true /* constant */, &linkerInputExportGlobalVariables, initData.data()); + alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), 
alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(AllocationType::CONSTANT_SURFACE, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), false /* constant */, &linkerInputExportGlobalConstants, initData.data()); + alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalConstants, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(AllocationType::GLOBAL_SURFACE, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), false /* constant */, &linkerInputExportGlobalVariables, initData.data()); + alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()); ASSERT_NE(nullptr, alloc); ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -160,28 +160,28 @@ TEST(AllocateGlobalSurfaceTest, WhenGlobalsAreNotExportedAndAllocationFailsThenG initData.resize(64, 7U); GraphicsAllocation *alloc = nullptr; - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), true /* constant */, nullptr /* linker input */, initData.data()); + alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), false /* constant */, nullptr /* linker input */, initData.data()); + alloc = allocateGlobalsSurface(&mockSvmAllocsManager, 
device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), true /* constant */, &emptyLinkerInput, initData.data()); + alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), false /* constant */, &emptyLinkerInput, initData.data()); + alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), true /* constant */, nullptr /* linker input */, initData.data()); + alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), false /* constant */, nullptr /* linker input */, initData.data()); + alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), true /* constant */, &emptyLinkerInput, initData.data()); + alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data()); EXPECT_EQ(nullptr, alloc); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), false /* constant */, &emptyLinkerInput, initData.data()); + alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, 
initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data()); EXPECT_EQ(nullptr, alloc); } @@ -215,7 +215,7 @@ TEST(AllocateGlobalSurfaceTest, GivenAllocationInLocalMemoryWhichRequiresBlitter device.getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; MockSVMAllocsManager svmAllocsManager(device.getMemoryManager(), false); - auto pAllocation = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), true /* constant */, + auto pAllocation = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data()); ASSERT_NE(nullptr, pAllocation); EXPECT_EQ(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(pAllocation->getGpuAddress())))); @@ -229,3 +229,38 @@ TEST(AllocateGlobalSurfaceTest, GivenAllocationInLocalMemoryWhichRequiresBlitter } } } + +TEST(AllocateGlobalSurfaceTest, whenAllocatingGlobalSurfaceWithNonZeroZeroInitSizeThenTransferOnlyInitDataToAllocation) { + MockDevice device{}; + WhiteBox emptyLinkerInput; + emptyLinkerInput.traits.exportsGlobalConstants = true; + std::vector initData; + initData.resize(64, 7u); + std::fill(initData.begin() + 32, initData.end(), 16u); // this data should not be transfered + GraphicsAllocation *alloc = nullptr; + size_t zeroInitSize = 32u; + + alloc = allocateGlobalsSurface(nullptr, device, initData.size(), zeroInitSize, true, &emptyLinkerInput, initData.data()); + ASSERT_NE(nullptr, alloc); + EXPECT_EQ(64u, alloc->getUnderlyingBufferSize()); + + auto dataPtr = reinterpret_cast(alloc->getUnderlyingBuffer()); + EXPECT_EQ(0, memcmp(dataPtr, initData.data(), 32u)); + EXPECT_NE(0, memcmp(dataPtr + 32, initData.data() + 32, 32u)); + device.getMemoryManager()->freeGraphicsMemory(alloc); +} + +TEST(AllocateGlobalSurfaceTest, whenAllocatingGlobalSurfaceWithZeroInitSizeGreaterThanZeroAndInitDataSizeSetToZeroThenDoNotTransferMemoryToAllocation) { + 
MockDevice device{}; + auto memoryManager = std::make_unique(*device.getExecutionEnvironment()); + device.injectMemoryManager(memoryManager.release()); + ASSERT_EQ(0u, static_cast(device.getMemoryManager())->copyMemoryToAllocationBanksCalled); + size_t totalSize = 64u, zeroInitSize = 64u; + + GraphicsAllocation *alloc = nullptr; + alloc = allocateGlobalsSurface(nullptr, device, totalSize, zeroInitSize, true, nullptr, nullptr); + ASSERT_NE(nullptr, alloc); + EXPECT_EQ(0u, static_cast(device.getMemoryManager())->copyMemoryToAllocationBanksCalled); + + device.getMemoryManager()->freeGraphicsMemory(alloc); +} \ No newline at end of file