From a2f60af5c6cd8811e3580255f637e38ca78adca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Zwoli=C5=84ski?= Date: Mon, 18 Aug 2025 15:15:55 +0000 Subject: [PATCH] fix: change global Var/Const Buffer type to SharedPoolAllocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is prep work for the future implementation of pooling these allocations. Related-To: NEO-12287 Signed-off-by: Fabian Zwoliński --- level_zero/core/source/kernel/kernel.h | 7 +- level_zero/core/source/kernel/kernel_imp.cpp | 40 +++-- .../kernel/patch_with_implicit_surface.inl | 15 +- level_zero/core/source/module/module_imp.cpp | 79 +++++---- level_zero/core/source/module/module_imp.h | 8 +- .../unit_tests/fixtures/module_fixture.cpp | 74 ++++++-- .../test/unit_tests/fixtures/module_fixture.h | 1 + .../test/unit_tests/mocks/mock_module.cpp | 4 +- .../core/test/unit_tests/mocks/mock_module.h | 5 +- .../unit_tests/sources/kernel/test_kernel.cpp | 20 +-- .../sources/kernel/test_kernel_2.cpp | 52 +++--- .../unit_tests/sources/module/test_module.cpp | 52 ++++-- opencl/source/kernel/kernel.cpp | 35 ++-- opencl/source/kernel/kernel.h | 1 + .../source/program/process_device_binary.cpp | 28 +-- opencl/source/program/program.cpp | 40 +++-- opencl/source/program/program.h | 21 +-- .../command_queue/blit_enqueue_2_tests.cpp | 6 +- .../debugger/ocl_with_l0_debugger_tests.cpp | 2 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 42 ++--- .../cl_memory_manager_tests.cpp | 6 +- opencl/test/unit_test/mocks/mock_program.h | 26 ++- .../unit_test/program/program_data_tests.cpp | 143 +++++++++------ .../test/unit_test/program/program_tests.cpp | 52 +++--- .../unit_test/program/program_with_zebin.cpp | 27 ++- .../unit_test/program/program_with_zebin.h | 1 + .../program/program_with_zebin_tests.cpp | 20 ++- shared/source/compiler_interface/linker.cpp | 12 +- shared/source/compiler_interface/linker.h | 5 +- .../zebin/debug_zebin.cpp | 9 +- 
.../device_binary_format/zebin/debug_zebin.h | 6 +- .../source/program/program_initialization.cpp | 14 +- .../source/program/program_initialization.h | 9 +- .../source/utilities/shared_pool_allocation.h | 32 +++- .../compiler_interface/linker_tests.cpp | 165 ++++++++++-------- .../memory_manager/memory_manager_tests.cpp | 8 +- .../program/program_initialization_tests.cpp | 110 ++++++++---- .../test/unit_test/utilities/CMakeLists.txt | 1 + .../shared_pool_allocation_tests.cpp | 76 ++++++++ 39 files changed, 815 insertions(+), 439 deletions(-) create mode 100644 shared/test/unit_test/utilities/shared_pool_allocation_tests.cpp diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index 4559cea3a1..83ab667ebb 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -25,6 +25,7 @@ namespace NEO { class Device; struct KernelInfo; class MemoryManager; +class SharedPoolAllocation; } // namespace NEO namespace L0 { @@ -37,7 +38,7 @@ struct KernelImmutableData { virtual ~KernelImmutableData(); MOCKABLE_VIRTUAL ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch, - NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, + NEO::SharedPoolAllocation *globalConstBuffer, NEO::SharedPoolAllocation *globalVarBuffer, bool internalKernel); const std::vector &getResidencyContainer() const { @@ -80,8 +81,8 @@ struct KernelImmutableData { return isaCopiedToAllocation; } - MOCKABLE_VIRTUAL void createRelocatedDebugData(NEO::GraphicsAllocation *globalConstBuffer, - NEO::GraphicsAllocation *globalVarBuffer); + MOCKABLE_VIRTUAL void createRelocatedDebugData(NEO::SharedPoolAllocation *globalConstBuffer, + NEO::SharedPoolAllocation *globalVarBuffer); protected: Device *device = nullptr; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index c2b64e350f..4d9ccc7caa 100644 --- 
a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -72,7 +72,7 @@ KernelImmutableData::~KernelImmutableData() { } ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch, - NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, + NEO::SharedPoolAllocation *globalConstBuffer, NEO::SharedPoolAllocation *globalVarBuffer, bool internalKernel) { UNRECOVERABLE_IF(kernelInfo == nullptr); @@ -136,21 +136,22 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device patchWithImplicitSurface(crossThreadDataArrayRef, surfaceStateHeapArrayRef, static_cast(globalConstBuffer->getGpuAddressToPatch()), - *globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, - *neoDevice, deviceImp->isImplicitScalingCapable()); - this->residencyContainer.push_back(globalConstBuffer); + *globalConstBuffer->getGraphicsAllocation(), globalConstBuffer->getGpuAddress(), globalConstBuffer->getSize(), + kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, *neoDevice, deviceImp->isImplicitScalingCapable()); + this->residencyContainer.push_back(globalConstBuffer->getGraphicsAllocation()); } else if (nullptr != globalConstBuffer) { - this->residencyContainer.push_back(globalConstBuffer); + this->residencyContainer.push_back(globalConstBuffer->getGraphicsAllocation()); } if (globalConstBuffer && NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)) { - if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalConstBuffer)) { + UNRECOVERABLE_IF(globalConstBuffer->getGraphicsAllocation()->getUnderlyingBufferSize() != globalConstBuffer->getSize()); + if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalConstBuffer->getGraphicsAllocation())) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - auto &ssInHeap = 
globalConstBuffer->getBindlessInfo(); + auto &ssInHeap = globalConstBuffer->getGraphicsAllocation()->getBindlessInfo(); patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef, - globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, + globalConstBuffer->getGraphicsAllocation(), kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, *neoDevice, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor); } @@ -159,38 +160,39 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device patchWithImplicitSurface(crossThreadDataArrayRef, surfaceStateHeapArrayRef, static_cast(globalVarBuffer->getGpuAddressToPatch()), - *globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, - *neoDevice, deviceImp->isImplicitScalingCapable()); - this->residencyContainer.push_back(globalVarBuffer); + *globalVarBuffer->getGraphicsAllocation(), globalVarBuffer->getGpuAddress(), globalVarBuffer->getSize(), + kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, *neoDevice, deviceImp->isImplicitScalingCapable()); + this->residencyContainer.push_back(globalVarBuffer->getGraphicsAllocation()); } else if (nullptr != globalVarBuffer) { - this->residencyContainer.push_back(globalVarBuffer); + this->residencyContainer.push_back(globalVarBuffer->getGraphicsAllocation()); } if (globalVarBuffer && NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)) { - if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalVarBuffer)) { + UNRECOVERABLE_IF(globalVarBuffer->getGraphicsAllocation()->getUnderlyingBufferSize() != globalVarBuffer->getSize()); + if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalVarBuffer->getGraphicsAllocation())) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - auto &ssInHeap = 
globalVarBuffer->getBindlessInfo(); + auto &ssInHeap = globalVarBuffer->getGraphicsAllocation()->getBindlessInfo(); patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef, - globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, + globalVarBuffer->getGraphicsAllocation(), kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, *neoDevice, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor); } return ZE_RESULT_SUCCESS; } -void KernelImmutableData::createRelocatedDebugData(NEO::GraphicsAllocation *globalConstBuffer, - NEO::GraphicsAllocation *globalVarBuffer) { +void KernelImmutableData::createRelocatedDebugData(NEO::SharedPoolAllocation *globalConstBuffer, + NEO::SharedPoolAllocation *globalVarBuffer) { NEO::Linker::SegmentInfo globalData; NEO::Linker::SegmentInfo constData; if (globalVarBuffer) { globalData.gpuAddress = globalVarBuffer->getGpuAddress(); - globalData.segmentSize = globalVarBuffer->getUnderlyingBufferSize(); + globalData.segmentSize = globalVarBuffer->getSize(); } if (globalConstBuffer) { constData.gpuAddress = globalConstBuffer->getGpuAddress(); - constData.segmentSize = globalConstBuffer->getUnderlyingBufferSize(); + constData.segmentSize = globalConstBuffer->getSize(); } if (kernelInfo->kernelDescriptor.external.debugData.get()) { diff --git a/level_zero/core/source/kernel/patch_with_implicit_surface.inl b/level_zero/core/source/kernel/patch_with_implicit_surface.inl index cf586c8fd1..40fd4f232e 100644 --- a/level_zero/core/source/kernel/patch_with_implicit_surface.inl +++ b/level_zero/core/source/kernel/patch_with_implicit_surface.inl @@ -1,12 +1,12 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef surfaceStateHeap, - uintptr_t ptrToPatchInCrossThreadData, 
NEO::GraphicsAllocation &allocation, + uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation, uint64_t addressToPatch, size_t sizeToPatch, const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool implicitScaling) { if (false == crossThreadData.empty()) { @@ -15,8 +15,6 @@ inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef if ((false == surfaceStateHeap.empty()) && (NEO::isValidOffset(ptr.bindful))) { auto surfaceState = surfaceStateHeap.begin() + ptr.bindful; - auto addressToPatch = allocation.getGpuAddress(); - size_t sizeToPatch = allocation.getUnderlyingBufferSize(); auto &gfxCoreHelper = device.getGfxCoreHelper(); auto isDebuggerActive = device.getDebugger() != nullptr; @@ -36,6 +34,15 @@ inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef } } +inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef surfaceStateHeap, + uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation, + const NEO::ArgDescPointer &ptr, const NEO::Device &device, + bool implicitScaling) { + patchWithImplicitSurface(crossThreadData, surfaceStateHeap, + ptrToPatchInCrossThreadData, allocation, allocation.getGpuAddress(), allocation.getUnderlyingBufferSize(), + ptr, device, implicitScaling); +} + inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef crossThreadData, ArrayRef surfaceStateHeap, NEO::GraphicsAllocation *allocation, const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool implicitScaling, const NEO::SurfaceStateInHeapInfo &ssInHeap, const NEO::KernelDescriptor &kernelDescriptor) { diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index 95864b1937..7a9128509c 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -80,25 +80,8 @@ ModuleTranslationUnit::ModuleTranslationUnit(L0::Device *device) } ModuleTranslationUnit::~ModuleTranslationUnit() { - 
if (globalConstBuffer) { - auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); - - if (svmAllocsManager->getSVMAlloc(reinterpret_cast(globalConstBuffer->getGpuAddress()))) { - svmAllocsManager->freeSVMAlloc(reinterpret_cast(globalConstBuffer->getGpuAddress())); - } else { - this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalConstBuffer); - } - } - - if (globalVarBuffer) { - auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); - - if (svmAllocsManager->getSVMAlloc(reinterpret_cast(globalVarBuffer->getGpuAddress()))) { - svmAllocsManager->freeSVMAlloc(reinterpret_cast(globalVarBuffer->getGpuAddress())); - } else { - this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalVarBuffer); - } - } + freeGlobalBufferAllocation(globalConstBuffer); + freeGlobalBufferAllocation(globalVarBuffer); if (this->debugData != nullptr) { for (std::vector::iterator iter = alignedvIsas.begin(); iter != alignedvIsas.end(); ++iter) { @@ -107,6 +90,26 @@ ModuleTranslationUnit::~ModuleTranslationUnit() { } } +void ModuleTranslationUnit::freeGlobalBufferAllocation(const std::unique_ptr &globalBuffer) { + if (!globalBuffer) { + return; + } + + auto graphicsAllocation = globalBuffer->getGraphicsAllocation(); + if (!graphicsAllocation) { + return; + } + + auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto gpuAddress = reinterpret_cast(globalBuffer->getGpuAddress()); + + if (svmAllocsManager->getSVMAlloc(gpuAddress)) { + svmAllocsManager->freeSVMAlloc(gpuAddress); + } else { + this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); + } +} + std::vector ModuleTranslationUnit::generateElfFromSpirV(std::vector inputSpirVs, std::vector inputModuleSizes) { NEO::Elf::ElfEncoder<> elfEncoder(true, false, 1U); 
elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS; @@ -406,14 +409,14 @@ ze_result_t ModuleTranslationUnit::processUnpackedBinary() { auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); auto globalConstDataSize = programInfo.globalConstants.size + programInfo.globalConstants.zeroInitSize; if (globalConstDataSize != 0) { - this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), globalConstDataSize, - programInfo.globalConstants.zeroInitSize, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData); + this->globalConstBuffer.reset(NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), globalConstDataSize, + programInfo.globalConstants.zeroInitSize, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData)); } auto globalVariablesDataSize = programInfo.globalVariables.size + programInfo.globalVariables.zeroInitSize; if (globalVariablesDataSize != 0) { - this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), globalVariablesDataSize, - programInfo.globalVariables.zeroInitSize, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData); + this->globalVarBuffer.reset(NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), globalVariablesDataSize, + programInfo.globalVariables.zeroInitSize, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData)); } for (auto &kernelInfo : this->programInfo.kernelInfos) { @@ -493,6 +496,14 @@ void ModuleTranslationUnit::processDebugData() { } } +NEO::GraphicsAllocation *ModuleTranslationUnit::getGlobalConstBufferGA() const { + return globalConstBuffer ? globalConstBuffer->getGraphicsAllocation() : nullptr; +} + +NEO::GraphicsAllocation *ModuleTranslationUnit::getGlobalVarBufferGA() const { + return globalVarBuffer ? 
globalVarBuffer->getGraphicsAllocation() : nullptr; +} + ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : device(device), translationUnit(std::make_unique(device)), moduleBuildLog(moduleBuildLog), type(type) { @@ -529,7 +540,7 @@ NEO::Zebin::Debug::Segments ModuleImp::getZebinSegments() { ArrayRef strings = {reinterpret_cast(translationUnit->programInfo.globalStrings.initData), translationUnit->programInfo.globalStrings.size}; - return NEO::Zebin::Debug::Segments(translationUnit->globalVarBuffer, translationUnit->globalConstBuffer, strings, kernels); + return NEO::Zebin::Debug::Segments(translationUnit->globalVarBuffer.get(), translationUnit->globalConstBuffer.get(), strings, kernels); } void ModuleImp::populateZebinExtendedArgsMetadata() { @@ -787,8 +798,8 @@ ze_result_t ModuleImp::initializeKernelImmutableDatas() { result = kernelImmDatas[i]->initialize(this->translationUnit->programInfo.kernelInfos[i], device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, - this->translationUnit->globalConstBuffer, - this->translationUnit->globalVarBuffer, + this->translationUnit->globalConstBuffer.get(), + this->translationUnit->globalVarBuffer.get(), this->type == ModuleType::builtin); if (result != ZE_RESULT_SUCCESS) { kernelImmDatas[i].reset(); @@ -1080,19 +1091,19 @@ bool ModuleImp::linkBinary() { Linker::SegmentInfo constants; Linker::SegmentInfo exportedFunctions; Linker::SegmentInfo strings; - GraphicsAllocation *globalsForPatching = translationUnit->globalVarBuffer; - GraphicsAllocation *constantsForPatching = translationUnit->globalConstBuffer; + SharedPoolAllocation *globalsForPatching = translationUnit->globalVarBuffer.get(); + SharedPoolAllocation *constantsForPatching = translationUnit->globalConstBuffer.get(); auto &compilerProductHelper = this->device->getNEODevice()->getCompilerProductHelper(); bool useFullAddress = compilerProductHelper.isHeaplessModeEnabled(this->device->getHwInfo()); if 
(globalsForPatching != nullptr) { globals.gpuAddress = static_cast(globalsForPatching->getGpuAddress()); - globals.segmentSize = globalsForPatching->getUnderlyingBufferSize(); + globals.segmentSize = globalsForPatching->getSize(); } if (constantsForPatching != nullptr) { constants.gpuAddress = static_cast(constantsForPatching->getGpuAddress()); - constants.segmentSize = constantsForPatching->getUnderlyingBufferSize(); + constants.segmentSize = constantsForPatching->getSize(); } if (translationUnit->programInfo.globalStrings.initData != nullptr) { strings.gpuAddress = reinterpret_cast(translationUnit->programInfo.globalStrings.initData); @@ -1614,10 +1625,10 @@ void ModuleImp::registerElfInDebuggerL0() { } if (translationUnit->globalVarBuffer) { - segmentAllocs.push_back(translationUnit->globalVarBuffer); + segmentAllocs.push_back(translationUnit->globalVarBuffer->getGraphicsAllocation()); } if (translationUnit->globalConstBuffer) { - segmentAllocs.push_back(translationUnit->globalConstBuffer); + segmentAllocs.push_back(translationUnit->globalConstBuffer->getGraphicsAllocation()); } debuggerL0->attachZebinModuleToSegmentAllocations(segmentAllocs, this->debugModuleHandle, this->debugElfHandle); @@ -1696,10 +1707,10 @@ StackVec ModuleImp::getModuleAllocations() { if (translationUnit) { if (translationUnit->globalVarBuffer) { - allocs.push_back(translationUnit->globalVarBuffer); + allocs.push_back(translationUnit->globalVarBuffer->getGraphicsAllocation()); } if (translationUnit->globalConstBuffer) { - allocs.push_back(translationUnit->globalConstBuffer); + allocs.push_back(translationUnit->globalConstBuffer->getGraphicsAllocation()); } } return allocs; diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index 83873265b8..a3e4da44a1 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -69,10 +69,14 @@ struct ModuleTranslationUnit { MOCKABLE_VIRTUAL ze_result_t 
compileGenBinary(NEO::TranslationInput &inputArgs, bool staticLink); void updateBuildLog(const std::string &newLogEntry); void processDebugData(); + void freeGlobalBufferAllocation(const std::unique_ptr &buffer); + NEO::GraphicsAllocation *getGlobalConstBufferGA() const; + NEO::GraphicsAllocation *getGlobalVarBufferGA() const; + L0::Device *device = nullptr; - NEO::GraphicsAllocation *globalConstBuffer = nullptr; - NEO::GraphicsAllocation *globalVarBuffer = nullptr; + std::unique_ptr globalConstBuffer; + std::unique_ptr globalVarBuffer; NEO::ProgramInfo programInfo; std::string options; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp index d6e57f6669..06a44c3b3f 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp @@ -254,27 +254,65 @@ ModuleWithZebinFixture::MockModuleWithZebin::MockModuleWithZebin(L0::Device *dev isZebinBinary = true; } void ModuleWithZebinFixture::MockModuleWithZebin::addSegments() { + constexpr bool createWithSharedGlobalConstSurfaces = false; + addSegments(createWithSharedGlobalConstSurfaces); +} +void ModuleWithZebinFixture::MockModuleWithZebin::addSegments(bool createWithSharedGlobalConstSurfaces) { kernelImmDatas.push_back(std::make_unique(device)); auto ptr = reinterpret_cast(0x1234); auto canonizedGpuAddress = castToUint64(ptr); - translationUnit->globalVarBuffer = new NEO::MockGraphicsAllocation(0, - 1u /*num gmms*/, - NEO::AllocationType::globalSurface, - ptr, - 0x1000, - 0u, - MemoryPool::system4KBPages, - MemoryManager::maxOsContextCount, - canonizedGpuAddress); - translationUnit->globalConstBuffer = new NEO::MockGraphicsAllocation(0, - 1u /*num gmms*/, - NEO::AllocationType::globalSurface, - ptr, - 0x1000, - 0u, - MemoryPool::system4KBPages, - MemoryManager::maxOsContextCount, - canonizedGpuAddress); + + if (createWithSharedGlobalConstSurfaces) { + constexpr auto 
varBufferOffset = 128u; + constexpr auto varBufferSize = 64u; + + constexpr auto constBufferOffset = 256u; + constexpr auto constBufferSize = 64u; + + translationUnit->globalVarBuffer = std::make_unique(new NEO::MockGraphicsAllocation(0, + 1u /*num gmms*/, + NEO::AllocationType::globalSurface, + ptr, + 0x1000, + 0u, + MemoryPool::system4KBPages, + MemoryManager::maxOsContextCount, + canonizedGpuAddress), + varBufferOffset, + varBufferSize, + nullptr); + translationUnit->globalConstBuffer = std::make_unique(new NEO::MockGraphicsAllocation(0, + 1u /*num gmms*/, + NEO::AllocationType::constantSurface, + ptr, + 0x1000, + 0u, + MemoryPool::system4KBPages, + MemoryManager::maxOsContextCount, + canonizedGpuAddress), + constBufferOffset, + constBufferSize, + nullptr); + } else { + translationUnit->globalVarBuffer = std::make_unique(new NEO::MockGraphicsAllocation(0, + 1u /*num gmms*/, + NEO::AllocationType::globalSurface, + ptr, + 0x1000, + 0u, + MemoryPool::system4KBPages, + MemoryManager::maxOsContextCount, + canonizedGpuAddress)); + translationUnit->globalConstBuffer = std::make_unique(new NEO::MockGraphicsAllocation(0, + 1u /*num gmms*/, + NEO::AllocationType::globalSurface, + ptr, + 0x1000, + 0u, + MemoryPool::system4KBPages, + MemoryManager::maxOsContextCount, + canonizedGpuAddress)); + } translationUnit->programInfo.globalStrings.initData = &strings; translationUnit->programInfo.globalStrings.size = sizeof(strings); diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 4cd1af04ba..d1e7b5148d 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -186,6 +186,7 @@ struct ModuleWithZebinFixture : public DeviceFixture { MockModuleWithZebin(L0::Device *device); void addSegments(); + void addSegments(bool createWithSharedGlobalConstSurfaces); void addEmptyZebin(); diff --git 
a/level_zero/core/test/unit_tests/mocks/mock_module.cpp b/level_zero/core/test/unit_tests/mocks/mock_module.cpp index 6a6fedc82d..7816e60c37 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_module.cpp +++ b/level_zero/core/test/unit_tests/mocks/mock_module.cpp @@ -13,10 +13,10 @@ namespace ult { ze_result_t WhiteBox<::L0::Module>::initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) { auto result = this->BaseClass::initializeTranslationUnit(desc, neoDevice); if (this->mockGlobalConstBuffer) { - this->translationUnit->globalConstBuffer = this->mockGlobalConstBuffer; + this->translationUnit->globalConstBuffer = std::move(this->mockGlobalConstBuffer); } if (this->mockGlobalVarBuffer) { - this->translationUnit->globalVarBuffer = this->mockGlobalVarBuffer; + this->translationUnit->globalVarBuffer = std::move(mockGlobalVarBuffer); } return result; } diff --git a/level_zero/core/test/unit_tests/mocks/mock_module.h b/level_zero/core/test/unit_tests/mocks/mock_module.h index c0c5d149cb..b45ec0dc92 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_module.h +++ b/level_zero/core/test/unit_tests/mocks/mock_module.h @@ -9,6 +9,7 @@ #include "shared/source/compiler_interface/external_functions.h" #include "shared/source/program/kernel_info.h" +#include "shared/source/utilities/shared_pool_allocation.h" #include "shared/test/common/mocks/mock_cif.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/test_macros/mock_method_macros.h" @@ -91,8 +92,8 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp { ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) override; - NEO::GraphicsAllocation *mockGlobalVarBuffer = nullptr; - NEO::GraphicsAllocation *mockGlobalConstBuffer = nullptr; + std::unique_ptr mockGlobalVarBuffer = nullptr; + std::unique_ptr mockGlobalConstBuffer = nullptr; }; using Module = WhiteBox<::L0::Module>; diff --git 
a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 575baa7168..8510bee983 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -949,8 +949,8 @@ TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForUserKer mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, - module->translationUnit->globalConstBuffer, - module->translationUnit->globalVarBuffer, + module->translationUnit->globalConstBuffer.get(), + module->translationUnit->globalVarBuffer.get(), isInternal); EXPECT_EQ(previouscopyMemoryToAllocationCalledTimes, @@ -972,8 +972,8 @@ TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForInterna mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, - module->translationUnit->globalConstBuffer, - module->translationUnit->globalVarBuffer, + module->translationUnit->globalConstBuffer.get(), + module->translationUnit->globalVarBuffer.get(), isInternal); EXPECT_EQ(previouscopyMemoryToAllocationCalledTimes, @@ -2407,20 +2407,20 @@ TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuff uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; - NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size); - NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size); + NEO::MockGraphicsAllocation globalVarBufferMockGA(buffer, gpuAddress, size); + NEO::MockGraphicsAllocation globalConstBufferMockGA(buffer, gpuAddress, size); ModuleBuildLog *moduleBuildLog = nullptr; this->module.reset(new WhiteBox<::L0::Module>{this->device, moduleBuildLog, ModuleType::user}); - this->module->mockGlobalVarBuffer = 
&globalVarBuffer; - this->module->mockGlobalConstBuffer = &globalConstBuffer; + this->module->mockGlobalVarBuffer = std::make_unique(&globalVarBufferMockGA); + this->module->mockGlobalConstBuffer = std::make_unique(&globalConstBufferMockGA); this->createModuleFromMockBinary(ModuleType::user); for (auto &kernelImmData : this->module->kernelImmDatas) { auto &resCont = kernelImmData->getResidencyContainer(); - EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer)); - EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer)); + EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBufferMockGA)); + EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBufferMockGA)); } this->module->translationUnit->globalConstBuffer = nullptr; this->module->translationUnit->globalVarBuffer = nullptr; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp index 506bce1a1c..e8609dbcba 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp @@ -838,7 +838,8 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t allocSize = 0x1100; - NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, allocSize); + auto globalConstBufferMockGA = NEO::MockGraphicsAllocation(buffer, gpuAddress, allocSize); + auto globalConstBuffer = std::make_unique(&globalConstBufferMockGA); auto kernelInfo = std::make_unique(); @@ -861,7 +862,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl const auto globalConstantsSurfaceAddressSSIndex = 1; auto kernelImmutableData = std::make_unique(&deviceImp); - kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, &globalConstBuffer, nullptr, false); + 
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, globalConstBuffer.get(), nullptr, false); auto &gfxCoreHelper = device->getGfxCoreHelper(); auto surfaceStateSize = static_cast(gfxCoreHelper.getRenderSurfaceStateSize()); @@ -870,14 +871,14 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl auto &residencyContainer = kernelImmutableData->getResidencyContainer(); EXPECT_EQ(1u, residencyContainer.size()); - EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer)); + EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer->getGraphicsAllocation())); EXPECT_EQ(1u, encodeBufferSurfaceStateCalled); EXPECT_EQ(allocSize, savedSurfaceStateArgs.size); EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress); EXPECT_EQ(ptrOffset(kernelImmutableData->getSurfaceStateHeapTemplate(), globalConstantsSurfaceAddressSSIndex * surfaceStateSize), savedSurfaceStateArgs.outMemory); - EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation); + EXPECT_EQ(globalConstBuffer->getGraphicsAllocation(), savedSurfaceStateArgs.allocation); } } @@ -908,7 +909,8 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t allocSize = 0x1100; - NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, allocSize); + auto globalVarBufferMockGA = NEO::MockGraphicsAllocation(buffer, gpuAddress, allocSize); + auto globalVarBuffer = std::make_unique(&globalVarBufferMockGA); auto kernelInfo = std::make_unique(); @@ -931,7 +933,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic const auto globalVariablesSurfaceAddressSSIndex = 1; auto kernelImmutableData = std::make_unique(&deviceImp); - kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, &globalVarBuffer, false); + 
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, globalVarBuffer.get(), false); auto &gfxCoreHelper = device->getGfxCoreHelper(); auto surfaceStateSize = static_cast(gfxCoreHelper.getRenderSurfaceStateSize()); @@ -940,14 +942,14 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic auto &residencyContainer = kernelImmutableData->getResidencyContainer(); EXPECT_EQ(1u, residencyContainer.size()); - EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer)); + EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer->getGraphicsAllocation())); EXPECT_EQ(1u, encodeBufferSurfaceStateCalled); EXPECT_EQ(allocSize, savedSurfaceStateArgs.size); EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress); EXPECT_EQ(ptrOffset(kernelImmutableData->getSurfaceStateHeapTemplate(), globalVariablesSurfaceAddressSSIndex * surfaceStateSize), savedSurfaceStateArgs.outMemory); - EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation); + EXPECT_EQ(globalVarBuffer->getGraphicsAllocation(), savedSurfaceStateArgs.allocation); } } @@ -982,7 +984,8 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t allocSize = 0x1100; - NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, allocSize); + auto globalConstBufferMockGA = NEO::MockGraphicsAllocation(buffer, gpuAddress, allocSize); + auto globalConstBuffer = std::make_unique(&globalConstBufferMockGA); auto kernelInfo = std::make_unique(); @@ -1005,7 +1008,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState(); auto kernelImmutableData = std::make_unique(&deviceImp); - kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, &globalConstBuffer, nullptr, false); + 
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, globalConstBuffer.get(), nullptr, false); auto &gfxCoreHelper = device->getGfxCoreHelper(); auto surfaceStateSize = static_cast(gfxCoreHelper.getRenderSurfaceStateSize()); @@ -1014,12 +1017,12 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl auto &residencyContainer = kernelImmutableData->getResidencyContainer(); EXPECT_EQ(1u, residencyContainer.size()); - EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer)); - EXPECT_EQ(0, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer.getBindlessInfo().heapAllocation)); + EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer->getGraphicsAllocation())); + EXPECT_EQ(0, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer->getGraphicsAllocation()->getBindlessInfo().heapAllocation)); auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate(); auto patchLocation = reinterpret_cast(ptrOffset(crossThreadData, globalConstSurfaceAddressBindlessOffset)); - auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(globalConstBuffer.getBindlessInfo().surfaceStateOffset)); + auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(globalConstBuffer->getGraphicsAllocation()->getBindlessInfo().surfaceStateOffset)); EXPECT_EQ(patchValue, *patchLocation); @@ -1027,12 +1030,12 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl EXPECT_EQ(allocSize, savedSurfaceStateArgs.size); EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress); - EXPECT_NE(globalConstBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory); + EXPECT_NE(globalConstBuffer->getGraphicsAllocation()->getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory); - const auto surfState = 
reinterpret_cast(globalConstBuffer.getBindlessInfo().ssPtr); + const auto surfState = reinterpret_cast(globalConstBuffer->getGraphicsAllocation()->getBindlessInfo().ssPtr); ASSERT_NE(nullptr, surfState); EXPECT_EQ(gpuAddress, surfState->getSurfaceBaseAddress()); - EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation); + EXPECT_EQ(globalConstBuffer->getGraphicsAllocation(), savedSurfaceStateArgs.allocation); } } @@ -1067,7 +1070,8 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t allocSize = 0x1100; - NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, allocSize); + auto globalVarBufferMockGA = NEO::MockGraphicsAllocation(buffer, gpuAddress, allocSize); + auto globalVarBuffer = std::make_unique(&globalVarBufferMockGA); auto kernelInfo = std::make_unique(); @@ -1090,7 +1094,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState(); auto kernelImmutableData = std::make_unique(&deviceImp); - kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, &globalVarBuffer, false); + kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, globalVarBuffer.get(), false); auto &gfxCoreHelper = device->getGfxCoreHelper(); auto surfaceStateSize = static_cast(gfxCoreHelper.getRenderSurfaceStateSize()); @@ -1099,12 +1103,12 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic auto &residencyContainer = kernelImmutableData->getResidencyContainer(); EXPECT_EQ(1u, residencyContainer.size()); - EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer)); - EXPECT_EQ(0, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer.getBindlessInfo().heapAllocation)); + EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), 
globalVarBuffer->getGraphicsAllocation())); + EXPECT_EQ(0, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer->getGraphicsAllocation()->getBindlessInfo().heapAllocation)); auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate(); auto patchLocation = reinterpret_cast(ptrOffset(crossThreadData, globalVariablesSurfaceAddressBindlessOffset)); - auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(globalVarBuffer.getBindlessInfo().surfaceStateOffset)); + auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(globalVarBuffer->getGraphicsAllocation()->getBindlessInfo().surfaceStateOffset)); EXPECT_EQ(patchValue, *patchLocation); @@ -1112,12 +1116,12 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic EXPECT_EQ(allocSize, savedSurfaceStateArgs.size); EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress); - EXPECT_NE(globalVarBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory); + EXPECT_NE(globalVarBuffer->getGraphicsAllocation()->getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory); - const auto surfState = reinterpret_cast(globalVarBuffer.getBindlessInfo().ssPtr); + const auto surfState = reinterpret_cast(globalVarBuffer->getGraphicsAllocation()->getBindlessInfo().ssPtr); ASSERT_NE(nullptr, surfState); EXPECT_EQ(gpuAddress, surfState->getSurfaceBaseAddress()); - EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation); + EXPECT_EQ(globalVarBuffer->getGraphicsAllocation(), savedSurfaceStateArgs.allocation); } } diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 4d75ac9d35..8df5664365 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -3578,8 +3578,8 @@ kernels: zebin.data(), zebin.size()); auto 
retVal = moduleTu.processUnpackedBinary(); EXPECT_EQ(retVal, ZE_RESULT_SUCCESS); - EXPECT_EQ(AllocationType::constantSurface, moduleTu.globalConstBuffer->getAllocationType()); - EXPECT_EQ(AllocationType::globalSurface, moduleTu.globalVarBuffer->getAllocationType()); + EXPECT_EQ(AllocationType::constantSurface, moduleTu.globalConstBuffer->getGraphicsAllocation()->getAllocationType()); + EXPECT_EQ(AllocationType::globalSurface, moduleTu.globalVarBuffer->getGraphicsAllocation()->getAllocationType()); auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); auto globalConstBufferAllocType = svmAllocsManager->getSVMAlloc(reinterpret_cast(moduleTu.globalConstBuffer->getGpuAddress()))->memoryType; @@ -4206,7 +4206,7 @@ struct ModuleIsaAllocationsFixture : public DeviceFixture { ~ProxyKernelImmutableData() override { this->KernelImmutableData::~KernelImmutableData(); } ADDMETHOD(initialize, ze_result_t, true, ZE_RESULT_ERROR_UNKNOWN, - (NEO::KernelInfo * kernelInfo, L0::Device *device, uint32_t computeUnitsUsedForScratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel), + (NEO::KernelInfo * kernelInfo, L0::Device *device, uint32_t computeUnitsUsedForScratch, NEO::SharedPoolAllocation *globalConstBuffer, NEO::SharedPoolAllocation *globalVarBuffer, bool internalKernel), (kernelInfo, device, computeUnitsUsedForScratch, globalConstBuffer, globalVarBuffer, internalKernel)); }; @@ -4435,10 +4435,10 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe auto module = std::make_unique(device, moduleBuildLog, ModuleType::user); module->translationUnit = std::make_unique(device); - module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( - {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()}); - module->translationUnit->globalConstBuffer = 
neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( - {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()}); + module->translationUnit->globalVarBuffer = std::make_unique(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( + {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()})); + module->translationUnit->globalConstBuffer = std::make_unique(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( + {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()})); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); @@ -4458,8 +4458,8 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe auto kernelImmData = std::make_unique>(this->device); kernelImmData->setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize)); - kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false); - kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer); + kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer.get(), module->translationUnit->globalVarBuffer.get(), false); + kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer.get(), module->translationUnit->globalVarBuffer.get()); module->kernelImmDatas.push_back(std::move(kernelImmData)); @@ -4912,10 +4912,10 @@ TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAll ModuleType::user); module->translationUnit = std::make_unique(device); - module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( - {device->getRootDeviceIndex(), 
MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()}); - module->translationUnit->globalConstBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( - {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()}); + module->translationUnit->globalVarBuffer = std::make_unique(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( + {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()})); + module->translationUnit->globalConstBuffer = std::make_unique(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( + {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::buffer, neoDevice->getDeviceBitfield()})); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); @@ -4929,17 +4929,17 @@ TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAll auto isaAlloc = module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize); ASSERT_NE(isaAlloc, nullptr); kernelImmData->setIsaPerKernelAllocation(isaAlloc); - kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false); + kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer.get(), module->translationUnit->globalVarBuffer.get(), false); module->kernelImmDatas.push_back(std::move(kernelImmData)); const auto allocs = module->getModuleAllocations(); EXPECT_EQ(3u, allocs.size()); - auto iter = std::find(allocs.begin(), allocs.end(), module->translationUnit->globalConstBuffer); + auto iter = std::find(allocs.begin(), allocs.end(), module->translationUnit->getGlobalConstBufferGA()); EXPECT_NE(allocs.end(), iter); - iter = std::find(allocs.begin(), allocs.end(), module->translationUnit->globalVarBuffer); + iter = std::find(allocs.begin(), allocs.end(), 
module->translationUnit->getGlobalVarBufferGA()); EXPECT_NE(allocs.end(), iter); iter = std::find(allocs.begin(), allocs.end(), module->kernelImmDatas[0]->getIsaGraphicsAllocation()); @@ -4995,14 +4995,30 @@ TEST_F(ModuleWithZebinTest, givenZebinSegmentsThenSegmentsArePopulated) { EXPECT_EQ(alloc->getGpuAddress(), segment.address); EXPECT_EQ(alloc->getUnderlyingBufferSize(), segment.size); }; - checkGPUSeg(module->translationUnit->globalConstBuffer, segments.constData); - checkGPUSeg(module->translationUnit->globalConstBuffer, segments.varData); + checkGPUSeg(module->translationUnit->getGlobalConstBufferGA(), segments.constData); + checkGPUSeg(module->translationUnit->getGlobalConstBufferGA(), segments.varData); checkGPUSeg(module->kernelImmDatas[0]->getIsaGraphicsAllocation(), segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName]); EXPECT_EQ(reinterpret_cast(module->translationUnit->programInfo.globalStrings.initData), segments.stringData.address); EXPECT_EQ(module->translationUnit->programInfo.globalStrings.size, segments.stringData.size); } +TEST_F(ModuleWithZebinTest, givenZebinSegmentsWithSharedGlobalConstAndVarBuffersThenSegmentsArePopulated) { + constexpr bool createWithSharedGlobalConstSurfaces = true; + module->addSegments(createWithSharedGlobalConstSurfaces); + auto segments = module->getZebinSegments(); + + auto checkGPUSeg = [](NEO::SharedPoolAllocation *poolAlloc, NEO::Zebin::Debug::Segments::Segment segment) { + EXPECT_EQ(poolAlloc->getGpuAddress(), segment.address); + EXPECT_EQ(poolAlloc->getSize(), segment.size); + + EXPECT_NE(poolAlloc->getGpuAddress(), poolAlloc->getGraphicsAllocation()->getGpuAddress()); + EXPECT_NE(poolAlloc->getSize(), poolAlloc->getGraphicsAllocation()->getUnderlyingBufferSize()); + }; + checkGPUSeg(module->translationUnit->globalConstBuffer.get(), segments.constData); + checkGPUSeg(module->translationUnit->globalVarBuffer.get(), segments.varData); +} + TEST_F(ModuleWithZebinTest, 
givenValidZebinWhenGettingDebugInfoThenDebugZebinIsCreatedAndReturned) { module->addEmptyZebin(); module->addSegments(); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 4822c6c0d7..0b25947505 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -124,6 +124,10 @@ inline void patch(const SrcT &src, void *dst, CrossThreadDataOffset dstOffsetByt } void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) { + patchWithImplicitSurface(ptrToPatchInCrossThreadData, allocation, reinterpret_cast(allocation.getGpuAddressToPatch()), allocation.getUnderlyingBufferSize(), arg); +} + +void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, void *addressToPatch, size_t sizeToPatch, const ArgDescPointer &arg) { if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) { auto pp = ptrOffset(crossThreadData, arg.stateless); patchWithRequiredSize(pp, arg.pointerSize, ptrToPatchInCrossThreadData); @@ -135,9 +139,6 @@ void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, Grap void *ssh = getSurfaceStateHeap(); if (nullptr != ssh) { - void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); - size_t sizeToPatch = allocation.getUnderlyingBufferSize(); - if (isValidOffset(arg.bindful)) { auto surfaceState = ptrOffset(ssh, arg.bindful); Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0, @@ -264,19 +265,27 @@ cl_int Kernel::initialize() { if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless) || isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)) { DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr); - uint64_t constMemory = isBuiltIn ? 
castToUint64(program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch(); + DEBUG_BREAK_IF(program->getConstantSurfaceGA(rootDeviceIndex) == nullptr); + const auto constantSurface = program->getConstantSurface(rootDeviceIndex); + const auto size = constantSurface->getSize(); + + uint64_t constMemory = isBuiltIn ? castToUint64(constantSurface->getUnderlyingBuffer()) : constantSurface->getGpuAddressToPatch(); const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress; - patchWithImplicitSurface(constMemory, *program->getConstantSurface(rootDeviceIndex), arg); + patchWithImplicitSurface(constMemory, *constantSurface->getGraphicsAllocation(), reinterpret_cast(constantSurface->getGpuAddressToPatch()), size, arg); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless) || isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)) { DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr); - uint64_t globalMemory = isBuiltIn ? castToUint64(program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch(); + DEBUG_BREAK_IF(program->getGlobalSurfaceGA(rootDeviceIndex) == nullptr); + const auto globalSurface = program->getGlobalSurface(rootDeviceIndex); + const auto size = globalSurface->getSize(); + + uint64_t globalMemory = isBuiltIn ? 
castToUint64(globalSurface->getUnderlyingBuffer()) : globalSurface->getGpuAddressToPatch(); const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress; - patchWithImplicitSurface(globalMemory, *program->getGlobalSurface(rootDeviceIndex), arg); + patchWithImplicitSurface(globalMemory, *globalSurface->getGraphicsAllocation(), reinterpret_cast(globalSurface->getGpuAddressToPatch()), size, arg); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) { @@ -1271,18 +1280,18 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { } if (program->getConstantSurface(rootDeviceIndex)) { - commandStreamReceiver.makeResident(*(program->getConstantSurface(rootDeviceIndex))); + commandStreamReceiver.makeResident(*(program->getConstantSurfaceGA(rootDeviceIndex))); - auto bindlessHeapAllocation = program->getConstantSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation; + auto bindlessHeapAllocation = program->getConstantSurfaceGA(rootDeviceIndex)->getBindlessInfo().heapAllocation; if (bindlessHeapAllocation) { commandStreamReceiver.makeResident(*bindlessHeapAllocation); } } if (program->getGlobalSurface(rootDeviceIndex)) { - commandStreamReceiver.makeResident(*(program->getGlobalSurface(rootDeviceIndex))); + commandStreamReceiver.makeResident(*(program->getGlobalSurfaceGA(rootDeviceIndex))); - auto bindlessHeapAllocation = program->getGlobalSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation; + auto bindlessHeapAllocation = program->getGlobalSurfaceGA(rootDeviceIndex)->getBindlessInfo().heapAllocation; if (bindlessHeapAllocation) { commandStreamReceiver.makeResident(*bindlessHeapAllocation); } @@ -1336,12 +1345,12 @@ void Kernel::getResidency(std::vector &dst) { auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (program->getConstantSurface(rootDeviceIndex)) { - GeneralSurface *surface = new 
GeneralSurface(program->getConstantSurface(rootDeviceIndex)); + GeneralSurface *surface = new GeneralSurface(program->getConstantSurfaceGA(rootDeviceIndex)); dst.push_back(surface); } if (program->getGlobalSurface(rootDeviceIndex)) { - GeneralSurface *surface = new GeneralSurface(program->getGlobalSurface(rootDeviceIndex)); + GeneralSurface *surface = new GeneralSurface(program->getGlobalSurfaceGA(rootDeviceIndex)); dst.push_back(surface); } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 5098947de5..4e0ef4370c 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -404,6 +404,7 @@ class Kernel : public ReferenceTrackedObject, NEO::NonCopyableAndNonMova void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc); void patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg); + void patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, void *addressToPatch, size_t sizeToPatch, const ArgDescPointer &arg); void provideInitializationHints(); diff --git a/opencl/source/program/process_device_binary.cpp b/opencl/source/program/process_device_binary.cpp index ae4646ce88..fbf5cb8721 100644 --- a/opencl/source/program/process_device_binary.cpp +++ b/opencl/source/program/process_device_binary.cpp @@ -91,15 +91,15 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_ Linker::SegmentInfo constants; Linker::SegmentInfo exportedFunctions; Linker::SegmentInfo strings; - GraphicsAllocation *globalsForPatching = getGlobalSurface(rootDeviceIndex); - GraphicsAllocation *constantsForPatching = getConstantSurface(rootDeviceIndex); + SharedPoolAllocation *globalsForPatching = getGlobalSurface(rootDeviceIndex); + SharedPoolAllocation *constantsForPatching = getConstantSurface(rootDeviceIndex); if (globalsForPatching != nullptr) { globals.gpuAddress = 
static_cast(globalsForPatching->getGpuAddress()); - globals.segmentSize = globalsForPatching->getUnderlyingBufferSize(); + globals.segmentSize = globalsForPatching->getSize(); } if (constantsForPatching != nullptr) { constants.gpuAddress = static_cast(constantsForPatching->getGpuAddress()); - constants.segmentSize = constantsForPatching->getUnderlyingBufferSize(); + constants.segmentSize = constantsForPatching->getSize(); } if (stringsInfo.initData != nullptr) { strings.gpuAddress = reinterpret_cast(stringsInfo.initData); @@ -217,11 +217,13 @@ cl_int Program::processGenBinary(const ClDevice &clDevice) { cleanCurrentKernelInfo(rootDeviceIndex); auto &buildInfo = buildInfos[rootDeviceIndex]; - if (buildInfo.constantSurface || buildInfo.globalSurface) { - clDevice.getMemoryManager()->freeGraphicsMemory(buildInfo.constantSurface); - clDevice.getMemoryManager()->freeGraphicsMemory(buildInfo.globalSurface); - buildInfo.constantSurface = nullptr; - buildInfo.globalSurface = nullptr; + if (buildInfo.constantSurface) { + clDevice.getMemoryManager()->freeGraphicsMemory(buildInfo.constantSurface->getGraphicsAllocation()); + buildInfo.constantSurface.reset(); + } + if (buildInfo.globalSurface) { + clDevice.getMemoryManager()->freeGraphicsMemory(buildInfo.globalSurface->getGraphicsAllocation()); + buildInfo.globalSurface.reset(); } if (!decodedSingleDeviceBinary.isSet) { @@ -286,9 +288,9 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) { auto svmAllocsManager = context ? 
context->getSVMAllocsManager() : nullptr; auto globalConstDataSize = src.globalConstants.size + src.globalConstants.zeroInitSize; if (globalConstDataSize != 0) { - buildInfos[rootDeviceIndex].constantSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalConstDataSize, src.globalConstants.zeroInitSize, true, linkerInput, src.globalConstants.initData); + buildInfos[rootDeviceIndex].constantSurface.reset(allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalConstDataSize, src.globalConstants.zeroInitSize, true, linkerInput, src.globalConstants.initData)); if (isBindlessKernelPresent) { - if (!clDevice.getMemoryManager()->allocateBindlessSlot(buildInfos[rootDeviceIndex].constantSurface)) { + if (!clDevice.getMemoryManager()->allocateBindlessSlot(buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation())) { return CL_OUT_OF_HOST_MEMORY; } } @@ -297,9 +299,9 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) { auto globalVariablesDataSize = src.globalVariables.size + src.globalVariables.zeroInitSize; buildInfos[rootDeviceIndex].globalVarTotalSize = globalVariablesDataSize; if (globalVariablesDataSize != 0) { - buildInfos[rootDeviceIndex].globalSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalVariablesDataSize, src.globalVariables.zeroInitSize, false, linkerInput, src.globalVariables.initData); + buildInfos[rootDeviceIndex].globalSurface.reset(allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalVariablesDataSize, src.globalVariables.zeroInitSize, false, linkerInput, src.globalVariables.initData)); if (isBindlessKernelPresent) { - if (!clDevice.getMemoryManager()->allocateBindlessSlot(buildInfos[rootDeviceIndex].globalSurface)) { + if (!clDevice.getMemoryManager()->allocateBindlessSlot(buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation())) { return CL_OUT_OF_HOST_MEMORY; } } diff --git a/opencl/source/program/program.cpp 
b/opencl/source/program/program.cpp index 4d47478658..e018ece1ac 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -124,19 +124,19 @@ Program::~Program() { } for (const auto &buildInfo : buildInfos) { - if (buildInfo.constantSurface) { - if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(buildInfo.constantSurface->getGpuAddress())))) { - context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(buildInfo.constantSurface->getGpuAddress())); + if (auto &constantSurface = buildInfo.constantSurface; constantSurface) { + if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(constantSurface->getGpuAddress())))) { + context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(constantSurface->getGpuAddress())); } else { - this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(buildInfo.constantSurface); + this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(constantSurface->getGraphicsAllocation()); } } - if (buildInfo.globalSurface) { - if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(buildInfo.globalSurface->getGpuAddress())))) { - context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(buildInfo.globalSurface->getGpuAddress())); + if (auto &globalSurface = buildInfo.globalSurface; globalSurface) { + if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(globalSurface->getGpuAddress())))) { + context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(globalSurface->getGpuAddress())); } else { - this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(buildInfo.globalSurface); + 
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalSurface->getGraphicsAllocation()); } } } @@ -631,8 +631,8 @@ StackVec Program::getModuleAllocations(uint32_t r for (const auto &kernelInfo : kernelInfoArray) { allocs.push_back(kernelInfo->getGraphicsAllocation()); } - GraphicsAllocation *globalsForPatching = getGlobalSurface(rootIndex); - GraphicsAllocation *constantsForPatching = getConstantSurface(rootIndex); + GraphicsAllocation *globalsForPatching = getGlobalSurfaceGA(rootIndex); + GraphicsAllocation *constantsForPatching = getConstantSurfaceGA(rootIndex); if (globalsForPatching) { allocs.push_back(globalsForPatching); @@ -654,4 +654,24 @@ void Program::callGenerateDefaultExtendedArgsMetadataOnce(uint32_t rootDeviceInd metadataGeneration->callGenerateDefaultExtendedArgsMetadataOnce(buildInfo.kernelInfoArray); } +NEO::SharedPoolAllocation *Program::getConstantSurface(uint32_t rootDeviceIndex) const { + return buildInfos[rootDeviceIndex].constantSurface.get(); +} + +NEO::GraphicsAllocation *Program::getConstantSurfaceGA(uint32_t rootDeviceIndex) const { + return buildInfos[rootDeviceIndex].constantSurface ? buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation() : nullptr; +} + +NEO::SharedPoolAllocation *Program::getGlobalSurface(uint32_t rootDeviceIndex) const { + return buildInfos[rootDeviceIndex].globalSurface.get(); +} + +NEO::GraphicsAllocation *Program::getGlobalSurfaceGA(uint32_t rootDeviceIndex) const { + return buildInfos[rootDeviceIndex].globalSurface ? 
buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation() : nullptr; +} + +NEO::GraphicsAllocation *Program::getExportedFunctionsSurface(uint32_t rootDeviceIndex) const { + return buildInfos[rootDeviceIndex].exportedFunctionsSurface; +} + } // namespace NEO diff --git a/opencl/source/program/program.h b/opencl/source/program/program.h index ba31dd6e19..e3699983c2 100644 --- a/opencl/source/program/program.h +++ b/opencl/source/program/program.h @@ -37,6 +37,7 @@ struct MetadataGeneration; struct KernelInfo; enum class DecodeError : uint8_t; struct ExternalFunctionInfo; +class SharedPoolAllocation; template <> struct OpenCLObjectMapper<_cl_program> { @@ -187,17 +188,11 @@ class Program : public BaseObject<_cl_program> { return isSpirV; } - GraphicsAllocation *getConstantSurface(uint32_t rootDeviceIndex) const { - return buildInfos[rootDeviceIndex].constantSurface; - } - - GraphicsAllocation *getGlobalSurface(uint32_t rootDeviceIndex) const { - return buildInfos[rootDeviceIndex].globalSurface; - } - - GraphicsAllocation *getExportedFunctionsSurface(uint32_t rootDeviceIndex) const { - return buildInfos[rootDeviceIndex].exportedFunctionsSurface; - } + NEO::SharedPoolAllocation *getConstantSurface(uint32_t rootDeviceIndex) const; + NEO::GraphicsAllocation *getConstantSurfaceGA(uint32_t rootDeviceIndex) const; + NEO::SharedPoolAllocation *getGlobalSurface(uint32_t rootDeviceIndex) const; + NEO::GraphicsAllocation *getGlobalSurfaceGA(uint32_t rootDeviceIndex) const; + NEO::GraphicsAllocation *getExportedFunctionsSurface(uint32_t rootDeviceIndex) const; void cleanCurrentKernelInfo(uint32_t rootDeviceIndex); @@ -331,8 +326,8 @@ class Program : public BaseObject<_cl_program> { struct BuildInfo : public NonCopyableClass { std::vector kernelInfoArray; - GraphicsAllocation *constantSurface = nullptr; - GraphicsAllocation *globalSurface = nullptr; + std::unique_ptr constantSurface; + std::unique_ptr globalSurface; GraphicsAllocation *exportedFunctionsSurface = nullptr; 
size_t globalVarTotalSize = 0U; std::unique_ptr linkerInput; diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp index 6740e66a2c..0522055b1a 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp @@ -574,8 +574,10 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenLocalMemoryAccessNotAllowedWhenGlobalCons auto rootDeviceIndex = device->getRootDeviceIndex(); - ASSERT_NE(nullptr, program.getConstantSurface(rootDeviceIndex)); - auto gpuAddress = reinterpret_cast(program.getConstantSurface(rootDeviceIndex)->getGpuAddress()); + auto surface = program.getConstantSurface(rootDeviceIndex); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + auto gpuAddress = reinterpret_cast(surface->getGpuAddress()); EXPECT_NE(nullptr, bcsMockContext->getSVMAllocsManager()->getSVMAlloc(gpuAddress)); } diff --git a/opencl/test/unit_test/debugger/ocl_with_l0_debugger_tests.cpp b/opencl/test/unit_test/debugger/ocl_with_l0_debugger_tests.cpp index 1b91b1b00b..56471ac2b3 100644 --- a/opencl/test/unit_test/debugger/ocl_with_l0_debugger_tests.cpp +++ b/opencl/test/unit_test/debugger/ocl_with_l0_debugger_tests.cpp @@ -156,7 +156,7 @@ struct DebuggerZebinProgramTest : public Test { delete program->getKernelInfoArray(rootDeviceIndex)[0]->kernelAllocation; delete program->getKernelInfoArray(rootDeviceIndex)[0]; program->getKernelInfoArray(rootDeviceIndex).clear(); - delete program->getGlobalSurface(rootDeviceIndex); + delete program->getGlobalSurfaceGA(rootDeviceIndex); program->setGlobalSurface(nullptr); delete program; program = nullptr; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 1057460778..fac5a68ea1 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1494,13 +1494,13 
@@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation())); std::vector residencySurfaces; kernel->getResidency(residencySurfaces); @@ -1515,7 +1515,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe s->makeResident(csrMock); delete s; } - EXPECT_EQ(1U, csrMock.residency.count(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface->getUnderlyingBuffer())); + EXPECT_EQ(1U, csrMock.residency.count(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation()->getUnderlyingBuffer())); } memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); @@ -1537,10 +1537,10 @@ HWTEST_F(KernelResidencyTest, givenBindlessHeapsHelperAndGlobalAndConstantBuffer MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - program.buildInfos[pDevice->getRootDeviceIndex()].constantSurface = new MockGraphicsAllocation(); - EXPECT_TRUE(memoryManager->allocateBindlessSlot(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); - 
EXPECT_TRUE(memoryManager->allocateBindlessSlot(program.buildInfos[pDevice->getRootDeviceIndex()].constantSurface)); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); + program.buildInfos[pDevice->getRootDeviceIndex()].constantSurface = std::make_unique(new MockGraphicsAllocation()); + EXPECT_TRUE(memoryManager->allocateBindlessSlot(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation())); + EXPECT_TRUE(memoryManager->allocateBindlessSlot(program.buildInfos[pDevice->getRootDeviceIndex()].constantSurface->getGraphicsAllocation())); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -1548,11 +1548,11 @@ HWTEST_F(KernelResidencyTest, givenBindlessHeapsHelperAndGlobalAndConstantBuffer EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.getGlobalSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation)); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation())); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.getGlobalSurfaceGA(rootDeviceIndex)->getBindlessInfo().heapAllocation)); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].constantSurface)); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.getConstantSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation)); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].constantSurface->getGraphicsAllocation())); + 
EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.getConstantSurfaceGA(rootDeviceIndex)->getBindlessInfo().heapAllocation)); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -2095,7 +2095,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2123,7 +2123,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2152,7 +2152,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithStackCallsAndDetectIndirectAccessIn MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2182,7 +2182,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithoutStackCallsAndDetectIndirectAcces MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - 
program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2217,7 +2217,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithPtrByValueArgumentAndDetectIndirect MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2246,7 +2246,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2275,7 +2275,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadAndDetectIndirectAcc MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2304,7 +2304,7 @@ HWTEST_F(KernelResidencyTest, 
givenKernelWithNoKernelArgStoreAndDetectIndirectAc MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2333,7 +2333,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicAndDetectIndirectA MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); @@ -2362,7 +2362,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicAndImplicitArgsHas MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, kernel->initialize()); diff --git a/opencl/test/unit_test/memory_manager/cl_memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/cl_memory_manager_tests.cpp index 860218d484..499f3e971b 100644 --- a/opencl/test/unit_test/memory_manager/cl_memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/cl_memory_manager_tests.cpp @@ -370,15 +370,17 @@ TEST_F(ClMemoryManagerMultiRootDeviceTests, WhenAllocatingGlobalSurfaceThenItHas WhiteBox linkerInput; 
linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; - GraphicsAllocation *allocation = allocateGlobalsSurface(context->svmAllocsManager, device1->getDevice(), initData.size(), 0u, false, &linkerInput, initData.data()); + std::unique_ptr surface = std::unique_ptr(allocateGlobalsSurface(context->svmAllocsManager, device1->getDevice(), initData.size(), 0u, false, &linkerInput, initData.data())); + ASSERT_NE(nullptr, surface); + auto allocation = surface->getGraphicsAllocation(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectedRootDeviceIndex, allocation->getRootDeviceIndex()); if (device1->getMemoryManager()->isLimitedRange(expectedRootDeviceIndex)) { device1->getMemoryManager()->freeGraphicsMemory(allocation); } else { - context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(allocation->getGpuAddress())); + context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(surface->getGpuAddress())); } } diff --git a/opencl/test/unit_test/mocks/mock_program.h b/opencl/test/unit_test/mocks/mock_program.h index b27234c065..e35b06dbf1 100644 --- a/opencl/test/unit_test/mocks/mock_program.h +++ b/opencl/test/unit_test/mocks/mock_program.h @@ -93,19 +93,37 @@ class MockProgram : public Program { } void setConstantSurface(GraphicsAllocation *gfxAllocation) { if (gfxAllocation) { - buildInfos[gfxAllocation->getRootDeviceIndex()].constantSurface = gfxAllocation; + buildInfos[gfxAllocation->getRootDeviceIndex()].constantSurface = std::make_unique(gfxAllocation); } else { for (auto &buildInfo : buildInfos) { - buildInfo.constantSurface = nullptr; + buildInfo.constantSurface.reset(); + } + } + } + void setConstantSurface(std::unique_ptr constantSurface) { + if (constantSurface) { + buildInfos[constantSurface->getGraphicsAllocation()->getRootDeviceIndex()].constantSurface = std::move(constantSurface); + } else { + for (auto &buildInfo : buildInfos) { + buildInfo.constantSurface.reset(); } } } void 
setGlobalSurface(GraphicsAllocation *gfxAllocation) { if (gfxAllocation) { - buildInfos[gfxAllocation->getRootDeviceIndex()].globalSurface = gfxAllocation; + buildInfos[gfxAllocation->getRootDeviceIndex()].globalSurface = std::make_unique(gfxAllocation); } else { for (auto &buildInfo : buildInfos) { - buildInfo.globalSurface = nullptr; + buildInfo.globalSurface.reset(); + } + } + } + void setGlobalSurface(std::unique_ptr globalSurface) { + if (globalSurface) { + buildInfos[globalSurface->getGraphicsAllocation()->getRootDeviceIndex()].globalSurface = std::move(globalSurface); + } else { + for (auto &buildInfo : buildInfos) { + buildInfo.globalSurface.reset(); } } } diff --git a/opencl/test/unit_test/program/program_data_tests.cpp b/opencl/test/unit_test/program/program_data_tests.cpp index 9ab0b5c940..8529f973ec 100644 --- a/opencl/test/unit_test/program/program_data_tests.cpp +++ b/opencl/test/unit_test/program/program_data_tests.cpp @@ -187,8 +187,10 @@ TEST_F(ProgramDataTest, WhenAllocatingConstantMemorySurfaceThenUnderlyingBufferI buildAndDecodeProgramPatchList(); - EXPECT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(0, memcmp(constValue, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), constSize)); + auto surface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + EXPECT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_EQ(0, memcmp(constValue, surface->getUnderlyingBuffer(), constSize)); } TEST_F(ProgramDataTest, givenProgramWhenAllocatingConstantMemorySurfaceThenProperDeviceBitfieldIsPassed) { @@ -219,8 +221,10 @@ TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedThenAllocateSurfacesAsSvm) programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo, *pClDevice); - ASSERT_NE(nullptr, 
pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress()))); + auto surface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(surface->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalConstantsAreNotExportedThenAllocateSurfacesAsNonSvm) { @@ -237,9 +241,10 @@ TEST_F(ProgramDataTest, whenGlobalConstantsAreNotExportedThenAllocateSurfacesAsN programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo, *pClDevice); - ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast( - pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress()))); + auto surface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(surface->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedButContextUnavailableThenAllocateSurfacesAsNonSvm) { @@ -261,9 +266,10 @@ TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedButContextUnavailableThenA pProgram->context = pContext; - ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast( - pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress()))); + auto surface = 
pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(surface->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedThenAllocateSurfacesAsSvm) { @@ -279,8 +285,10 @@ TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedThenAllocateSurfacesAsSvm) programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo, *pClDevice); - ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress()))); + auto surface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(surface->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedButContextUnavailableThenAllocateSurfacesAsNonSvm) { @@ -302,8 +310,10 @@ TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedButContextUnavailableThenA pProgram->context = pContext; - ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress()))); + auto surface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(surface->getGpuAddress()))); } TEST_F(ProgramDataTest, 
whenGlobalVariablesAreNotExportedThenAllocateSurfacesAsNonSvm) { @@ -320,8 +330,10 @@ TEST_F(ProgramDataTest, whenGlobalVariablesAreNotExportedThenAllocateSurfacesAsN programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo, *pClDevice); - ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress()))); + auto surface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, surface); + ASSERT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(surface->getGpuAddress()))); } using ProgramDataBindlessTest = ProgramDataTest; @@ -356,15 +368,20 @@ TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndConstantsAndVariablesMemor programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo, *pClDevice); - ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); - ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); + auto constantSurface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, constantSurface); + ASSERT_NE(nullptr, constantSurface->getGraphicsAllocation()); - auto globalConstantsAlloc = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + auto globalSurface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + ASSERT_NE(nullptr, globalSurface); + ASSERT_NE(nullptr, globalSurface->getGraphicsAllocation()); + + auto globalConstantsAlloc = pProgram->getConstantSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex()); auto &ssInHeap1 = globalConstantsAlloc->getBindlessInfo(); 
EXPECT_NE(nullptr, ssInHeap1.heapAllocation); - auto globalVariablesAlloc = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + auto globalVariablesAlloc = pProgram->getGlobalSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex()); auto &ssInHeap2 = globalVariablesAlloc->getBindlessInfo(); EXPECT_NE(nullptr, ssInHeap2.heapAllocation); @@ -395,7 +412,7 @@ TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndGlobalConstantsMemorySurfa auto ret = this->pProgram->processProgramInfo(programInfo, *pClDevice); EXPECT_EQ(ret, CL_OUT_OF_HOST_MEMORY); - auto globalConstantsAlloc = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + auto globalConstantsAlloc = pProgram->getConstantSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, globalConstantsAlloc); auto &ssInHeap = globalConstantsAlloc->getBindlessInfo(); @@ -427,7 +444,7 @@ TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndGlobalVariablesMemorySurfa auto ret = this->pProgram->processProgramInfo(programInfo, *pClDevice); EXPECT_EQ(ret, CL_OUT_OF_HOST_MEMORY); - auto globalVariablesAlloc = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + auto globalVariablesAlloc = pProgram->getGlobalSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, globalVariablesAlloc); auto &ssInHeap = globalVariablesAlloc->getBindlessInfo(); @@ -442,7 +459,7 @@ TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBein auto &csr = *pPlatform->getClDevice(0)->getDefaultEngine().commandStreamReceiver; auto tagAddress = csr.getTagAddress(); - auto constantSurface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + auto constantSurface = pProgram->getConstantSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex()); constantSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); @@ -461,7 +478,7 @@ 
TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingD auto &csr = *pPlatform->getClDevice(0)->getDefaultEngine().commandStreamReceiver; auto tagAddress = csr.getTagAddress(); - auto globalSurface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + auto globalSurface = pProgram->getGlobalSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex()); globalSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); @@ -479,20 +496,25 @@ TEST_F(ProgramDataTest, GivenDeviceForcing32BitMessagesWhenConstAllocationIsPres buildAndDecodeProgramPatchList(); - EXPECT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(0, memcmp(constValue, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), constSize)); + auto constantSurface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()); + EXPECT_NE(nullptr, constantSurface); + EXPECT_NE(nullptr, constantSurface->getGraphicsAllocation()); + EXPECT_EQ(0, memcmp(constValue, constantSurface->getUnderlyingBuffer(), constSize)); if constexpr (is64bit) { - EXPECT_TRUE(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->is32BitAllocation()); + EXPECT_TRUE(constantSurface->getGraphicsAllocation()->is32BitAllocation()); } } TEST_F(ProgramDataTest, WhenAllocatingGlobalMemorySurfaceThenUnderlyingBufferIsSetCorrectly) { auto globalSize = setupGlobalAllocation(); buildAndDecodeProgramPatchList(); - EXPECT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(0, memcmp(globalValue, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), globalSize)); + auto surface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + EXPECT_NE(nullptr, surface); + EXPECT_NE(nullptr, 
surface->getGraphicsAllocation()); + EXPECT_EQ(0, memcmp(globalValue, surface->getUnderlyingBuffer(), globalSize)); } + TEST_F(ProgramDataTest, givenProgramWhenAllocatingGlobalMemorySurfaceThenProperDeviceBitfieldIsPassed) { auto executionEnvironment = pClDevice->getExecutionEnvironment(); auto memoryManager = new MockMemoryManager(*executionEnvironment); @@ -504,6 +526,7 @@ TEST_F(ProgramDataTest, givenProgramWhenAllocatingGlobalMemorySurfaceThenProperD setupGlobalAllocation(); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); + EXPECT_NE(nullptr, pProgram->getGlobalSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(pClDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield); std::swap(memoryManagerBackup, executionEnvironment->memoryManager); } @@ -535,10 +558,12 @@ TEST_F(ProgramDataTest, Given32BitDeviceWhenGlobalMemorySurfaceIsPresentThenItHa buildAndDecodeProgramPatchList(); - EXPECT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); - EXPECT_EQ(0, memcmp(globalValue, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), globalSize)); + auto surface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()); + EXPECT_NE(nullptr, surface); + EXPECT_NE(nullptr, surface->getGraphicsAllocation()); + EXPECT_EQ(0, memcmp(globalValue, surface->getUnderlyingBuffer(), globalSize)); if constexpr (is64bit) { - EXPECT_TRUE(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->is32BitAllocation()); + EXPECT_TRUE(surface->getGraphicsAllocation()->is32BitAllocation()); } delete[] pAllocateGlobalMemorySurface; @@ -559,19 +584,25 @@ TEST(ProgramScopeMetadataTest, WhenPatchingGlobalSurfaceThenPickProperSourceBuff NEO::populateProgramInfo(programInfo, decodedProgram); program.processProgramInfo(programInfo, device); auto &buildInfo = 
program.buildInfos[device.getRootDeviceIndex()]; - ASSERT_NE(nullptr, buildInfo.globalSurface); - ASSERT_NE(nullptr, buildInfo.constantSurface); - ASSERT_NE(nullptr, buildInfo.globalSurface->getUnderlyingBuffer()); - ASSERT_NE(nullptr, buildInfo.constantSurface->getUnderlyingBuffer()); - EXPECT_EQ(static_cast(buildInfo.globalSurface->getGpuAddressToPatch()), *reinterpret_cast(buildInfo.constantSurface->getUnderlyingBuffer())); - EXPECT_EQ(static_cast(buildInfo.constantSurface->getGpuAddressToPatch()), *reinterpret_cast(buildInfo.globalSurface->getUnderlyingBuffer())); + + auto globalSurface = buildInfo.globalSurface.get(); + auto constantSurface = buildInfo.constantSurface.get(); + + ASSERT_NE(nullptr, globalSurface); + ASSERT_NE(nullptr, globalSurface->getGraphicsAllocation()); + ASSERT_NE(nullptr, constantSurface); + ASSERT_NE(nullptr, constantSurface->getGraphicsAllocation()); + ASSERT_NE(nullptr, globalSurface->getGraphicsAllocation()->getUnderlyingBuffer()); + ASSERT_NE(nullptr, constantSurface->getGraphicsAllocation()->getUnderlyingBuffer()); + EXPECT_EQ(static_cast(globalSurface->getGraphicsAllocation()->getGpuAddressToPatch()), *reinterpret_cast(constantSurface->getGraphicsAllocation()->getUnderlyingBuffer())); + EXPECT_EQ(static_cast(constantSurface->getGraphicsAllocation()->getGpuAddressToPatch()), *reinterpret_cast(globalSurface->getGraphicsAllocation()->getUnderlyingBuffer())); } TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeConstantBufferPatchTokensAreReadThenConstantPointerOffsetIsPatchedWith32bitPointer) { MockProgram *prog = pProgram; // simulate case when constant surface was not allocated - EXPECT_EQ(nullptr, prog->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); + EXPECT_EQ(nullptr, prog->getConstantSurfaceGA(pContext->getDevice(0)->getRootDeviceIndex())); ProgramInfo programInfo; programInfo.prepareLinkerInputStorage(); @@ -738,11 +769,15 @@ HWTEST2_F(ProgramDataTest, 
whenLinkerInputValidThenIsaIsProperlyPatched, MatchAn globalConstantsBuffer.resize(32, 7); std::vector globalVariablesInitData{32, 0}; std::vector globalConstantsInitData{32, 0}; - auto globalSurface = std::make_unique(globalVariablesBuffer.data(), globalVariablesBuffer.size()); - auto constantSurface = std::make_unique(globalConstantsBuffer.data(), globalConstantsBuffer.size()); - buildInfo.globalSurface = globalSurface.get(); - buildInfo.constantSurface = constantSurface.get(); + auto globalSurfaceMockGA = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size()); + auto constantSurfaceMockGA = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size()); + + auto globalSurface = std::make_unique(globalSurfaceMockGA); + auto constantSurface = std::make_unique(constantSurfaceMockGA); + + buildInfo.globalSurface = std::move(globalSurface); + buildInfo.constantSurface = std::move(constantSurface); auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalConstantsInitData.size(), globalVariablesInitData.data(), globalVariablesInitData.size(), {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); @@ -750,15 +785,19 @@ HWTEST2_F(ProgramDataTest, whenLinkerInputValidThenIsaIsProperlyPatched, MatchAn linkerInput.reset(static_cast *>(buildInfo.linkerInput.release())); for (size_t i = 0; i < linkerInput->textRelocations.size(); ++i) { - auto expectedPatch = buildInfo.globalSurface->getGpuAddress() + linkerInput->symbols[linkerInput->textRelocations[0][0].symbolName].offset; + auto expectedPatch = buildInfo.globalSurface->getGraphicsAllocation()->getGpuAddress() + linkerInput->symbols[linkerInput->textRelocations[0][0].symbolName].offset; auto relocationAddress = kernelHeap.data() + linkerInput->textRelocations[0][0].offset; EXPECT_EQ(static_cast(expectedPatch), *reinterpret_cast(relocationAddress)) << i; } program.getKernelInfoArray(rootDeviceIndex).clear(); - 
buildInfo.globalSurface = nullptr; - buildInfo.constantSurface = nullptr; + delete buildInfo.globalSurface->getGraphicsAllocation(); + buildInfo.globalSurface.reset(); + + delete buildInfo.constantSurface->getGraphicsAllocation(); + buildInfo.constantSurface.reset(); + device->device.getRootDeviceEnvironmentRef().compilerProductHelper.swap(backup); } } @@ -789,18 +828,18 @@ TEST_F(ProgramDataTest, whenRelocationsAreNotNeededThenIsaIsPreserved) { globalConstantsBuffer.resize(32, 7); std::vector globalVariablesInitData{32, 0}; std::vector globalConstantsInitData{32, 0}; - buildInfo.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size()); - buildInfo.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size()); + buildInfo.globalSurface = std::make_unique(new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size())); + buildInfo.constantSurface = std::make_unique(new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size())); auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalConstantsInitData.size(), globalVariablesInitData.data(), globalVariablesInitData.size(), {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(kernelHeapData, kernelHeap); program.getKernelInfoArray(rootDeviceIndex).clear(); - delete buildInfo.globalSurface; - buildInfo.globalSurface = nullptr; - delete buildInfo.constantSurface; - buildInfo.constantSurface = nullptr; + delete buildInfo.globalSurface->getGraphicsAllocation(); + buildInfo.globalSurface.reset(); + delete buildInfo.constantSurface->getGraphicsAllocation(); + buildInfo.constantSurface.reset(); } TEST(ProgramStringSectionTest, WhenConstStringBufferIsPresentThenUseItForLinking) { diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 7b52080634..b26bfa83ab 100644 --- 
a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -525,15 +525,15 @@ TEST_F(ProgramFromBinaryTest, whenProgramIsBeingRebuildThenOutdatedGlobalBuffers EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); - pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = new MockGraphicsAllocation(); + pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = std::make_unique(new MockGraphicsAllocation()); pProgram->processGenBinary(*pClDevice); - EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); - EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); + EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface.get()); + EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface.get()); - pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); pProgram->processGenBinary(*pClDevice); - EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); - EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); + EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface.get()); + EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface.get()); } TEST_F(ProgramFromBinaryTest, givenProgramWhenCleanKernelInfoIsCalledThenKernelAllocationIsFreed) { @@ -583,16 +583,16 @@ TEST_F(ProgramFromBinaryTest, givenReuseKernelBinariesWhenCleanCurrentKernelInfo TEST_F(ProgramFromBinaryTest, givenProgramWithGlobalAndConstAllocationsWhenGettingModuleAllocationsThenAllAreReturned) 
{ pProgram->build(pProgram->getDevices(), nullptr); pProgram->processGenBinary(*pClDevice); - pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = new MockGraphicsAllocation(); - pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = std::make_unique(new MockGraphicsAllocation()); + pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = std::make_unique(new MockGraphicsAllocation()); auto allocs = pProgram->getModuleAllocations(pClDevice->getRootDeviceIndex()); EXPECT_EQ(pProgram->getNumKernels() + 2u, allocs.size()); - auto iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); + auto iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface->getGraphicsAllocation()); EXPECT_NE(allocs.end(), iter); - iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); + iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation()); EXPECT_NE(allocs.end(), iter); iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getGraphicsAllocation()); @@ -2080,22 +2080,24 @@ TEST_F(ProgramTests, givenExistingConstantSurfacesWhenProcessGenBinaryThenCleanu auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos.resize(2); - program->buildInfos[0].constantSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, - AllocationType::constantSurface, pDevice->getDeviceBitfield()}); - program->buildInfos[1].constantSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, 
MemoryConstants::cacheLineSize, - AllocationType::constantSurface, pDevice->getDeviceBitfield()}); + program->buildInfos[0].constantSurface = std::make_unique(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, + AllocationType::constantSurface, pDevice->getDeviceBitfield()})); + program->buildInfos[1].constantSurface = std::make_unique(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, + AllocationType::constantSurface, pDevice->getDeviceBitfield()})); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = patchtokensProgram.storage.size(); - auto constantSurface0 = program->buildInfos[0].constantSurface; + auto constantSurface0 = program->buildInfos[0].constantSurface.get(); EXPECT_NE(nullptr, constantSurface0); - auto constantSurface1 = program->buildInfos[1].constantSurface; + EXPECT_NE(nullptr, constantSurface0->getGraphicsAllocation()); + auto constantSurface1 = program->buildInfos[1].constantSurface.get(); EXPECT_NE(nullptr, constantSurface1); + EXPECT_NE(nullptr, constantSurface1->getGraphicsAllocation()); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(nullptr, program->buildInfos[0].constantSurface); - EXPECT_EQ(constantSurface1, program->buildInfos[1].constantSurface); + EXPECT_EQ(constantSurface1->getGraphicsAllocation(), program->buildInfos[1].constantSurface->getGraphicsAllocation()); EXPECT_EQ(CL_SUCCESS, retVal); } @@ -2106,22 +2108,24 @@ TEST_F(ProgramTests, givenExistingGlobalSurfacesWhenProcessGenBinaryThenCleanupT auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos.resize(2); - program->buildInfos[0].globalSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, 
MemoryConstants::cacheLineSize, - AllocationType::globalSurface, pDevice->getDeviceBitfield()}); - program->buildInfos[1].globalSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, - AllocationType::globalSurface, pDevice->getDeviceBitfield()}); + program->buildInfos[0].globalSurface = std::make_unique(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, + AllocationType::globalSurface, pDevice->getDeviceBitfield()})); + program->buildInfos[1].globalSurface = std::make_unique(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, + AllocationType::globalSurface, pDevice->getDeviceBitfield()})); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = patchtokensProgram.storage.size(); - auto globalSurface0 = program->buildInfos[0].globalSurface; + auto globalSurface0 = program->buildInfos[0].globalSurface.get(); EXPECT_NE(nullptr, globalSurface0); - auto globalSurface1 = program->buildInfos[1].globalSurface; + EXPECT_NE(nullptr, globalSurface0->getGraphicsAllocation()); + auto globalSurface1 = program->buildInfos[1].globalSurface.get(); EXPECT_NE(nullptr, globalSurface1); + EXPECT_NE(nullptr, globalSurface1->getGraphicsAllocation()); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(nullptr, program->buildInfos[0].globalSurface); - EXPECT_EQ(globalSurface1, program->buildInfos[1].globalSurface); + EXPECT_EQ(globalSurface1->getGraphicsAllocation(), program->buildInfos[1].globalSurface->getGraphicsAllocation()); EXPECT_EQ(CL_SUCCESS, retVal); } diff --git a/opencl/test/unit_test/program/program_with_zebin.cpp b/opencl/test/unit_test/program/program_with_zebin.cpp index d98ffbe4e1..e2d8ec6e9b 100644 --- 
a/opencl/test/unit_test/program/program_with_zebin.cpp +++ b/opencl/test/unit_test/program/program_with_zebin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -38,6 +38,11 @@ void ProgramWithZebinFixture::addEmptyZebin(NEO::MockProgram *program) { } void ProgramWithZebinFixture::populateProgramWithSegments(NEO::MockProgram *program) { + constexpr bool createWithSharedGlobalConstSurfaces = false; + populateProgramWithSegments(program, createWithSharedGlobalConstSurfaces); +} + +void ProgramWithZebinFixture::populateProgramWithSegments(NEO::MockProgram *program, bool createWithSharedGlobalConstSurfaces) { kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName; mockAlloc = std::make_unique(); @@ -47,11 +52,27 @@ void ProgramWithZebinFixture::populateProgramWithSegments(NEO::MockProgram *prog globalSurface = std::make_unique(); constantSurface = std::make_unique(); - program->setGlobalSurface(&globalSurface->mockGfxAllocation); - program->setConstantSurface(&constantSurface->mockGfxAllocation); + + if (createWithSharedGlobalConstSurfaces) { + const size_t globalSurfaceChunkOffset = 128u; + const size_t globalSurfaceChunkSize = globalSurface->mockGfxAllocation.getUnderlyingBufferSize() / 2; + const size_t constantSurfaceChunkOffset = 512u; + const size_t constantSurfaceChunkSize = constantSurface->mockGfxAllocation.getUnderlyingBufferSize() / 2; + + std::unique_ptr globalSurfacePoolAlloc = std::make_unique(&globalSurface->mockGfxAllocation, globalSurfaceChunkOffset, globalSurfaceChunkSize, nullptr); + std::unique_ptr constantSurfacePoolAlloc = std::make_unique(&constantSurface->mockGfxAllocation, constantSurfaceChunkOffset, constantSurfaceChunkSize, nullptr); + + program->setGlobalSurface(std::move(globalSurfacePoolAlloc)); + 
program->setConstantSurface(std::move(constantSurfacePoolAlloc)); + + } else { + program->setGlobalSurface(&globalSurface->mockGfxAllocation); + program->setConstantSurface(&constantSurface->mockGfxAllocation); + } program->buildInfos[rootDeviceIndex].constStringSectionData.initData = &strings; program->buildInfos[rootDeviceIndex].constStringSectionData.size = sizeof(strings); } + ProgramWithZebinFixture::ProgramWithZebinFixture() = default; ProgramWithZebinFixture::~ProgramWithZebinFixture() = default; diff --git a/opencl/test/unit_test/program/program_with_zebin.h b/opencl/test/unit_test/program/program_with_zebin.h index eca7a31bb4..9b3d54c1c0 100644 --- a/opencl/test/unit_test/program/program_with_zebin.h +++ b/opencl/test/unit_test/program/program_with_zebin.h @@ -31,6 +31,7 @@ class ProgramWithZebinFixture : public ProgramTests { void TearDown() override; void addEmptyZebin(MockProgram *program); void populateProgramWithSegments(MockProgram *program); + void populateProgramWithSegments(MockProgram *program, bool createWithSharedGlobalConstSurfaces); ~ProgramWithZebinFixture() override; ProgramWithZebinFixture(); }; diff --git a/opencl/test/unit_test/program/program_with_zebin_tests.cpp b/opencl/test/unit_test/program/program_with_zebin_tests.cpp index 8ece2a14b6..329365f3d2 100644 --- a/opencl/test/unit_test/program/program_with_zebin_tests.cpp +++ b/opencl/test/unit_test/program/program_with_zebin_tests.cpp @@ -40,8 +40,8 @@ TEST_F(ProgramWithZebinFixture, givenZebinSegmentsThenSegmentsArePopulated) { EXPECT_EQ(static_cast(alloc->getGpuAddress()), segment.address); EXPECT_EQ(static_cast(alloc->getUnderlyingBufferSize()), segment.size); }; - checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface, segments.constData); - checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface, segments.varData); + checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation(), segments.constData); + 
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation(), segments.varData); checkGPUSeg(program->getKernelInfoArray(rootDeviceIndex)[0]->getGraphicsAllocation(), segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName]); EXPECT_EQ(reinterpret_cast(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), segments.stringData.address); @@ -49,6 +49,22 @@ TEST_F(ProgramWithZebinFixture, givenZebinSegmentsThenSegmentsArePopulated) { EXPECT_EQ(program->buildInfos[rootDeviceIndex].constStringSectionData.size, sizeof(strings)); } +TEST_F(ProgramWithZebinFixture, givenZebinSegmentsWithSharedGlobalAndConstSurfacesThenSegmentsArePopulated) { + const bool createWithSharedGlobalConstSurfaces = true; + populateProgramWithSegments(program.get(), createWithSharedGlobalConstSurfaces); + auto segments = program->getZebinSegments(rootDeviceIndex); + + auto checkGPUSeg = [](NEO::SharedPoolAllocation *surface, NEO::Zebin::Debug::Segments::Segment segment) { + EXPECT_EQ(static_cast(surface->getGpuAddress()), segment.address); + EXPECT_EQ(static_cast(surface->getSize()), segment.size); + + EXPECT_NE(surface->getGraphicsAllocation()->getGpuAddress(), surface->getGpuAddress()); + EXPECT_NE(surface->getGraphicsAllocation()->getUnderlyingBufferSize(), surface->getSize()); + }; + checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface.get(), segments.constData); + checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface.get(), segments.varData); +} + TEST_F(ProgramWithZebinFixture, givenNonEmptyDebugDataThenDebugZebinIsNotCreated) { addEmptyZebin(program.get()); populateProgramWithSegments(program.get()); diff --git a/shared/source/compiler_interface/linker.cpp b/shared/source/compiler_interface/linker.cpp index b82a72f208..e07305dad9 100644 --- a/shared/source/compiler_interface/linker.cpp +++ b/shared/source/compiler_interface/linker.cpp @@ -316,7 +316,7 @@ void LinkerInput::parseRelocationForExtFuncUsage(const 
RelocationInfo &relocInfo } LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo, - const SegmentInfo &globalStringsSegInfo, GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, + const SegmentInfo &globalStringsSegInfo, SharedPoolAllocation *globalVariablesSeg, SharedPoolAllocation *globalConstantsSeg, const PatchableSegments &instructionsSegments, UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize, const KernelDescriptorsT &kernelDescriptors, ExternalFunctionsT &externalFunctions) { @@ -471,7 +471,7 @@ void Linker::patchInstructionsSegments(const std::vector &inst } void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, - GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, + SharedPoolAllocation *globalVariablesSeg, SharedPoolAllocation *globalConstantsSeg, std::vector &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize) { std::vector constantsData(globalConstantsSegInfo.segmentSize, 0u); @@ -535,12 +535,12 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); auto &productHelper = pDevice->getProductHelper(); if (globalConstantsSeg) { - bool useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *globalConstantsSeg); - MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalConstantsSeg, 0, constantsData.data(), constantsData.size()); + bool useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, 
*globalConstantsSeg->getGraphicsAllocation()); + MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalConstantsSeg->getGraphicsAllocation(), globalConstantsSeg->getOffset(), constantsData.data(), constantsData.size()); } if (globalVariablesSeg) { - bool useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *globalVariablesSeg); - MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalVariablesSeg, 0, variablesData.data(), variablesData.size()); + bool useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *globalVariablesSeg->getGraphicsAllocation()); + MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalVariablesSeg->getGraphicsAllocation(), globalVariablesSeg->getOffset(), variablesData.data(), variablesData.size()); } } } diff --git a/shared/source/compiler_interface/linker.h b/shared/source/compiler_interface/linker.h index e6ab21d879..14c289c5c4 100644 --- a/shared/source/compiler_interface/linker.h +++ b/shared/source/compiler_interface/linker.h @@ -26,6 +26,7 @@ class Device; class GraphicsAllocation; struct KernelDescriptor; struct ProgramInfo; +class SharedPoolAllocation; enum class SegmentType : uint32_t { unknown, @@ -235,7 +236,7 @@ struct Linker { } LinkingStatus link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo, - const SegmentInfo &globalStringsSegInfo, GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, + const SegmentInfo &globalStringsSegInfo, SharedPoolAllocation *globalVariablesSeg, SharedPoolAllocation *globalConstantsSeg, const PatchableSegments &instructionsSegments, UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize, const KernelDescriptorsT &kernelDescriptors, 
ExternalFunctionsT &externalFunctions); @@ -260,7 +261,7 @@ struct Linker { void patchInstructionsSegments(const std::vector &instructionsSegments, std::vector &outUnresolvedExternals, const KernelDescriptorsT &kernelDescriptors); void patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, - GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, + SharedPoolAllocation *globalVariablesSeg, SharedPoolAllocation *globalConstantsSeg, std::vector &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize); diff --git a/shared/source/device_binary_format/zebin/debug_zebin.cpp b/shared/source/device_binary_format/zebin/debug_zebin.cpp index db0d79725d..aa7cc9187d 100644 --- a/shared/source/device_binary_format/zebin/debug_zebin.cpp +++ b/shared/source/device_binary_format/zebin/debug_zebin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,18 +12,19 @@ #include "shared/source/device_binary_format/zebin/zebin_elf.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/graphics_allocation.h" +#include "shared/source/utilities/shared_pool_allocation.h" namespace NEO::Zebin::Debug { using namespace NEO::Zebin::Elf; Segments::Segments() {} -Segments::Segments(const GraphicsAllocation *globalVarAlloc, const GraphicsAllocation *globalConstAlloc, ArrayRef &globalStrings, std::vector &kernels) { +Segments::Segments(const SharedPoolAllocation *globalVarAlloc, const SharedPoolAllocation *globalConstAlloc, ArrayRef &globalStrings, std::vector &kernels) { if (globalVarAlloc) { - varData = {static_cast(globalVarAlloc->getGpuAddress()), globalVarAlloc->getUnderlyingBufferSize()}; + varData = {static_cast(globalVarAlloc->getGpuAddress()), globalVarAlloc->getSize()}; } if 
(globalConstAlloc) { - constData = {static_cast(globalConstAlloc->getGpuAddress()), globalConstAlloc->getUnderlyingBufferSize()}; + constData = {static_cast(globalConstAlloc->getGpuAddress()), globalConstAlloc->getSize()}; } if (false == globalStrings.empty()) { stringData = {reinterpret_cast(globalStrings.begin()), globalStrings.size()}; diff --git a/shared/source/device_binary_format/zebin/debug_zebin.h b/shared/source/device_binary_format/zebin/debug_zebin.h index e93b26f282..37536a8416 100644 --- a/shared/source/device_binary_format/zebin/debug_zebin.h +++ b/shared/source/device_binary_format/zebin/debug_zebin.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,6 +18,8 @@ namespace NEO { class GraphicsAllocation; +class SharedPoolAllocation; + namespace Zebin::Debug { struct Segments { struct Segment { @@ -34,7 +36,7 @@ struct Segments { CPUSegment stringData; KernelNameToSegmentMap nameToSegMap; Segments(); - Segments(const GraphicsAllocation *globalVarAlloc, const GraphicsAllocation *globalConstAlloc, ArrayRef &globalStrings, std::vector &kernels); + Segments(const SharedPoolAllocation *globalVarAlloc, const SharedPoolAllocation *globalConstAlloc, ArrayRef &globalStrings, std::vector &kernels); }; class DebugZebinCreator { diff --git a/shared/source/program/program_initialization.cpp b/shared/source/program/program_initialization.cpp index 81e9665c46..12ae491adb 100644 --- a/shared/source/program/program_initialization.cpp +++ b/shared/source/program/program_initialization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,12 +15,15 @@ #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" +#include 
"shared/source/memory_manager/unified_memory_pooling.h" #include "shared/source/program/program_info.h" namespace NEO { -GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAllocManager, NEO::Device &device, size_t totalSize, size_t zeroInitSize, bool constant, - LinkerInput *const linkerInput, const void *initData) { +SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAllocManager, NEO::Device &device, size_t totalSize, size_t zeroInitSize, bool constant, + LinkerInput *const linkerInput, const void *initData) { + size_t allocationOffset{0u}; + size_t allocatedSize{0u}; bool globalsAreExported = false; GraphicsAllocation *gpuAllocation = nullptr; const auto rootDeviceIndex = device.getRootDeviceIndex(); @@ -58,6 +61,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc if (!gpuAllocation) { return nullptr; } + allocatedSize = gpuAllocation->getUnderlyingBufferSize(); auto &rootDeviceEnvironment = device.getRootDeviceEnvironment(); auto &productHelper = device.getProductHelper(); @@ -66,10 +70,10 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc if (false == isOnlyBssData) { auto initSize = totalSize - zeroInitSize; auto success = MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation), - device, gpuAllocation, 0, initData, initSize); + device, gpuAllocation, allocationOffset, initData, initSize); UNRECOVERABLE_IF(!success); } - return gpuAllocation; + return new SharedPoolAllocation(gpuAllocation, allocationOffset, allocatedSize, nullptr); } } // namespace NEO diff --git a/shared/source/program/program_initialization.h b/shared/source/program/program_initialization.h index fb3ecccacf..b976b6a11d 100644 --- a/shared/source/program/program_initialization.h +++ b/shared/source/program/program_initialization.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * 
Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,9 +15,10 @@ class Device; class GraphicsAllocation; class SVMAllocsManager; struct LinkerInput; +class SharedPoolAllocation; -GraphicsAllocation *allocateGlobalsSurface(SVMAllocsManager *const svmAllocManager, Device &device, - size_t totalSize, size_t zeroInitSize, bool constant, - LinkerInput *const linkerInput, const void *initData); +[[nodiscard]] SharedPoolAllocation *allocateGlobalsSurface(SVMAllocsManager *const svmAllocManager, Device &device, + size_t totalSize, size_t zeroInitSize, bool constant, + LinkerInput *const linkerInput, const void *initData); } // namespace NEO diff --git a/shared/source/utilities/shared_pool_allocation.h b/shared/source/utilities/shared_pool_allocation.h index b918eebb84..4b751406a3 100644 --- a/shared/source/utilities/shared_pool_allocation.h +++ b/shared/source/utilities/shared_pool_allocation.h @@ -7,15 +7,19 @@ #pragma once +#include "shared/source/memory_manager/graphics_allocation.h" + #include namespace NEO { -class GraphicsAllocation; class SharedPoolAllocation { public: SharedPoolAllocation(GraphicsAllocation *graphicsAllocation, size_t offset, size_t size, std::mutex *mtx) - : graphicsAllocation(graphicsAllocation), offset(offset), size(size), mtx(*mtx){}; + : graphicsAllocation(graphicsAllocation), offset(offset), size(size), mtx(mtx){}; + + explicit SharedPoolAllocation(GraphicsAllocation *graphicsAllocation) + : graphicsAllocation(graphicsAllocation), offset(0u), size(graphicsAllocation ? 
graphicsAllocation->getUnderlyingBufferSize() : 0u), mtx(nullptr){}; GraphicsAllocation *getGraphicsAllocation() const { return graphicsAllocation; @@ -29,15 +33,35 @@ class SharedPoolAllocation { return size; } + uint64_t getGpuAddress() const { + UNRECOVERABLE_IF(graphicsAllocation == nullptr); + return graphicsAllocation->getGpuAddress() + offset; + } + + uint64_t getGpuAddressToPatch() const { + UNRECOVERABLE_IF(graphicsAllocation == nullptr); + return graphicsAllocation->getGpuAddressToPatch() + offset; + } + + void *getUnderlyingBuffer() const { + UNRECOVERABLE_IF(graphicsAllocation == nullptr); + return ptrOffset(graphicsAllocation->getUnderlyingBuffer(), offset); + } + std::unique_lock obtainSharedAllocationLock() { - return std::unique_lock(mtx); + if (mtx) { + return std::unique_lock(*mtx); + } else { + DEBUG_BREAK_IF(true); + return std::unique_lock(); + } } private: GraphicsAllocation *graphicsAllocation; const size_t offset; const size_t size; - std::mutex &mtx; // This mutex is shared across all users of this GA + std::mutex *mtx; // This mutex is shared across all users of this GA }; } // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/compiler_interface/linker_tests.cpp b/shared/test/unit_test/compiler_interface/linker_tests.cpp index d9b0388fcd..4bf8302aab 100644 --- a/shared/test/unit_test/compiler_interface/linker_tests.cpp +++ b/shared/test/unit_test/compiler_interface/linker_tests.cpp @@ -749,8 +749,8 @@ HWTEST_F(LinkerTests, givenEmptyLinkerInputThenLinkerOutputIsEmpty) { NEO::LinkerInput linkerInput; NEO::Linker linker(linkerInput); NEO::Linker::SegmentInfo globalVar, globalConst, exportedFunc; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; NEO::Linker::PatchableSegments patchableInstructionSegments; 
NEO::Linker::UnresolvedExternals unresolvedExternals; NEO::Linker::KernelDescriptorsT kernelDescriptors; @@ -770,8 +770,8 @@ HWTEST_F(LinkerTests, givenInvalidLinkerInputThenLinkerFails) { mockLinkerInput.valid = false; NEO::Linker linker(mockLinkerInput); NEO::Linker::SegmentInfo globalVar, globalConst, exportedFunc; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; NEO::Linker::PatchableSegments patchableInstructionSegments; NEO::Linker::UnresolvedExternals unresolvedExternals; NEO::Linker::KernelDescriptorsT kernelDescriptors; @@ -836,8 +836,8 @@ HWTEST_F(LinkerTests, givenUnresolvedExternalsWhenLinkThenSubDeviceIDSymbolsAreC NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; std::vector instructionSegment; instructionSegment.resize(128u); NEO::Linker::PatchableSegments instructionsSegments; @@ -1013,8 +1013,8 @@ HWTEST_F(LinkerTests, givenUnresolvedExternalWhenPatchingInstructionsThenLinkPar NEO::Linker::PatchableSegment seg0; seg0.hostPointer = instructionSegment.data(); seg0.segmentSize = instructionSegment.size(); - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; NEO::Linker::PatchableSegments patchableInstructionSegments{seg0}; auto linkResult = linker.link( @@ -1116,8 +1116,8 @@ HWTEST_F(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsAreP 
patchableInstructionSegments[1].gpuAddress = 0x0; patchableInstructionSegments[1].segmentSize = instructionSegment.size(); - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; @@ -1189,8 +1189,8 @@ HWTEST_F(LinkerTests, givenInvalidSymbolOffsetWhenPatchingInstructionsThenReloca seg0.hostPointer = instructionSegment.data(); seg0.segmentSize = instructionSegment.size(); NEO::Linker::PatchableSegments patchableInstructionSegments{seg0}; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; auto linkResult = linker.link( globalVarSegment, globalConstSegment, exportedFuncSegment, {}, @@ -1241,8 +1241,8 @@ HWTEST_F(LinkerTests, givenInvalidRelocationOffsetThenPatchingOfInstructionsFail seg0.hostPointer = instructionSegment.data(); seg0.segmentSize = relocA.r_offset; NEO::Linker::PatchableSegments patchableInstructionSegments{seg0}; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; auto linkResult = linker.link( globalVarSegment, globalConstSegment, exportedFuncSegment, {}, @@ -1294,8 +1294,8 @@ HWTEST_F(LinkerTests, givenUnknownSymbolTypeWhenPatchingInstructionsThenRelocati seg0.hostPointer = instructionSegment.data(); seg0.segmentSize = instructionSegment.size(); NEO::Linker::PatchableSegments patchableInstructionSegments{seg0}; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - 
NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; ASSERT_EQ(1U, linkerInput.symbols.count("A")); linkerInput.symbols["A"].segment = NEO::SegmentType::unknown; @@ -1467,19 +1467,22 @@ HWTEST_F(LinkerTests, givenValidSymbolsAndRelocationsWhenPatchingDataSegmentsThe exportedFunctionsInit[0] = 0x12; // <- fun1 exportedFunctionsInit[1] = 0x34; // <- fun2 - NEO::MockGraphicsAllocation globalConstantsPatchableSegment{initGlobalConstantData, sizeof(initGlobalConstantData)}; - NEO::MockGraphicsAllocation globalVariablesPatchableSegment{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; - NEO::MockGraphicsAllocation exportedFunctions{&exportedFunctionsInit, sizeof(exportedFunctionsInit)}; + NEO::MockGraphicsAllocation globalConstantsPatchableSegmentMockGA{initGlobalConstantData, sizeof(initGlobalConstantData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegmentMockGA{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + NEO::MockGraphicsAllocation exportedFunctionsMockGA{&exportedFunctionsInit, sizeof(exportedFunctionsInit)}; + + auto globalConstantsPatchableSegment = std::make_unique(&globalConstantsPatchableSegmentMockGA); + auto globalVariablesPatchableSegment = std::make_unique(&globalVariablesPatchableSegmentMockGA); NEO::Linker::SegmentInfo globalConstantsSegmentInfo, globalVariablesSegmentInfo, exportedFunctionsSegmentInfo; - globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); - globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); - 
globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); - globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment.getUnderlyingBufferSize(); + globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); - exportedFunctionsSegmentInfo.gpuAddress = reinterpret_cast(exportedFunctions.getUnderlyingBuffer()); - exportedFunctionsSegmentInfo.segmentSize = exportedFunctions.getUnderlyingBufferSize(); + exportedFunctionsSegmentInfo.gpuAddress = reinterpret_cast(exportedFunctionsMockGA.getUnderlyingBuffer()); + exportedFunctionsSegmentInfo.segmentSize = exportedFunctionsMockGA.getUnderlyingBufferSize(); WhiteBox linkerInput; auto &fun1 = linkerInput.symbols["fun1"]; @@ -1594,7 +1597,7 @@ HWTEST_F(LinkerTests, givenValidSymbolsAndRelocationsWhenPatchingDataSegmentsThe NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; auto linkResult = linker.link(globalVariablesSegmentInfo, globalConstantsSegmentInfo, exportedFunctionsSegmentInfo, {}, - &globalVariablesPatchableSegment, &globalConstantsPatchableSegment, {}, + globalVariablesPatchableSegment.get(), globalConstantsPatchableSegment.get(), {}, unresolvedExternals, pDevice, initGlobalConstantData, sizeof(initGlobalConstantData), initGlobalVariablesData, sizeof(initGlobalVariablesData), kernelDescriptors, externalFunctions); EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult); @@ -1633,17 +1636,20 @@ HWTEST_F(LinkerTests, givenValidSymbolsAndRelocationsToBssDataSectionsWhenPatchi uint64_t constantsSegmentData[2]{0}; // size 2 * uint64_t - contains also bss at the end uint64_t globalVariablesSegmentData[2]{0}; // size 2 * uint64_t - contains also bss at the end - NEO::MockGraphicsAllocation 
globalConstantsPatchableSegment{constantsSegmentData, sizeof(constantsSegmentData)}; - NEO::MockGraphicsAllocation globalVariablesPatchableSegment{globalVariablesSegmentData, sizeof(globalVariablesSegmentData)}; - globalConstantsPatchableSegment.gpuAddress = 0xA0000000; - globalVariablesPatchableSegment.gpuAddress = 0xB0000000; + NEO::MockGraphicsAllocation globalConstantsPatchableSegmentMockGA{constantsSegmentData, sizeof(constantsSegmentData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegmentMockGA{globalVariablesSegmentData, sizeof(globalVariablesSegmentData)}; + globalConstantsPatchableSegmentMockGA.gpuAddress = 0xA0000000; + globalVariablesPatchableSegmentMockGA.gpuAddress = 0xB0000000; + + auto globalConstantsPatchableSegment = std::make_unique(&globalConstantsPatchableSegmentMockGA); + auto globalVariablesPatchableSegment = std::make_unique(&globalVariablesPatchableSegmentMockGA); NEO::Linker::SegmentInfo globalConstantsSegmentInfo, globalVariablesSegmentInfo; - globalConstantsSegmentInfo.gpuAddress = static_cast(globalConstantsPatchableSegment.getGpuAddress()); - globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + globalConstantsSegmentInfo.gpuAddress = static_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getGpuAddress()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); - globalVariablesSegmentInfo.gpuAddress = static_cast(globalVariablesPatchableSegment.getGpuAddress()); - globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment.getUnderlyingBufferSize(); + globalVariablesSegmentInfo.gpuAddress = static_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getGpuAddress()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); auto setUpInstructionSeg = [](std::vector &instrData, 
NEO::Linker::PatchableSegments &patchableInstrSeg) -> void { uint64_t initData = 0x77777777; @@ -1745,19 +1751,19 @@ HWTEST_F(LinkerTests, givenValidSymbolsAndRelocationsToBssDataSectionsWhenPatchi NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; auto linkResult = linker.link(globalVariablesSegmentInfo, globalConstantsSegmentInfo, {}, {}, - &globalVariablesPatchableSegment, &globalConstantsPatchableSegment, patchableInstructionSegments, + globalVariablesPatchableSegment.get(), globalConstantsPatchableSegment.get(), patchableInstructionSegments, unresolvedExternals, pDevice, initGlobalConstantData, sizeof(initGlobalConstantData), initGlobalVariablesData, sizeof(initGlobalVariablesData), kernelDescriptors, externalFunctions); EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult); EXPECT_EQ(0U, unresolvedExternals.size()); - auto globalConstantsSegmentAddr = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); - auto globalVariableSegmentAddr = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); + auto globalConstantsSegmentAddr = reinterpret_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + auto globalVariableSegmentAddr = reinterpret_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); - auto var1Addr = globalVariablesPatchableSegment.getGpuAddress(); - auto const1Addr = globalConstantsPatchableSegment.getGpuAddress(); - auto bssConstAddrr = globalConstantsPatchableSegment.getGpuAddress() + sizeof(initGlobalConstantData); - auto bssVarAddr = globalVariablesPatchableSegment.getGpuAddress() + sizeof(initGlobalVariablesData); + auto var1Addr = globalVariablesPatchableSegment->getGraphicsAllocation()->getGpuAddress(); + auto const1Addr = globalConstantsPatchableSegment->getGraphicsAllocation()->getGpuAddress(); + auto bssConstAddrr = 
globalConstantsPatchableSegment->getGraphicsAllocation()->getGpuAddress() + sizeof(initGlobalConstantData); + auto bssVarAddr = globalVariablesPatchableSegment->getGraphicsAllocation()->getGpuAddress() + sizeof(initGlobalVariablesData); EXPECT_EQ(var1Addr, *(globalConstantsSegmentAddr + 1)); EXPECT_EQ(const1Addr, *(globalVariableSegmentAddr + 1)); @@ -1769,15 +1775,18 @@ HWTEST_F(LinkerTests, givenInvalidSymbolWhenPatchingDataSegmentsThenRelocationIs uint64_t initGlobalConstantData[3] = {}; uint64_t initGlobalVariablesData[3] = {}; - NEO::MockGraphicsAllocation globalConstantsPatchableSegment{initGlobalConstantData, sizeof(initGlobalConstantData)}; - NEO::MockGraphicsAllocation globalVariablesPatchableSegment{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + NEO::MockGraphicsAllocation globalConstantsPatchableSegmentMockGA{initGlobalConstantData, sizeof(initGlobalConstantData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegmentMockGA{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + + auto globalConstantsPatchableSegment = std::make_unique(&globalConstantsPatchableSegmentMockGA); + auto globalVariablesPatchableSegment = std::make_unique(&globalVariablesPatchableSegmentMockGA); NEO::Linker::SegmentInfo globalConstantsSegmentInfo, globalVariablesSegmentInfo; - globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); - globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); - globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); - globalVariablesSegmentInfo.segmentSize = 
globalVariablesPatchableSegment.getUnderlyingBufferSize(); + globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); WhiteBox linkerInput; NEO::LinkerInput::RelocationInfo relocationInfo; @@ -1792,7 +1801,7 @@ HWTEST_F(LinkerTests, givenInvalidSymbolWhenPatchingDataSegmentsThenRelocationIs NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; auto linkResult = linker.link(globalVariablesSegmentInfo, globalConstantsSegmentInfo, {}, {}, - &globalVariablesPatchableSegment, &globalConstantsPatchableSegment, {}, + globalVariablesPatchableSegment.get(), globalConstantsPatchableSegment.get(), {}, unresolvedExternals, pDevice, initGlobalConstantData, sizeof(initGlobalConstantData), initGlobalVariablesData, sizeof(initGlobalVariablesData), kernelDescriptors, externalFunctions); EXPECT_EQ(NEO::LinkingStatus::linkedPartially, linkResult); @@ -1803,15 +1812,18 @@ HWTEST_F(LinkerTests, givenInvalidRelocationOffsetWhenPatchingDataSegmentsThenRe uint64_t initGlobalConstantData[3] = {}; uint64_t initGlobalVariablesData[3] = {}; - NEO::MockGraphicsAllocation globalConstantsPatchableSegment{initGlobalConstantData, sizeof(initGlobalConstantData)}; - NEO::MockGraphicsAllocation globalVariablesPatchableSegment{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + NEO::MockGraphicsAllocation globalConstantsPatchableSegmentMockGA{initGlobalConstantData, sizeof(initGlobalConstantData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegmentMockGA{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + + auto globalConstantsPatchableSegment = std::make_unique(&globalConstantsPatchableSegmentMockGA); + auto globalVariablesPatchableSegment = std::make_unique(&globalVariablesPatchableSegmentMockGA); 
NEO::Linker::SegmentInfo globalConstantsSegmentInfo, globalVariablesSegmentInfo; - globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); - globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); - globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); - globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment.getUnderlyingBufferSize(); + globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); WhiteBox linkerInput; auto &symbol = linkerInput.symbols["symbol"]; @@ -1831,7 +1843,7 @@ HWTEST_F(LinkerTests, givenInvalidRelocationOffsetWhenPatchingDataSegmentsThenRe NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; auto linkResult = linker.link(globalVariablesSegmentInfo, globalConstantsSegmentInfo, {}, {}, - &globalVariablesPatchableSegment, &globalConstantsPatchableSegment, {}, + globalVariablesPatchableSegment.get(), globalConstantsPatchableSegment.get(), {}, unresolvedExternals, pDevice, initGlobalConstantData, sizeof(initGlobalConstantData), initGlobalVariablesData, sizeof(initGlobalVariablesData), kernelDescriptors, externalFunctions); EXPECT_EQ(NEO::LinkingStatus::linkedPartially, linkResult); @@ -1868,15 +1880,18 @@ HWTEST_F(LinkerTests, given32BitBinaryWithValidSymbolsAndRelocationsWhenPatching uint64_t initGlobalConstantData[3] = {}; uint64_t 
initGlobalVariablesData[3] = {}; - NEO::MockGraphicsAllocation globalConstantsPatchableSegment{initGlobalConstantData, sizeof(initGlobalConstantData)}; - NEO::MockGraphicsAllocation globalVariablesPatchableSegment{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + NEO::MockGraphicsAllocation globalConstantsPatchableSegmentMockGA{initGlobalConstantData, sizeof(initGlobalConstantData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegmentMockGA{initGlobalVariablesData, sizeof(initGlobalVariablesData)}; + + auto globalConstantsPatchableSegment = std::make_unique(&globalConstantsPatchableSegmentMockGA); + auto globalVariablesPatchableSegment = std::make_unique(&globalVariablesPatchableSegmentMockGA); NEO::Linker::SegmentInfo globalConstantsSegmentInfo, globalVariablesSegmentInfo; - globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); - globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); - globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); - globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment.getUnderlyingBufferSize(); + globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); WhiteBox linkerInput; linkerInput.setPointerSize(NEO::LinkerInput::Traits::PointerSize::Ptr32bit); @@ -1897,7 +1912,7 @@ HWTEST_F(LinkerTests, given32BitBinaryWithValidSymbolsAndRelocationsWhenPatching 
NEO::Linker::KernelDescriptorsT kernelDescriptors; NEO::Linker::ExternalFunctionsT externalFunctions; auto linkResult = linker.link(globalVariablesSegmentInfo, globalConstantsSegmentInfo, {}, {}, - &globalVariablesPatchableSegment, &globalConstantsPatchableSegment, {}, + globalVariablesPatchableSegment.get(), globalConstantsPatchableSegment.get(), {}, unresolvedExternals, pDevice, initGlobalConstantData, sizeof(initGlobalConstantData), initGlobalVariablesData, sizeof(initGlobalVariablesData), kernelDescriptors, externalFunctions); EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult); @@ -2630,8 +2645,8 @@ HWTEST_F(LinkerTests, givenDependencyOnMissingExternalFunctionWhenLinkingThenFai linkerInput.extFunDependencies.push_back({"fun0", "fun1"}); NEO::Linker linker(linkerInput); NEO::Linker::SegmentInfo globalVar, globalConst, exportedFunc; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; NEO::Linker::PatchableSegments patchableInstructionSegments; NEO::Linker::UnresolvedExternals unresolvedExternals; NEO::Linker::KernelDescriptorsT kernelDescriptors; @@ -2648,8 +2663,8 @@ HWTEST_F(LinkerTests, givenDependencyOnMissingExternalFunctionAndNoExternalFunct linkerInput.extFunDependencies.push_back({"fun0", "fun1"}); NEO::Linker linker(linkerInput); NEO::Linker::SegmentInfo globalVar, globalConst, exportedFunc; - NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; - NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableGlobalVarSeg = nullptr; + NEO::SharedPoolAllocation *patchableConstVarSeg = nullptr; NEO::Linker::PatchableSegments patchableInstructionSegments; NEO::Linker::UnresolvedExternals unresolvedExternals; NEO::Linker::KernelDescriptorsT kernelDescriptors; @@ -2689,11 +2704,12 @@ TEST_F(LinkerTests, 
givenRelaWhenPatchingInstructionsSegmentThenAddendIsAdded) { HWTEST_F(LinkerTests, givenRelaWhenPatchingDataSegmentThenAddendIsAdded) { uint64_t globalConstantSegmentData{0U}; - NEO::MockGraphicsAllocation globalConstantsPatchableSegment{&globalConstantSegmentData, sizeof(globalConstantSegmentData)}; + NEO::MockGraphicsAllocation globalConstantsPatchableSegmentMockGA{&globalConstantSegmentData, sizeof(globalConstantSegmentData)}; + auto globalConstantsPatchableSegment = std::make_unique(&globalConstantsPatchableSegmentMockGA); NEO::Linker::SegmentInfo globalConstantsSegmentInfo; - globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment.getUnderlyingBuffer()); - globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment.getUnderlyingBufferSize(); + globalConstantsSegmentInfo.gpuAddress = reinterpret_cast(globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalConstantsSegmentInfo.segmentSize = globalConstantsPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); WhiteBox linkerInput; linkerInput.traits.requiresPatchingOfGlobalConstantsBuffer = true; @@ -2710,17 +2726,18 @@ HWTEST_F(LinkerTests, givenRelaWhenPatchingDataSegmentThenAddendIsAdded) { linker.relocatedSymbols[rela.symbolName].gpuAddress = symValue; NEO::Linker::UnresolvedExternals unresolvedExternals; - linker.patchDataSegments({}, globalConstantsSegmentInfo, {}, &globalConstantsPatchableSegment, unresolvedExternals, pDevice, &globalConstantSegmentData, sizeof(globalConstantSegmentData), nullptr, 0); + linker.patchDataSegments({}, globalConstantsSegmentInfo, {}, globalConstantsPatchableSegment.get(), unresolvedExternals, pDevice, &globalConstantSegmentData, sizeof(globalConstantSegmentData), nullptr, 0); EXPECT_EQ(static_cast(rela.addend + symValue), globalConstantSegmentData); } HWTEST_F(LinkerTests, givenRelocationInfoWhenPatchingDataSegmentWithGlobalVariableSymbolThenAddendIsAdded) { uint64_t 
globalVariableSegmentData{0U}; - NEO::MockGraphicsAllocation globalVariablesPatchableSegment{&globalVariableSegmentData, sizeof(globalVariableSegmentData)}; + NEO::MockGraphicsAllocation globalVariablesPatchableSegmentMockGA{&globalVariableSegmentData, sizeof(globalVariableSegmentData)}; + auto globalVariablesPatchableSegment = std::make_unique(&globalVariablesPatchableSegmentMockGA); NEO::Linker::SegmentInfo globalVariablesSegmentInfo; - globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment.getUnderlyingBuffer()); - globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment.getUnderlyingBufferSize(); + globalVariablesSegmentInfo.gpuAddress = reinterpret_cast(globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBuffer()); + globalVariablesSegmentInfo.segmentSize = globalVariablesPatchableSegment->getGraphicsAllocation()->getUnderlyingBufferSize(); WhiteBox linkerInput; linkerInput.traits.requiresPatchingOfGlobalVariablesBuffer = true; @@ -2737,7 +2754,7 @@ HWTEST_F(LinkerTests, givenRelocationInfoWhenPatchingDataSegmentWithGlobalVariab linker.relocatedSymbols[relocationInfo.symbolName].gpuAddress = symValue; NEO::Linker::UnresolvedExternals unresolvedExternals; - linker.patchDataSegments(globalVariablesSegmentInfo, {}, &globalVariablesPatchableSegment, {}, unresolvedExternals, pDevice, nullptr, 0, &globalVariableSegmentData, sizeof(globalVariableSegmentData)); + linker.patchDataSegments(globalVariablesSegmentInfo, {}, globalVariablesPatchableSegment.get(), {}, unresolvedExternals, pDevice, nullptr, 0, &globalVariableSegmentData, sizeof(globalVariableSegmentData)); EXPECT_EQ(static_cast(relocationInfo.addend + symValue), globalVariableSegmentData); } diff --git a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp index aecfd457da..aa99a6ed26 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp +++ 
b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -2769,14 +2769,14 @@ TEST(MemoryManagerTest, whenMemoryManagerReturnsNullptrThenAllocateGlobalsSurfac linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; memoryManager->recentlyPassedDeviceBitfield = {}; - GraphicsAllocation *allocation = allocateGlobalsSurface(nullptr, device, 1024, 0u, false, &linkerInput, nullptr); - EXPECT_EQ(nullptr, allocation); + std::unique_ptr globalSurface = std::unique_ptr(allocateGlobalsSurface(nullptr, device, 1024, 0u, false, &linkerInput, nullptr)); + EXPECT_EQ(nullptr, globalSurface); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); auto svmAllocsManager = std::make_unique(memoryManager); memoryManager->recentlyPassedDeviceBitfield = {}; - allocation = allocateGlobalsSurface(svmAllocsManager.get(), device, 1024, 0u, false, &linkerInput, nullptr); - EXPECT_EQ(nullptr, allocation); + globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, 1024, 0u, false, &linkerInput, nullptr)); + EXPECT_EQ(nullptr, globalSurface); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); } diff --git a/shared/test/unit_test/program/program_initialization_tests.cpp b/shared/test/unit_test/program/program_initialization_tests.cpp index b612142760..9f13d15f3d 100644 --- a/shared/test/unit_test/program/program_initialization_tests.cpp +++ b/shared/test/unit_test/program/program_initialization_tests.cpp @@ -30,10 +30,13 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh WhiteBox emptyLinkerInput; std::vector initData; initData.resize(64, 7U); - GraphicsAllocation *alloc = nullptr; + std::unique_ptr globalSurface; + GraphicsAllocation *alloc{nullptr}; size_t aligmentSize = alignUp(initData.size(), MemoryConstants::pageSize); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, 
initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(aligmentSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -41,7 +44,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(aligmentSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -49,7 +54,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh EXPECT_EQ(AllocationType::globalSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(aligmentSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), 
initData.size())); @@ -57,7 +64,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(aligmentSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -78,10 +87,13 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM linkerInputExportGlobalConstants.traits.exportsGlobalConstants = true; std::vector initData; initData.resize(64, 7U); - GraphicsAllocation *alloc = nullptr; + std::unique_ptr globalSurface; + GraphicsAllocation *alloc{nullptr}; size_t expectedAlignedSize = alignUp(initData.size(), MemoryConstants::pageSize); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(MemoryConstants::pageSize64k, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -95,7 +107,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM EXPECT_TRUE(svmAllocsManager.requestedZeroedOutAllocation); 
svmAllocsManager.freeSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress()))); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -103,7 +117,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalConstants, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalConstants, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -111,7 +127,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM EXPECT_EQ(AllocationType::globalSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()); + globalSurface.reset(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, 
&linkerInputExportGlobalVariables, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(MemoryConstants::pageSize64k, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -132,31 +150,40 @@ TEST(AllocateGlobalSurfaceTest, GivenNullSvmAllocsManagerWhenGlobalsAreExportedT linkerInputExportGlobalConstants.traits.exportsGlobalConstants = true; std::vector initData; initData.resize(64, 7U); - GraphicsAllocation *alloc = nullptr; + std::unique_ptr globalSurface; + GraphicsAllocation *alloc{nullptr}; size_t expectedAlignedSize = alignUp(initData.size(), MemoryConstants::pageSize); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()); + globalSurface.reset(allocateGlobalsSurface(nullptr, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data()); + globalSurface.reset(allocateGlobalsSurface(nullptr, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), 
initData.size())); EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalConstants, initData.data()); + globalSurface.reset(allocateGlobalsSurface(nullptr, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalConstants, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(AllocationType::globalSurface, alloc->getAllocationType()); device.getMemoryManager()->freeGraphicsMemory(alloc); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()); + globalSurface.reset(allocateGlobalsSurface(nullptr, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); ASSERT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); @@ -173,31 +200,31 @@ TEST(AllocateGlobalSurfaceTest, WhenGlobalsAreNotExportedAndAllocationFailsThenG WhiteBox emptyLinkerInput; std::vector initData; initData.resize(64, 7U); - GraphicsAllocation *alloc = nullptr; + std::unique_ptr globalSurface; - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, 
initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(&mockSvmAllocsManager, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, false /* constant */, nullptr /* linker input */, initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = 
allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, true /* constant */, &emptyLinkerInput, initData.data())); + EXPECT_EQ(nullptr, globalSurface); - alloc = allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data()); - EXPECT_EQ(nullptr, alloc); + globalSurface.reset(allocateGlobalsSurface(nullptr /* svmAllocsManager */, device, initData.size(), 0u, false /* constant */, &emptyLinkerInput, initData.data())); + EXPECT_EQ(nullptr, globalSurface); } TEST(AllocateGlobalSurfaceTest, GivenAllocationInLocalMemoryWhichRequiresBlitterWhenAllocatingNonSvmAllocationThenBlitterIsUsed) { @@ -228,9 +255,10 @@ TEST(AllocateGlobalSurfaceTest, GivenAllocationInLocalMemoryWhichRequiresBlitter MockDevice device; device.getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; MockSVMAllocsManager svmAllocsManager(device.getMemoryManager()); + std::unique_ptr globalSurface = std::unique_ptr(allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, nullptr /* linker input */, initData.data())); - auto pAllocation = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, - nullptr /* linker input */, initData.data()); + ASSERT_NE(nullptr, globalSurface); + GraphicsAllocation *pAllocation{globalSurface->getGraphicsAllocation()}; ASSERT_NE(nullptr, pAllocation); EXPECT_EQ(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(pAllocation->getGpuAddress())))); EXPECT_EQ(AllocationType::constantSurface, pAllocation->getAllocationType()); @@ -252,10 +280,13 @@ TEST(AllocateGlobalSurfaceTest, 
whenAllocatingGlobalSurfaceWithNonZeroZeroInitSi std::vector initData; initData.resize(64, 7u); std::fill(initData.begin() + 32, initData.end(), 16u); // this data should not be transfered - GraphicsAllocation *alloc = nullptr; + std::unique_ptr globalSurface; + GraphicsAllocation *alloc{nullptr}; size_t zeroInitSize = 32u; size_t expectedAlignedSize = alignUp(initData.size(), MemoryConstants::pageSize); - alloc = allocateGlobalsSurface(nullptr, device, initData.size(), zeroInitSize, true, &emptyLinkerInput, initData.data()); + globalSurface.reset(allocateGlobalsSurface(nullptr, device, initData.size(), zeroInitSize, true, &emptyLinkerInput, initData.data())); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); EXPECT_EQ(expectedAlignedSize, alloc->getUnderlyingBufferSize()); @@ -272,8 +303,11 @@ TEST(AllocateGlobalSurfaceTest, whenAllocatingGlobalSurfaceWithZeroInitSizeGreat ASSERT_EQ(0u, static_cast(device.getMemoryManager())->copyMemoryToAllocationBanksCalled); size_t totalSize = 64u, zeroInitSize = 64u; - GraphicsAllocation *alloc = nullptr; - alloc = allocateGlobalsSurface(nullptr, device, totalSize, zeroInitSize, true, nullptr, nullptr); + std::unique_ptr globalSurface; + GraphicsAllocation *alloc{nullptr}; + globalSurface.reset(allocateGlobalsSurface(nullptr, device, totalSize, zeroInitSize, true, nullptr, nullptr)); + ASSERT_NE(nullptr, globalSurface); + alloc = globalSurface->getGraphicsAllocation(); ASSERT_NE(nullptr, alloc); EXPECT_EQ(0u, static_cast(device.getMemoryManager())->copyMemoryToAllocationBanksCalled); diff --git a/shared/test/unit_test/utilities/CMakeLists.txt b/shared/test/unit_test/utilities/CMakeLists.txt index cba488ba9f..1bc0eaf7e4 100644 --- a/shared/test/unit_test/utilities/CMakeLists.txt +++ b/shared/test/unit_test/utilities/CMakeLists.txt @@ -22,6 +22,7 @@ target_sources(neo_shared_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/numeric_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/reference_tracked_object_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/shared_pool_allocation_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/software_tags_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sorted_vector_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/spinlock_tests.cpp diff --git a/shared/test/unit_test/utilities/shared_pool_allocation_tests.cpp b/shared/test/unit_test/utilities/shared_pool_allocation_tests.cpp new file mode 100644 index 0000000000..af6d3f6e89 --- /dev/null +++ b/shared/test/unit_test/utilities/shared_pool_allocation_tests.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/utilities/shared_pool_allocation.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" +#include "shared/test/common/test_macros/test.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +struct SharedPoolAllocationTest : public ::testing::Test { + void SetUp() override { + buffer = reinterpret_cast(gpuAddress); + mockAllocation = std::make_unique(buffer, gpuAddress, allocationSize); + } + + const uint64_t gpuAddress = 0x1200; + const size_t allocationSize = 4096; + void *buffer = nullptr; + std::unique_ptr mockAllocation; +}; + +TEST_F(SharedPoolAllocationTest, givenSharedPoolAllocationWithOffsetWhenAccessingGettersThenReturnsCorrectValues) { + constexpr size_t chunkOffset = 64; + constexpr size_t chunkSize = 256; + SharedPoolAllocation sharedPoolAllocation(mockAllocation.get(), chunkOffset, chunkSize, nullptr); + + EXPECT_EQ(chunkOffset, sharedPoolAllocation.getOffset()); + EXPECT_EQ(chunkSize, sharedPoolAllocation.getSize()); + EXPECT_EQ(mockAllocation.get(), sharedPoolAllocation.getGraphicsAllocation()); + EXPECT_EQ(mockAllocation->getGpuAddress() + chunkOffset, sharedPoolAllocation.getGpuAddress()); + EXPECT_EQ(mockAllocation->getGpuAddressToPatch() + chunkOffset, 
sharedPoolAllocation.getGpuAddressToPatch()); + EXPECT_EQ(ptrOffset(mockAllocation->getUnderlyingBuffer(), chunkOffset), sharedPoolAllocation.getUnderlyingBuffer()); +} + +TEST_F(SharedPoolAllocationTest, givenSharedPoolAllocationWithNonPooledGraphicsAllocationWhenAccessingGettersThenReturnsZeroOffsetAndBaseValues) { + SharedPoolAllocation sharedPoolAllocation(mockAllocation.get()); + + EXPECT_EQ(0u, sharedPoolAllocation.getOffset()); + EXPECT_EQ(mockAllocation->getUnderlyingBufferSize(), sharedPoolAllocation.getSize()); + EXPECT_EQ(mockAllocation.get(), sharedPoolAllocation.getGraphicsAllocation()); + EXPECT_EQ(mockAllocation->getGpuAddress(), sharedPoolAllocation.getGpuAddress()); + EXPECT_EQ(mockAllocation->getGpuAddressToPatch(), sharedPoolAllocation.getGpuAddressToPatch()); + EXPECT_EQ(mockAllocation->getUnderlyingBuffer(), sharedPoolAllocation.getUnderlyingBuffer()); +} + +TEST_F(SharedPoolAllocationTest, givenSharedPoolAllocationWithMutexWhenObtainingLockThenMutexIsProperlyLocked) { + constexpr size_t chunkOffset = 64; + constexpr size_t chunkSize = 256; + std::mutex mtx; + SharedPoolAllocation sharedPoolAllocation(mockAllocation.get(), chunkOffset, chunkSize, &mtx); + + { + auto lock = sharedPoolAllocation.obtainSharedAllocationLock(); + EXPECT_TRUE(lock.owns_lock()); + EXPECT_EQ(&mtx, lock.mutex()); + EXPECT_FALSE(mtx.try_lock()); + } + + EXPECT_TRUE(mtx.try_lock()); + mtx.unlock(); +} + +TEST_F(SharedPoolAllocationTest, givenSharedPoolAllocationWithoutMutexWhenObtainingLockThenReturnsEmptyLock) { + SharedPoolAllocation sharedPoolAllocation(mockAllocation.get()); + + auto lock = sharedPoolAllocation.obtainSharedAllocationLock(); + + EXPECT_FALSE(lock.owns_lock()); + EXPECT_EQ(nullptr, lock.mutex()); +} \ No newline at end of file