feature: add global/const surface nonUSM allocation pooling

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-10-29 13:06:06 +00:00
committed by Compute-Runtime-Automation
parent 2eab7a12c9
commit 3c43276dab
16 changed files with 547 additions and 40 deletions

View File

@@ -126,6 +126,14 @@ class ProgramDataTestBase : public testing::Test,
programPatchListSize = static_cast<uint32_t>(allocateGlobalMemorySurface.Size + globalSize);
return globalSize;
}
// Installs a fresh MockProductHelper on the device's root device environment,
// configured to report 2MB local-memory alignment as disabled. The fixture keeps
// a raw pointer to the mock in mockProductHelper for later inspection.
void disableGlobalConstSurfacePooling() {
    auto &rootDeviceEnvironment = pClDevice->getDevice().getRootDeviceEnvironmentRef();
    mockProductHelper = new MockProductHelper;
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
    // reset() hands the mock over to productHelper; the fixture pointer only observes it.
    rootDeviceEnvironment.productHelper.reset(mockProductHelper);
}
// Raw pointer to the mock installed by disableGlobalConstSurfacePooling(); starts as nullptr.
// The fixture does not delete it here: that helper passes the same pointer to the
// root device environment's productHelper via reset().
MockProductHelper *mockProductHelper{nullptr};
std::unique_ptr<cl_char[]> pAllocateConstMemorySurface;
std::unique_ptr<cl_char[]> pAllocateGlobalMemorySurface;
char *pCurPtr;
@@ -273,6 +281,63 @@ TEST_F(ProgramDataTest, GivenUsmPoolAnd2MBAlignmentEnabledWhenGlobalsExportedThe
EXPECT_EQ(1u, usmGlobalSurfaceAllocPool->freeSVMAllocCalled);
}
// Scenario: the product helper reports 2MB local-memory alignment as enabled and the
// linker input exports neither global constants nor global variables.
// Expectation: both surfaces are served by the device's constant/global surface pool
// allocators, and after the program is destroyed a same-size request to each pool
// returns the same graphics allocation.
TEST_F(ProgramDataTest, GivenGenericPoolAnd2MBAlignmentEnabledWhenGlobalsNotExportedThenAllocateSurfacesFromGenericPoolAndFreeOnProgramDestroy) {
    auto &device = pClDevice->getDevice();

    // Swap in a product helper that reports 2MB local-memory alignment as enabled.
    auto productHelperMock = new MockProductHelper;
    device.getRootDeviceEnvironmentRef().productHelper.reset(productHelperMock);
    productHelperMock->is2MBLocalMemAlignmentEnabledResult = true;

    // Sizes are taken from each pool's maxAllocationSize trait.
    constexpr size_t constantDataSize = ConstantSurfacePoolTraits::maxAllocationSize;
    constexpr size_t globalDataSize = GlobalSurfacePoolTraits::maxAllocationSize;
    std::vector<char> constantInitData(constantDataSize, 7);
    std::vector<char> globalInitData(globalDataSize, 9);

    // Linker input that exports neither constants nor variables.
    auto linkerInput = std::make_unique<WhiteBox<NEO::LinkerInput>>();
    linkerInput->traits.exportsGlobalConstants = false;
    linkerInput->traits.exportsGlobalVariables = false;

    ProgramInfo programInfo;
    programInfo.globalConstants.initData = constantInitData.data();
    programInfo.globalConstants.size = constantDataSize;
    programInfo.globalVariables.initData = globalInitData.data();
    programInfo.globalVariables.size = globalDataSize;
    programInfo.linkerInput = std::move(linkerInput);

    pProgram->processProgramInfo(programInfo, *pClDevice);

    const auto rootDeviceIndex = pContext->getDevice(0)->getRootDeviceIndex();

    // Constant surface must exist and be backed by the constant surface pool.
    auto constantSurface = pProgram->getConstantSurface(rootDeviceIndex);
    ASSERT_NE(nullptr, constantSurface);
    ASSERT_NE(nullptr, constantSurface->getGraphicsAllocation());
    EXPECT_TRUE(constantSurface->isFromPool());
    EXPECT_TRUE(device.getConstantSurfacePoolAllocator().isPoolBuffer(constantSurface->getGraphicsAllocation()));

    // Global surface must exist and be backed by the global surface pool.
    auto globalSurface = pProgram->getGlobalSurface(rootDeviceIndex);
    ASSERT_NE(nullptr, globalSurface);
    ASSERT_NE(nullptr, globalSurface->getGraphicsAllocation());
    EXPECT_TRUE(globalSurface->isFromPool());
    EXPECT_TRUE(device.getGlobalSurfacePoolAllocator().isPoolBuffer(globalSurface->getGraphicsAllocation()));

    // Remember the backing allocations so they can be compared after the program is gone.
    auto constantAllocationBeforeDestroy = constantSurface->getGraphicsAllocation();
    auto globalAllocationBeforeDestroy = globalSurface->getGraphicsAllocation();

    delete pProgram;
    pProgram = nullptr;

    // Request the same sizes again; each pool is expected to return the same graphics allocation.
    auto reacquiredConstant = device.getConstantSurfacePoolAllocator().requestGraphicsAllocation(constantDataSize);
    ASSERT_NE(nullptr, reacquiredConstant);
    EXPECT_TRUE(reacquiredConstant->isFromPool());
    EXPECT_EQ(constantAllocationBeforeDestroy, reacquiredConstant->getGraphicsAllocation());

    auto reacquiredGlobal = device.getGlobalSurfacePoolAllocator().requestGraphicsAllocation(globalDataSize);
    ASSERT_NE(nullptr, reacquiredGlobal);
    EXPECT_TRUE(reacquiredGlobal->isFromPool());
    EXPECT_EQ(globalAllocationBeforeDestroy, reacquiredGlobal->getGraphicsAllocation());

    // Give the verification allocations back to their pools.
    device.getConstantSurfacePoolAllocator().freeSharedAllocation(reacquiredConstant);
    device.getGlobalSurfacePoolAllocator().freeSharedAllocation(reacquiredGlobal);
}
TEST_F(ProgramDataTest, whenGlobalConstantsAreNotExportedThenAllocateSurfacesAsNonSvm) {
if (this->pContext->getSVMAllocsManager() == nullptr) {
return;
@@ -498,7 +563,7 @@ TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndGlobalVariablesMemorySurfa
}
TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) {
disableGlobalConstSurfacePooling();
setupConstantAllocation();
buildAndDecodeProgramPatchList();
@@ -518,6 +583,7 @@ TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBein
}
TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) {
disableGlobalConstSurfacePooling();
setupGlobalAllocation();
buildAndDecodeProgramPatchList();

View File

@@ -585,6 +585,66 @@ TEST_F(ProgramFromBinaryTest, GivenUsmPoolAnd2MBAlignmentEnabledWhenProgramIsBei
EXPECT_EQ(1u, usmGlobalSurfaceAllocPool->freeSVMAllocCalled);
}
// Scenario: the product helper reports 2MB local-memory alignment as enabled and a
// pool-backed constant (then global) surface is attached to the program's build info.
// Expectation: processGenBinary() clears both surfaces, and a same-size request to the
// matching pool afterwards returns the graphics allocation the surface was using.
TEST_F(ProgramFromBinaryTest, GivenGenericPoolAnd2MBAlignmentEnabledWhenProgramIsBeingRebuildThenOutdatedGlobalBuffersAreFreedFromGenericPool) {
    pProgram->build(pProgram->getDevices(), nullptr);

    const auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].constantSurface);
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].globalSurface);

    auto &device = pClDevice->getDevice();

    // Swap in a product helper that reports 2MB local-memory alignment as enabled.
    auto productHelperMock = new MockProductHelper;
    device.getRootDeviceEnvironmentRef().productHelper.reset(productHelperMock);
    productHelperMock->is2MBLocalMemAlignmentEnabledResult = true;

    // Three quarters of each pool's maxAllocationSize trait.
    constexpr size_t constantDataSize = ConstantSurfacePoolTraits::maxAllocationSize * 3 / 4;
    constexpr size_t globalDataSize = GlobalSurfacePoolTraits::maxAllocationSize * 3 / 4;
    std::vector<unsigned char> constantInitData(constantDataSize, 0x5B);
    std::vector<unsigned char> globalInitData(globalDataSize, 0x7C);

    // Linker input that exports neither constants nor variables.
    WhiteBox<NEO::LinkerInput> linkerInputNoExports;
    linkerInputNoExports.traits.exportsGlobalConstants = false;
    linkerInputNoExports.traits.exportsGlobalVariables = false;

    // --- Constant surface ---
    auto newConstantSurface = allocateGlobalsSurface(nullptr, device, constantDataSize, 0u, true, &linkerInputNoExports, constantInitData.data());
    pProgram->buildInfos[rootDeviceIndex].constantSurface.reset(newConstantSurface);
    auto &constantSurface = pProgram->buildInfos[rootDeviceIndex].constantSurface;
    ASSERT_NE(nullptr, constantSurface);
    EXPECT_TRUE(constantSurface->isFromPool());
    EXPECT_TRUE(device.getConstantSurfacePoolAllocator().isPoolBuffer(constantSurface->getGraphicsAllocation()));

    // Capture the backing allocation before processGenBinary drops the surface.
    auto constantAllocationBeforeRebuild = constantSurface->getGraphicsAllocation();

    pProgram->processGenBinary(*pClDevice);
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].constantSurface.get());
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].globalSurface.get());

    // The constant surface counts as freed if a same-size request yields the same graphics allocation.
    auto reacquiredConstant = device.getConstantSurfacePoolAllocator().requestGraphicsAllocation(constantDataSize);
    ASSERT_NE(nullptr, reacquiredConstant);
    EXPECT_TRUE(reacquiredConstant->isFromPool());
    EXPECT_EQ(constantAllocationBeforeRebuild, reacquiredConstant->getGraphicsAllocation());
    device.getConstantSurfacePoolAllocator().freeSharedAllocation(reacquiredConstant);

    // --- Global surface ---
    auto newGlobalSurface = allocateGlobalsSurface(nullptr, device, globalDataSize, 0u, false, &linkerInputNoExports, globalInitData.data());
    pProgram->buildInfos[rootDeviceIndex].globalSurface.reset(newGlobalSurface);
    auto &globalSurface = pProgram->buildInfos[rootDeviceIndex].globalSurface;
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_TRUE(globalSurface->isFromPool());
    EXPECT_TRUE(device.getGlobalSurfacePoolAllocator().isPoolBuffer(globalSurface->getGraphicsAllocation()));

    // Capture the backing allocation before processGenBinary drops the surface.
    auto globalAllocationBeforeRebuild = globalSurface->getGraphicsAllocation();

    pProgram->processGenBinary(*pClDevice);
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].constantSurface.get());
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].globalSurface.get());

    // The global surface counts as freed if a same-size request yields the same graphics allocation.
    auto reacquiredGlobal = device.getGlobalSurfacePoolAllocator().requestGraphicsAllocation(globalDataSize);
    ASSERT_NE(nullptr, reacquiredGlobal);
    EXPECT_TRUE(reacquiredGlobal->isFromPool());
    EXPECT_EQ(globalAllocationBeforeRebuild, reacquiredGlobal->getGraphicsAllocation());
    device.getGlobalSurfacePoolAllocator().freeSharedAllocation(reacquiredGlobal);
}
TEST_F(ProgramFromBinaryTest, givenProgramWhenCleanKernelInfoIsCalledThenKernelAllocationIsFreed) {
pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_EQ(1u, pProgram->getNumKernels());