From 0cf5b36b26c2cfcf26f14d747110f78cec852ed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Zwoli=C5=84ski?= Date: Tue, 14 Oct 2025 15:13:11 +0000 Subject: [PATCH] fix: zero-initialize chunks from pool in allocateGlobalsSurface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reusing memory chunks from the usm allocation pool, they may contain stale data from previous executions. This commit ensures that pooled allocations are properly zero-initialized before use by: - Transferring init data using transferMemoryToAllocation (unchanged) - Zero-initializing the BSS section via memsetAllocation when present - Zero-initializing entire totalSize for BSS-only allocations This prevents stale data from affecting program execution when chunks are reused from the pool. HSD-13013893112, HSD-18043476772, HSD-18043481899, HSD-18043487849, HSD-18043489182 Related-To: NEO-12287 Signed-off-by: Fabian Zwoliński --- .../source/program/program_initialization.cpp | 10 ++ .../program/program_initialization_tests.cpp | 142 ++++++++++++++++++ 2 files changed, 152 insertions(+) diff --git a/shared/source/program/program_initialization.cpp b/shared/source/program/program_initialization.cpp index 90e0001d19..1247335c75 100644 --- a/shared/source/program/program_initialization.cpp +++ b/shared/source/program/program_initialization.cpp @@ -120,6 +120,12 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll device, gpuAllocation, allocationOffset, initData, initSize); UNRECOVERABLE_IF(!success); + if (isAllocatedFromPool && zeroInitSize > 0) { + auto success = MemoryTransferHelper::memsetAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation), + device, gpuAllocation, allocationOffset + initSize, 0, zeroInitSize); + UNRECOVERABLE_IF(!success); + } + if (auto csr = device.getDefaultEngine().commandStreamReceiver; isAllocatedFromPool && csr->getType() != 
NEO::CommandStreamReceiverType::hardware) { auto writeMemoryOperation = [&]() { @@ -141,6 +147,10 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll writeMemoryOperation(); } } + } else if (isAllocatedFromPool) { + auto success = MemoryTransferHelper::memsetAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation), + device, gpuAllocation, allocationOffset, 0, totalSize); + UNRECOVERABLE_IF(!success); } return new SharedPoolAllocation(gpuAllocation, allocationOffset, allocatedSize, nullptr); } diff --git a/shared/test/unit_test/program/program_initialization_tests.cpp b/shared/test/unit_test/program/program_initialization_tests.cpp index 9bee44ad65..75d47c7786 100644 --- a/shared/test/unit_test/program/program_initialization_tests.cpp +++ b/shared/test/unit_test/program/program_initialization_tests.cpp @@ -423,6 +423,148 @@ TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, GivenUsmAllocPoolAnd2MBLocalMemAlig } } +TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenReusedChunkThenDataIsProperlyInitializedAndRestIsZeroed) { + mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true; + linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true; + + constexpr size_t initSize = 32u; + constexpr size_t zeroInitSize = 32u; + constexpr size_t totalSize = initSize + zeroInitSize; + constexpr uint8_t initValue = 7u; + constexpr uint8_t dirtyValue = 9u; + + std::vector initData(initSize, initValue); + + auto verifyAllocation = [&](SharedPoolAllocation *allocation) { + ASSERT_NE(nullptr, allocation); + EXPECT_TRUE(device.getUsmGlobalSurfaceAllocPool()->isInPool( + reinterpret_cast(allocation->getGpuAddress()))); + EXPECT_NE(allocation->getGraphicsAllocation()->getUnderlyingBufferSize(), + allocation->getSize()); + EXPECT_TRUE(allocation->getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags()); + EXPECT_EQ(AllocationType::globalSurface, + 
allocation->getGraphicsAllocation()->getAllocationType()); + }; + + std::unique_ptr globalSurface1; + std::unique_ptr globalSurface2; + + // First allocation - new chunk from pool + globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data())); + verifyAllocation(globalSurface1.get()); + EXPECT_EQ(0, memcmp(globalSurface1->getUnderlyingBuffer(), initData.data(), initSize)); + + // Dirty the chunk before returning to pool + std::memset(globalSurface1->getUnderlyingBuffer(), dirtyValue, globalSurface1->getSize()); + device.getUsmGlobalSurfaceAllocPool()->freeSVMAlloc(reinterpret_cast(globalSurface1->getGpuAddress()), false); + + // Second allocation - should reuse the same chunk + globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data())); + verifyAllocation(globalSurface2.get()); + + // Verify it's the same chunk + EXPECT_EQ(globalSurface1->getGraphicsAllocation(), globalSurface2->getGraphicsAllocation()); + EXPECT_EQ(globalSurface1->getGpuAddress(), globalSurface2->getGpuAddress()); + EXPECT_EQ(globalSurface1->getOffset(), globalSurface2->getOffset()); + EXPECT_EQ(globalSurface1->getSize(), globalSurface2->getSize()); + + // Verify proper initialization: initData followed by zeros for entire chunk + std::vector expectedData(globalSurface2->getSize(), 0); + std::memcpy(expectedData.data(), initData.data(), initSize); + + EXPECT_EQ(0, memcmp(globalSurface2->getUnderlyingBuffer(), expectedData.data(), expectedData.size())); +} + +TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenReusedChunkWithBssOnlyDataThenEntireChunkIsZeroed) { + mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true; + linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true; + + constexpr size_t totalSize = 64u; + constexpr size_t zeroInitSize = totalSize; 
// BSS only - no init data + constexpr uint8_t dirtyValue = 9u; + + auto verifyAllocation = [&](SharedPoolAllocation *allocation) { + ASSERT_NE(nullptr, allocation); + EXPECT_TRUE(device.getUsmGlobalSurfaceAllocPool()->isInPool( + reinterpret_cast(allocation->getGpuAddress()))); + EXPECT_NE(allocation->getGraphicsAllocation()->getUnderlyingBufferSize(), + allocation->getSize()); + EXPECT_TRUE(allocation->getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags()); + EXPECT_EQ(AllocationType::globalSurface, + allocation->getGraphicsAllocation()->getAllocationType()); + }; + + std::unique_ptr globalSurface1; + std::unique_ptr globalSurface2; + + // First allocation - BSS only (no init data) + globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, nullptr)); + verifyAllocation(globalSurface1.get()); + + // Verify initial allocation is zeroed + std::vector expectedZeros(globalSurface1->getSize(), 0); + EXPECT_EQ(0, memcmp(globalSurface1->getUnderlyingBuffer(), expectedZeros.data(), expectedZeros.size())); + + // Dirty the chunk before returning to pool + std::memset(globalSurface1->getUnderlyingBuffer(), dirtyValue, globalSurface1->getSize()); + device.getUsmGlobalSurfaceAllocPool()->freeSVMAlloc(reinterpret_cast(globalSurface1->getGpuAddress()), false); + + // Second allocation - should reuse the same chunk + globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, nullptr)); + verifyAllocation(globalSurface2.get()); + + // Verify it's the same chunk + EXPECT_EQ(globalSurface1->getGraphicsAllocation(), globalSurface2->getGraphicsAllocation()); + EXPECT_EQ(globalSurface1->getGpuAddress(), globalSurface2->getGpuAddress()); + EXPECT_EQ(globalSurface1->getOffset(), globalSurface2->getOffset()); + EXPECT_EQ(globalSurface1->getSize(), globalSurface2->getSize()); + + // Verify entire chunk is 
zeroed (no dirty data from previous use) + EXPECT_EQ(0, memcmp(globalSurface2->getUnderlyingBuffer(), expectedZeros.data(), expectedZeros.size())); +} + +TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenOnlyInitDataWithoutBssSectionThenMemsetAllocationIsNotCalled) { + mockProductHelper->isBlitCopyRequiredForLocalMemoryResult = false; + mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true; + linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true; + + constexpr size_t initSize = 64u; + constexpr size_t zeroInitSize = 0u; + constexpr size_t totalSize = initSize + zeroInitSize; + constexpr uint8_t initValue = 7u; + + std::vector initData(initSize, initValue); + + auto mockMemoryManager = static_cast(device.getMemoryManager()); + mockMemoryManager->memsetAllocationCalled = 0; + + auto globalSurface = std::unique_ptr(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data())); + + ASSERT_NE(nullptr, globalSurface); + EXPECT_EQ(0u, mockMemoryManager->memsetAllocationCalled); +} + +TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenInitDataAndBssSectionThenMemsetAllocationIsCalledOnceForBssSection) { + mockProductHelper->isBlitCopyRequiredForLocalMemoryResult = false; + mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true; + linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true; + + constexpr size_t initSize = 32u; + constexpr size_t zeroInitSize = 32u; + constexpr size_t totalSize = initSize + zeroInitSize; + constexpr uint8_t initValue = 7u; + + std::vector initData(initSize, initValue); + + auto mockMemoryManager = static_cast(device.getMemoryManager()); + mockMemoryManager->memsetAllocationCalled = 0; + + auto globalSurface = std::unique_ptr(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data())); + + 
ASSERT_NE(nullptr, globalSurface); + EXPECT_EQ(1u, mockMemoryManager->memsetAllocationCalled); +} + TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, Given2MBLocalMemAlignmentEnabledButUsmPoolInitializeFailsThenDoNotUseUsmPool) { mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;