mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
fix: zero-initialize chunks from pool in allocateGlobalsSurface
Memory chunks reused from the USM allocation pool may contain stale data from previous executions. This commit ensures that pooled allocations are properly initialized before use by:
- Transferring init data using transferMemoryToAllocation (unchanged)
- Zero-initializing the BSS section via memsetAllocation when present
- Zero-initializing the entire totalSize for BSS-only allocations

This prevents stale data from affecting program execution when chunks are reused from the pool.

HSD-13013893112, HSD-18043476772, HSD-18043481899, HSD-18043487849, HSD-18043489182
Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
70fe9c5df0
commit
0cf5b36b26
@@ -120,6 +120,12 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
|
||||
device, gpuAllocation, allocationOffset, initData, initSize);
|
||||
UNRECOVERABLE_IF(!success);
|
||||
|
||||
if (isAllocatedFromPool && zeroInitSize > 0) {
|
||||
auto success = MemoryTransferHelper::memsetAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation),
|
||||
device, gpuAllocation, allocationOffset + initSize, 0, zeroInitSize);
|
||||
UNRECOVERABLE_IF(!success);
|
||||
}
|
||||
|
||||
if (auto csr = device.getDefaultEngine().commandStreamReceiver;
|
||||
isAllocatedFromPool && csr->getType() != NEO::CommandStreamReceiverType::hardware) {
|
||||
auto writeMemoryOperation = [&]() {
|
||||
@@ -141,6 +147,10 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
|
||||
writeMemoryOperation();
|
||||
}
|
||||
}
|
||||
} else if (isAllocatedFromPool) {
|
||||
auto success = MemoryTransferHelper::memsetAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation),
|
||||
device, gpuAllocation, allocationOffset, 0, totalSize);
|
||||
UNRECOVERABLE_IF(!success);
|
||||
}
|
||||
return new SharedPoolAllocation(gpuAllocation, allocationOffset, allocatedSize, nullptr);
|
||||
}
|
||||
|
||||
@@ -423,6 +423,148 @@ TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, GivenUsmAllocPoolAnd2MBLocalMemAlig
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a global surface with 32 bytes of init data followed by 32 bytes of zero-init
// space, dirties the chunk, returns it to the USM global-surface pool, allocates again and
// expects the reused chunk to hold the init data followed by zeros over its whole size.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenReusedChunkThenDataIsProperlyInitializedAndRestIsZeroed) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;

    constexpr size_t initSize = 32u;
    constexpr size_t zeroInitSize = 32u;
    constexpr size_t totalSize = initSize + zeroInitSize;
    constexpr uint8_t initValue = 7u;
    constexpr uint8_t dirtyValue = 9u;

    std::vector<uint8_t> initData(initSize, initValue);

    // Checks applied to both allocations: the address belongs to the pool, the chunk size
    // differs from the backing allocation's size, and the backing allocation is a writable
    // globalSurface allocation.
    auto verifyAllocation = [&](SharedPoolAllocation *allocation) {
        ASSERT_NE(nullptr, allocation);
        EXPECT_TRUE(device.getUsmGlobalSurfaceAllocPool()->isInPool(
            reinterpret_cast<void *>(allocation->getGpuAddress())));
        EXPECT_NE(allocation->getGraphicsAllocation()->getUnderlyingBufferSize(),
                  allocation->getSize());
        EXPECT_TRUE(allocation->getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags());
        EXPECT_EQ(AllocationType::globalSurface,
                  allocation->getGraphicsAllocation()->getAllocationType());
    };

    std::unique_ptr<SharedPoolAllocation> globalSurface1;
    std::unique_ptr<SharedPoolAllocation> globalSurface2;

    // First allocation - new chunk from pool
    globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data()));
    // A fatal assertion inside the lambda only leaves the lambda, so the null check has to be
    // repeated at test scope to stop before the dereferences below.
    ASSERT_NE(nullptr, globalSurface1);
    verifyAllocation(globalSurface1.get());
    EXPECT_EQ(0, memcmp(globalSurface1->getUnderlyingBuffer(), initData.data(), initSize));

    // Dirty the chunk before returning to pool
    std::memset(globalSurface1->getUnderlyingBuffer(), dirtyValue, globalSurface1->getSize());
    device.getUsmGlobalSurfaceAllocPool()->freeSVMAlloc(reinterpret_cast<void *>(globalSurface1->getGpuAddress()), false);

    // Second allocation - should reuse the same chunk
    globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data()));
    ASSERT_NE(nullptr, globalSurface2);
    verifyAllocation(globalSurface2.get());

    // Verify it's the same chunk
    EXPECT_EQ(globalSurface1->getGraphicsAllocation(), globalSurface2->getGraphicsAllocation());
    EXPECT_EQ(globalSurface1->getGpuAddress(), globalSurface2->getGpuAddress());
    EXPECT_EQ(globalSurface1->getOffset(), globalSurface2->getOffset());
    EXPECT_EQ(globalSurface1->getSize(), globalSurface2->getSize());

    // Verify proper initialization: initData followed by zeros for entire chunk
    std::vector<uint8_t> expectedData(globalSurface2->getSize(), 0);
    std::memcpy(expectedData.data(), initData.data(), initSize);

    EXPECT_EQ(0, memcmp(globalSurface2->getUnderlyingBuffer(), expectedData.data(), expectedData.size()));
}
|
||||
|
||||
// Allocates a BSS-only global surface (no init data, zeroInitSize == totalSize), expects it
// to read as zeros, dirties the chunk, returns it to the USM global-surface pool, allocates
// again and expects the reused chunk to read as zeros over its whole size.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenReusedChunkWithBssOnlyDataThenEntireChunkIsZeroed) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;

    constexpr size_t totalSize = 64u;
    constexpr size_t zeroInitSize = totalSize; // BSS only - no init data
    constexpr uint8_t dirtyValue = 9u;

    // Checks applied to both allocations: the address belongs to the pool, the chunk size
    // differs from the backing allocation's size, and the backing allocation is a writable
    // globalSurface allocation.
    auto verifyAllocation = [&](SharedPoolAllocation *allocation) {
        ASSERT_NE(nullptr, allocation);
        EXPECT_TRUE(device.getUsmGlobalSurfaceAllocPool()->isInPool(
            reinterpret_cast<void *>(allocation->getGpuAddress())));
        EXPECT_NE(allocation->getGraphicsAllocation()->getUnderlyingBufferSize(),
                  allocation->getSize());
        EXPECT_TRUE(allocation->getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags());
        EXPECT_EQ(AllocationType::globalSurface,
                  allocation->getGraphicsAllocation()->getAllocationType());
    };

    std::unique_ptr<SharedPoolAllocation> globalSurface1;
    std::unique_ptr<SharedPoolAllocation> globalSurface2;

    // First allocation - BSS only (no init data)
    globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, nullptr));
    // A fatal assertion inside the lambda only leaves the lambda, so the null check has to be
    // repeated at test scope to stop before the dereferences below.
    ASSERT_NE(nullptr, globalSurface1);
    verifyAllocation(globalSurface1.get());

    // Verify initial allocation is zeroed
    std::vector<uint8_t> expectedZeros(globalSurface1->getSize(), 0);
    EXPECT_EQ(0, memcmp(globalSurface1->getUnderlyingBuffer(), expectedZeros.data(), expectedZeros.size()));

    // Dirty the chunk before returning to pool
    std::memset(globalSurface1->getUnderlyingBuffer(), dirtyValue, globalSurface1->getSize());
    device.getUsmGlobalSurfaceAllocPool()->freeSVMAlloc(reinterpret_cast<void *>(globalSurface1->getGpuAddress()), false);

    // Second allocation - should reuse the same chunk
    globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, nullptr));
    ASSERT_NE(nullptr, globalSurface2);
    verifyAllocation(globalSurface2.get());

    // Verify it's the same chunk
    EXPECT_EQ(globalSurface1->getGraphicsAllocation(), globalSurface2->getGraphicsAllocation());
    EXPECT_EQ(globalSurface1->getGpuAddress(), globalSurface2->getGpuAddress());
    EXPECT_EQ(globalSurface1->getOffset(), globalSurface2->getOffset());
    EXPECT_EQ(globalSurface1->getSize(), globalSurface2->getSize());

    // Verify entire chunk is zeroed (no dirty data from previous use)
    EXPECT_EQ(0, memcmp(globalSurface2->getUnderlyingBuffer(), expectedZeros.data(), expectedZeros.size()));
}
|
||||
|
||||
// With init data covering the whole surface (zero-init size of 0), allocating the global
// surface must leave the mock memory manager's memsetAllocation call counter at zero.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenOnlyInitDataWithoutBssSectionThenMemsetAllocationIsNotCalled) {
    constexpr uint8_t fillByte = 7u;
    constexpr size_t payloadSize = 64u;
    constexpr size_t bssSize = 0u;
    constexpr size_t surfaceSize = payloadSize + bssSize;

    mockProductHelper->isBlitCopyRequiredForLocalMemoryResult = false;
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;

    std::vector<uint8_t> payload(payloadSize, fillByte);

    // Reset the counter so only calls made by this allocation are observed.
    auto *memoryManager = static_cast<MockMemoryManager *>(device.getMemoryManager());
    memoryManager->memsetAllocationCalled = 0;

    std::unique_ptr<SharedPoolAllocation> surface{
        allocateGlobalsSurface(svmAllocsManager.get(), device, surfaceSize, bssSize, false, &linkerInputExportGlobalVariables, payload.data())};

    ASSERT_NE(nullptr, surface);
    EXPECT_EQ(0u, memoryManager->memsetAllocationCalled);
}
|
||||
|
||||
// With 32 bytes of init data followed by 32 bytes of zero-init space, allocating the global
// surface must bump the mock memory manager's memsetAllocation call counter exactly once.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenInitDataAndBssSectionThenMemsetAllocationIsCalledOnceForBssSection) {
    constexpr uint8_t fillByte = 7u;
    constexpr size_t payloadSize = 32u;
    constexpr size_t bssSize = 32u;
    constexpr size_t surfaceSize = payloadSize + bssSize;

    mockProductHelper->isBlitCopyRequiredForLocalMemoryResult = false;
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;

    std::vector<uint8_t> payload(payloadSize, fillByte);

    // Reset the counter so only calls made by this allocation are observed.
    auto *memoryManager = static_cast<MockMemoryManager *>(device.getMemoryManager());
    memoryManager->memsetAllocationCalled = 0;

    std::unique_ptr<SharedPoolAllocation> surface{
        allocateGlobalsSurface(svmAllocsManager.get(), device, surfaceSize, bssSize, false, &linkerInputExportGlobalVariables, payload.data())};

    ASSERT_NE(nullptr, surface);
    EXPECT_EQ(1u, memoryManager->memsetAllocationCalled);
}
|
||||
|
||||
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, Given2MBLocalMemAlignmentEnabledButUsmPoolInitializeFailsThenDoNotUseUsmPool) {
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user