fix: Use getGpuAddressToPatch() of pattern allocation

When using the FillBuffer builtin, use the pattern allocation's GPU address to patch (i.e. gpuAddress plus allocationOffset minus gpuBaseAddress) instead of its gpuAddress. This fixes a bug: a possible abort caused by a huge difference between the values returned by getGpuAddress() and getGpuAddressToPatch().

Related-To: NEO-7973
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
2026-01-03 06:49:52 +08:00 · 2023-10-17 02:40:07 +00:00
parent 750b5ba89a
commit 26666d8c6f
3 changed files with 39 additions and 6 deletions
--- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -523,6 +523,39 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI
    EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());
 }

+HWTEST_F(EnqueueFillBufferCmdTests, whenFillingBufferThenUseGpuAddressForPatchingOfPatternAllocation) {
+    auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
+    // The allocation is created by hand here and freed explicitly by the last statement of this test.
+    // Set gpuBaseAddress and offset so gpuAddress != gpuAddressToPatch
+    if (0u == patternAllocation->getGpuBaseAddress()) {
+        patternAllocation->setGpuBaseAddress(4096u);
+    }
+    patternAllocation->setAllocationOffset(10u);
+
+    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
+    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice());
+    ASSERT_NE(nullptr, &builder);
+    // Wrap the pattern allocation in a MemObj and pass it to the FillBuffer builder as the source.
+    BuiltinOpParams dc;
+    MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(),
+                         patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true);
+    dc.srcMemObj = &patternMemObj;
+    dc.dstMemObj = buffer;
+    dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0};
+    dc.size = {EnqueueFillBufferTraits::size, 0, 0};
+
+    MultiDispatchInfo multiDispatchInfo(dc);
+    builder.buildDispatchInfos(multiDispatchInfo);
+    ASSERT_NE(0u, multiDispatchInfo.size()); // fatal on purpose: begin() is dereferenced right below
+
+    auto kernel = multiDispatchInfo.begin()->getKernel();
+    auto patternArgIndex = 2; // kernel argument slot inspected as the pattern pointer
+    const auto &patternArg = kernel->getKernelArguments().at(patternArgIndex);
+    EXPECT_EQ(patternAllocation->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(patternArg.value)); // must be the address to patch, not the plain gpuAddress
+
+    context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
+}
+
 struct EnqueueFillBufferHw : public ::testing::Test {

    void SetUp() override {
--- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp
@@ -55,7 +55,7 @@ struct BaseEnqueueSvmMemFillFixture : public ClDeviceFixture,

 using BaseEnqueueSvmMemFillTest = Test<BaseEnqueueSvmMemFillFixture>;

-HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenUseGpuAddressOfPatternSVMAllocation) {
+HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenUseGpuAddressForPatchingOfPatternSVMAllocation) {
    struct MockFillBufferBuilder : MockBuiltinDispatchInfoBuilder {
        MockFillBufferBuilder(BuiltIns &kernelLib, ClDevice &clDevice, BuiltinDispatchInfoBuilder *origBuilder, const void *pattern, size_t patternSize)
            : MockBuiltinDispatchInfoBuilder(kernelLib, clDevice, origBuilder),
@@ -82,10 +82,10 @@ HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBui
    size_t patternSize = 0x10u;
    auto patternAllocation = static_cast<MockGraphicsAllocation *>(context->getMemoryManager()->allocateGraphicsMemoryWithProperties({pCmdQ->getDevice().getRootDeviceIndex(), 2 * patternSize, AllocationType::FILL_PATTERN, pCmdQ->getDevice().getDeviceBitfield()}));

-    // offset cpuPtr so cpuPtr != gpuAddress in order to ensure that setArgSVM will be called using gpu address of the pattern allocation
+    // offset cpuPtr so cpuPtr != gpuAddress (for patching) in order to ensure that setArgSVM will be called using gpu address of the pattern allocation
    auto origCpuPtr = patternAllocation->cpuPtr;
    patternAllocation->cpuPtr = ptrOffset(patternAllocation->cpuPtr, patternSize);
-    ASSERT_NE((uint64_t)patternAllocation->cpuPtr, patternAllocation->getGpuAddress());
+    ASSERT_NE((uint64_t)patternAllocation->cpuPtr, patternAllocation->getGpuAddressToPatch());

    auto internalAllocStorage = pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
    internalAllocStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION);
@@ -111,7 +111,7 @@ HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBui

    auto patternArgIndex = 2;
    const auto &patternArg = kernel->getKernelArguments().at(patternArgIndex);
-    EXPECT_EQ(patternAllocation->getGpuAddress(), reinterpret_cast<uint64_t>(patternArg.value));
+    EXPECT_EQ(patternAllocation->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(patternArg.value));

    patternAllocation->cpuPtr = origCpuPtr;
 }