From 26666d8c6f2157e1708860dcb74d54f538f75d7d Mon Sep 17 00:00:00 2001
From: Kacper Nowak <kacper.nowak@intel.com>
Date: Tue, 17 Oct 2023 02:40:07 +0000
Subject: [PATCH] fix: Use getGpuAddressToPatch() of pattern allocation

When setting up the pattern argument of the FillBuffer builtin, pass the
GPU address to patch (i.e. gpuAddress plus allocationOffset minus
gpuBaseAddress) instead of the plain GPU address.
This change fixes a bug: a possible abort caused by a huge difference
between the values of getGpuAddress() and getGpuAddressToPatch().

Related-To: NEO-7973
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
---
 .../built_ins/builtins_dispatch_builder.cpp   |  2 +-
 .../enqueue_fill_buffer_tests.cpp             | 35 ++++++++++++++++++-
 .../enqueue_svm_mem_fill_tests.cpp            |  8 ++---
 3 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/opencl/source/built_ins/builtins_dispatch_builder.cpp b/opencl/source/built_ins/builtins_dispatch_builder.cpp
index 83da662e74..73260d22f8 100644
--- a/opencl/source/built_ins/builtins_dispatch_builder.cpp
+++ b/opencl/source/built_ins/builtins_dispatch_builder.cpp
@@ -340,7 +340,7 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
 
         // Set-up srcMemObj with pattern
         auto graphicsAllocation = operationParams.srcMemObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation();
-        kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), graphicsAllocation, CL_MEM_READ_ONLY);
+        kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), reinterpret_cast<void *>(graphicsAllocation->getGpuAddressToPatch()), graphicsAllocation, CL_MEM_READ_ONLY);
 
         // Set-up patternSizeInEls
         kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast<OffsetType>(operationParams.srcMemObj->getSize()));
diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp
index 6ca6f21267..a14c4d134b 100644
--- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -523,6 +523,39 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI
     EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());
 }
 
+HWTEST_F(EnqueueFillBufferCmdTests, whenFillingBufferThenUseGpuAddressForPatchingOfPatternAllocation) {
+    // The FillBuffer builder is expected to pass getGpuAddressToPatch() of the pattern allocation as kernel argument 2
+    auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
+    ASSERT_NE(nullptr, patternAllocation);
+
+    // Set gpuBaseAddress and offset so gpuAddress != gpuAddressToPatch
+    if (0u == patternAllocation->getGpuBaseAddress()) {
+        patternAllocation->setGpuBaseAddress(4096u);
+    }
+    patternAllocation->setAllocationOffset(10u);
+
+    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
+    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice());
+
+    BuiltinOpParams dc;
+    MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(),
+                         patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true);
+    dc.srcMemObj = &patternMemObj;
+    dc.dstMemObj = buffer;
+    dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0};
+    dc.size = {EnqueueFillBufferTraits::size, 0, 0};
+
+    MultiDispatchInfo multiDispatchInfo(dc);
+    builder.buildDispatchInfos(multiDispatchInfo);
+    ASSERT_NE(0u, multiDispatchInfo.size()); // fatal on purpose: begin() is dereferenced right below
+    auto kernel = multiDispatchInfo.begin()->getKernel();
+    auto patternArgIndex = 2;
+    const auto &patternArg = kernel->getKernelArguments().at(patternArgIndex);
+    EXPECT_EQ(patternAllocation->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(patternArg.value));
+
+    context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
+}
+
 struct EnqueueFillBufferHw : public ::testing::Test {
 
     void SetUp() override {
diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp
index e81527c1a2..7874081197 100644
--- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp
@@ -55,7 +55,7 @@ struct BaseEnqueueSvmMemFillFixture : public ClDeviceFixture,
 
 using BaseEnqueueSvmMemFillTest = Test<BaseEnqueueSvmMemFillFixture>;
 
-HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenUseGpuAddressOfPatternSVMAllocation) {
+HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenUseGpuAddressForPatchingOfPatternSVMAllocation) {
     struct MockFillBufferBuilder : MockBuiltinDispatchInfoBuilder {
         MockFillBufferBuilder(BuiltIns &kernelLib, ClDevice &clDevice, BuiltinDispatchInfoBuilder *origBuilder, const void *pattern, size_t patternSize)
             : MockBuiltinDispatchInfoBuilder(kernelLib, clDevice, origBuilder),
@@ -82,10 +82,10 @@ HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBui
     size_t patternSize = 0x10u;
     auto patternAllocation = static_cast<MockGraphicsAllocation *>(context->getMemoryManager()->allocateGraphicsMemoryWithProperties({pCmdQ->getDevice().getRootDeviceIndex(), 2 * patternSize, AllocationType::FILL_PATTERN, pCmdQ->getDevice().getDeviceBitfield()}));
 
-    // offset cpuPtr so cpuPtr != gpuAddress in order to ensure that setArgSVM will be called using gpu address of the pattern allocation
+    // offset cpuPtr so cpuPtr != gpuAddressToPatch in order to ensure that setArgSvm will be called using the gpu address to patch of the pattern allocation
     auto origCpuPtr = patternAllocation->cpuPtr;
     patternAllocation->cpuPtr = ptrOffset(patternAllocation->cpuPtr, patternSize);
-    ASSERT_NE((uint64_t)patternAllocation->cpuPtr, patternAllocation->getGpuAddress());
+    ASSERT_NE((uint64_t)patternAllocation->cpuPtr, patternAllocation->getGpuAddressToPatch());
 
     auto internalAllocStorage = pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
     internalAllocStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION);
@@ -111,7 +111,7 @@ HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBui
 
     auto patternArgIndex = 2;
     const auto &patternArg = kernel->getKernelArguments().at(patternArgIndex);
-    EXPECT_EQ(patternAllocation->getGpuAddress(), reinterpret_cast<uint64_t>(patternArg.value));
+    EXPECT_EQ(patternAllocation->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(patternArg.value));
 
     patternAllocation->cpuPtr = origCpuPtr;
 }