From 26666d8c6f2157e1708860dcb74d54f538f75d7d Mon Sep 17 00:00:00 2001 From: Kacper Nowak Date: Tue, 17 Oct 2023 02:40:07 +0000 Subject: [PATCH] fix: Use getGpuAddressToPatch() of pattern allocation When using FillBuffer builtin, use GPU address to patch (i.e. gpuAddress plus allocationOffset minus gpuBaseAddress) instead of gpuAddress. This change fixes a bug - possible abort due to huge difference between gpuAddress() and gpuAddressToPatch() values. Related-To: NEO-7973 Signed-off-by: Kacper Nowak --- .../built_ins/builtins_dispatch_builder.cpp | 2 +- .../enqueue_fill_buffer_tests.cpp | 35 ++++++++++++++++++- .../enqueue_svm_mem_fill_tests.cpp | 8 ++--- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/opencl/source/built_ins/builtins_dispatch_builder.cpp b/opencl/source/built_ins/builtins_dispatch_builder.cpp index 83da662e74..73260d22f8 100644 --- a/opencl/source/built_ins/builtins_dispatch_builder.cpp +++ b/opencl/source/built_ins/builtins_dispatch_builder.cpp @@ -340,7 +340,7 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder { // Set-up srcMemObj with pattern auto graphicsAllocation = operationParams.srcMemObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); - kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), graphicsAllocation, CL_MEM_READ_ONLY); + kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), reinterpret_cast<void *>(graphicsAllocation->getGpuAddressToPatch()), graphicsAllocation, CL_MEM_READ_ONLY); // Set-up patternSizeInEls kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast<uint32_t>(operationParams.srcMemObj->getSize())); diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp index 6ca6f21267..a14c4d134b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -523,6 +523,39 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType()); } +HWTEST_F(EnqueueFillBufferCmdTests, whenFillingBufferThenUseGpuAddressForPatchingOfPatternAllocation) { + auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); + + // Set gpuBaseAddress and offset so gpuAddress != gpuAddressToPatch + if (0u == patternAllocation->getGpuBaseAddress()) { + patternAllocation->setGpuBaseAddress(4096u); + } + patternAllocation->setAllocationOffset(10u); + + EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); + ASSERT_NE(nullptr, &builder); + + BuiltinOpParams dc; + MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), + patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); + dc.srcMemObj = &patternMemObj; + dc.dstMemObj = buffer; + dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; + dc.size = {EnqueueFillBufferTraits::size, 0, 0}; + + MultiDispatchInfo multiDispatchInfo(dc); + builder.buildDispatchInfos(multiDispatchInfo); + EXPECT_NE(0u, multiDispatchInfo.size()); + + auto kernel = multiDispatchInfo.begin()->getKernel(); + auto patternArgIndex = 2; + const auto &patternArg = kernel->getKernelArguments().at(patternArgIndex); + EXPECT_EQ(patternAllocation->getGpuAddressToPatch(), 
reinterpret_cast<uint64_t>(patternArg.value)); + + context.getMemoryManager()->freeGraphicsMemory(patternAllocation); +} + struct EnqueueFillBufferHw : public ::testing::Test { void SetUp() override { diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp index e81527c1a2..7874081197 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp @@ -55,7 +55,7 @@ struct BaseEnqueueSvmMemFillFixture : public ClDeviceFixture, using BaseEnqueueSvmMemFillTest = Test<BaseEnqueueSvmMemFillFixture>; -HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenUseGpuAddressOfPatternSVMAllocation) { +HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenUseGpuAddressForPatchingOfPatternSVMAllocation) { struct MockFillBufferBuilder : MockBuiltinDispatchInfoBuilder { MockFillBufferBuilder(BuiltIns &kernelLib, ClDevice &clDevice, BuiltinDispatchInfoBuilder *origBuilder, const void *pattern, size_t patternSize) : MockBuiltinDispatchInfoBuilder(kernelLib, clDevice, origBuilder), @@ -82,10 +82,10 @@ HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBui size_t patternSize = 0x10u; auto patternAllocation = static_cast<MockGraphicsAllocation *>(context->getMemoryManager()->allocateGraphicsMemoryWithProperties({pCmdQ->getDevice().getRootDeviceIndex(), 2 * patternSize, AllocationType::FILL_PATTERN, pCmdQ->getDevice().getDeviceBitfield()})); - // offset cpuPtr so cpuPtr != gpuAddress in order to ensure that setArgSVM will be called using gpu address of the pattern allocation + // offset cpuPtr so cpuPtr != gpuAddress (for patching) in order to ensure that setArgSVM will be called using gpu address of the pattern allocation auto origCpuPtr = patternAllocation->cpuPtr; patternAllocation->cpuPtr = ptrOffset(patternAllocation->cpuPtr, patternSize); - 
ASSERT_NE((uint64_t)patternAllocation->cpuPtr, patternAllocation->getGpuAddress()); + ASSERT_NE((uint64_t)patternAllocation->cpuPtr, patternAllocation->getGpuAddressToPatch()); auto internalAllocStorage = pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); internalAllocStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION); @@ -111,7 +111,7 @@ HWTEST_F(BaseEnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBui auto patternArgIndex = 2; const auto &patternArg = kernel->getKernelArguments().at(patternArgIndex); - EXPECT_EQ(patternAllocation->getGpuAddress(), reinterpret_cast<uint64_t>(patternArg.value)); + EXPECT_EQ(patternAllocation->getGpuAddressToPatch(), reinterpret_cast<uint64_t>(patternArg.value)); patternAllocation->cpuPtr = origCpuPtr; }