From 047f2bec87fa3540b98ea5ed157c50f2f0b960d2 Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Mon, 25 Mar 2019 11:21:59 +0100 Subject: [PATCH] Program correct addresses in EnqueueReadWriteBufferRect scenarios - SurfaceBaseAddress should be programmed with aligned address this was not the case for certain origin and region values - offset from aligned address added to operationParams Change-Id: I0742b826dd0b70f0a6dedf436b850734fa015688 Signed-off-by: Mateusz Hoppe --- runtime/built_ins/built_ins.cpp | 23 ++++- .../enqueue_read_buffer_rect_tests.cpp | 48 +++++++++ .../enqueue_write_buffer_rect_tests.cpp | 48 +++++++++ unit_tests/fixtures/CMakeLists.txt | 1 + unit_tests/fixtures/buffer_enqueue_fixture.h | 97 +++++++++++++++++++ unit_tests/helpers/hw_info_helper.h | 3 +- 6 files changed, 215 insertions(+), 5 deletions(-) create mode 100644 unit_tests/fixtures/buffer_enqueue_fixture.h diff --git a/runtime/built_ins/built_ins.cpp b/runtime/built_ins/built_ins.cpp index 12181f1dc1..9653a91da8 100644 --- a/runtime/built_ins/built_ins.cpp +++ b/runtime/built_ins/built_ins.cpp @@ -323,26 +323,41 @@ class BuiltInOp : public BuiltinDispatchI int dimensions = is3D ? 3 : 2; kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]); + size_t srcOffsetFromAlignedPtr = 0; + size_t dstOffsetFromAlignedPtr = 0; + // arg0 = src if (operationParams.srcMemObj) { kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj); } else { - kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, is3D ? operationParams.srcPtr : ptrOffset(operationParams.srcPtr, operationParams.srcOffset.z * operationParams.srcSlicePitch), nullptr, CL_MEM_READ_ONLY); + void *srcPtrToSet = operationParams.srcPtr; + if (!is3D) { + auto srcPtr = ptrOffset(operationParams.srcPtr, operationParams.srcOffset.z * operationParams.srcSlicePitch); + srcPtrToSet = alignDown(srcPtr, 4); + srcOffsetFromAlignedPtr = ptrDiff(srcPtr, srcPtrToSet); + } + kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, srcPtrToSet, nullptr, CL_MEM_READ_ONLY); } // arg1 = dst if (operationParams.dstMemObj) { kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj); } else { - kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, is3D ? operationParams.dstPtr : ptrOffset(operationParams.dstPtr, operationParams.dstOffset.z * operationParams.dstSlicePitch), nullptr, 0u); + void *dstPtrToSet = operationParams.dstPtr; + if (!is3D) { + auto dstPtr = ptrOffset(operationParams.dstPtr, operationParams.dstOffset.z * operationParams.dstSlicePitch); + dstPtrToSet = alignDown(dstPtr, 4); + dstOffsetFromAlignedPtr = ptrDiff(dstPtr, dstPtrToSet); + } + kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, dstPtrToSet, nullptr, 0u); } // arg2 = srcOrigin - uint32_t kSrcOrigin[4] = {(uint32_t)operationParams.srcOffset.x, (uint32_t)operationParams.srcOffset.y, (uint32_t)operationParams.srcOffset.z, 0}; + uint32_t kSrcOrigin[4] = {static_cast(operationParams.srcOffset.x + srcOffsetFromAlignedPtr), (uint32_t)operationParams.srcOffset.y, (uint32_t)operationParams.srcOffset.z, 0}; kernelNoSplit3DBuilder.setArg(2, sizeof(uint32_t) * 4, kSrcOrigin); // arg3 = dstOrigin - uint32_t kDstOrigin[4] = {(uint32_t)operationParams.dstOffset.x, (uint32_t)operationParams.dstOffset.y, (uint32_t)operationParams.dstOffset.z, 0}; + uint32_t kDstOrigin[4] = {static_cast(operationParams.dstOffset.x + dstOffsetFromAlignedPtr), (uint32_t)operationParams.dstOffset.y, (uint32_t)operationParams.dstOffset.z, 0}; kernelNoSplit3DBuilder.setArg(3, sizeof(uint32_t) * 4, kDstOrigin); // arg4 = srcPitch diff --git a/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp b/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp index c9cbda3d56..d7bf6e98ea 100644 --- a/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -12,6 +12,7 @@ #include "runtime/memory_manager/memory_constants.h" #include "test.h" #include "unit_tests/command_queue/enqueue_read_buffer_rect_fixture.h" +#include "unit_tests/fixtures/buffer_enqueue_fixture.h" #include "unit_tests/gen_common/gen_commands_common_validation.h" #include "reg_configs_common.h" @@ -531,6 +532,53 @@ HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZ EXPECT_EQ(pCmdQ->taskLevel, 1u); } +HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueReadBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { + initializeFixture(); + auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); + + buffer->forceDisallowCPUCopy = true; + Vec3 hostOffset(hostOrigin); + auto misalignedDstPtr = ptrOffset(reinterpret_cast(memory), hostOffset.z * hostSlicePitch); + + auto retVal = cmdQ->enqueueReadBufferRect(buffer.get(), CL_FALSE, + bufferOrigin, hostOrigin, region, + bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + memory, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); + Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; + + cmdQ->finish(true); + + parseCommands(*cmdQ); + + if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { + const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); + + if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { + auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), *pKernelArg); + EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); + + } else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), static_cast(*pKernelArg)); + EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); + } + } + + if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin + auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(hostOffset.x + ptrDiff(misalignedDstPtr, alignDown(misalignedDstPtr, 4)), *dstOffset); + } else { + // DstOrigin arg should be 16 bytes in size, if that changes, above if path should be modified + EXPECT_TRUE(false); + } +} + using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { diff --git a/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp index c27f3d9d47..9a5fdea57f 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -11,6 +11,7 @@ #include "runtime/helpers/dispatch_info.h" #include "test.h" #include "unit_tests/command_queue/enqueue_write_buffer_rect_fixture.h" +#include "unit_tests/fixtures/buffer_enqueue_fixture.h" #include "unit_tests/fixtures/buffer_fixture.h" #include "unit_tests/gen_common/gen_commands_common_validation.h" @@ -528,6 +529,53 @@ HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNon EXPECT_EQ(pCmdQ->taskLevel, 1u); } +HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueWriteBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { + initializeFixture(); + auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); + + buffer->forceDisallowCPUCopy = true; + Vec3 hostOffset(hostOrigin); + auto misalignedHostPtr = ptrOffset(reinterpret_cast(memory), hostOffset.z * hostSlicePitch); + + auto retVal = cmdQ->enqueueWriteBufferRect(buffer.get(), CL_FALSE, + bufferOrigin, hostOrigin, region, + bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + memory, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); + Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; + + cmdQ->finish(true); + + parseCommands(*cmdQ); + + if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { + const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); + + if (kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { + auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), *pKernelArg); + EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress()); + + } else if (kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), static_cast(*pKernelArg)); + EXPECT_EQ(static_cast(*pKernelArg), surfaceState.getSurfaceBaseAddress()); + } + } + + if (kernel->getKernelInfo().kernelArgInfo[2].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin + auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(hostOffset.x + ptrDiff(misalignedHostPtr, alignDown(misalignedHostPtr, 4)), *dstOffset); + } else { + // SrcOrigin arg should be 16 bytes in size, if that changes, above if path should be modified + EXPECT_TRUE(false); + } +} + using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { diff --git a/unit_tests/fixtures/CMakeLists.txt b/unit_tests/fixtures/CMakeLists.txt index ce94df60e5..c6afd0a364 100644 --- a/unit_tests/fixtures/CMakeLists.txt +++ b/unit_tests/fixtures/CMakeLists.txt @@ -6,6 +6,7 @@ set(IGDRCL_SRCS_tests_fixtures ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/buffer_enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/context_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_host_queue_fixture.cpp diff --git a/unit_tests/fixtures/buffer_enqueue_fixture.h b/unit_tests/fixtures/buffer_enqueue_fixture.h new file mode 100644 index 0000000000..f61a948a1c --- /dev/null +++ b/unit_tests/fixtures/buffer_enqueue_fixture.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "runtime/memory_manager/internal_allocation_storage.h" +#include "test.h" +#include "unit_tests/fixtures/buffer_fixture.h" +#include "unit_tests/helpers/execution_environment_helper.h" +#include "unit_tests/helpers/hw_info_helper.h" +#include "unit_tests/helpers/hw_parse.h" +#include "unit_tests/mocks/mock_command_queue.h" +#include "unit_tests/mocks/mock_csr.h" +#include "unit_tests/mocks/mock_device.h" +#include "unit_tests/mocks/mock_memory_manager.h" + +using namespace OCLRT; + +struct BufferEnqueueFixture : public HardwareParse, + public ::testing::Test { + BufferEnqueueFixture(void) + : buffer(nullptr) { + } + + void SetUp() override { + executionEnvironment = getExecutionEnvironmentImpl(hwInfo); + } + + void TearDown() override { + buffer.reset(nullptr); + } + + template + void initializeFixture() { + auto mockCsr = new MockCsrHw2(*executionEnvironment); + + executionEnvironment->commandStreamReceivers.resize(1); + executionEnvironment->commandStreamReceivers[0].push_back(std::unique_ptr(mockCsr)); + + memoryManager = new MockMemoryManager(*executionEnvironment); + executionEnvironment->memoryManager.reset(memoryManager); + + device = std::unique_ptr(Device::create(hwInfo, executionEnvironment, 0)); + + context = std::make_unique(device.get()); + + bufferMemory = std::make_unique(alignUp(bufferSizeInDwords, sizeof(uint32_t))); + cl_int retVal = 0; + + buffer.reset(Buffer::create(context.get(), + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + bufferSizeInDwords, + reinterpret_cast(bufferMemory.get()), + retVal)); + + EXPECT_EQ(CL_SUCCESS, retVal); + } + + protected: + const size_t bufferSizeInDwords = 64; + HwInfoHelper hwInfoHelper; + HardwareInfo *hwInfo = nullptr; + ExecutionEnvironment *executionEnvironment; + cl_queue_properties properties = {}; + std::unique_ptr bufferMemory; + std::unique_ptr device; + std::unique_ptr context; + std::unique_ptr buffer; + + MockMemoryManager *memoryManager = nullptr; +}; + +struct EnqueueReadWriteBufferRectDispatch : public BufferEnqueueFixture { + + void SetUp() override { + BufferEnqueueFixture::SetUp(); + } + + void TearDown() override { + BufferEnqueueFixture::TearDown(); + } + + uint32_t memory[64] = {0}; + + size_t bufferOrigin[3] = {0, 0, 0}; + size_t hostOrigin[3] = {1, 1, 1}; + size_t region[3] = {1, 2, 1}; + + size_t bufferRowPitch = 4; + size_t bufferSlicePitch = bufferSizeInDwords; + + size_t hostRowPitch = 5; + size_t hostSlicePitch = 15; +}; diff --git a/unit_tests/helpers/hw_info_helper.h b/unit_tests/helpers/hw_info_helper.h index 2c06eff879..f48c8a5fe8 100644 --- a/unit_tests/helpers/hw_info_helper.h +++ b/unit_tests/helpers/hw_info_helper.h @@ -1,10 +1,11 @@ /* - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#pragma once #include "runtime/helpers/hw_info.h" #include "runtime/helpers/options.h"