Patch summary
-------------
* enqueueReadBufferRect / enqueueWriteBufferRect: the host pointer handed to
  the builtin is aligned down to 4 bytes, and the bytes removed by that
  alignment are added to the x component of the host-side origin offset.
* New AUB tests (AUBReadBufferRectUnaligned / AUBWriteBufferRectUnaligned)
  run rect transfers with host pointer offsets 0..3 and sizes 1..4.
* Windows scenario test: a comment is added to the failing `else` branch.

Reconstruction notes (NOTE(review): confirm against the original commit)
-----------------------------------------------------------------------
* This patch was recovered from a copy whose line breaks and indentation had
  been collapsed. The line counts below match every hunk header, but the
  leading whitespace is inferred; apply with
  `git apply --ignore-whitespace` if context lines do not match.
* Template argument lists had been stripped from that copy. The following
  were re-inserted from usage and are presumed, not verified:
  `CommandQueueHw<GfxFamily>`, `reinterpret_cast<void *>` (runtime headers),
  `template <typename FamilyType>`, `std::unique_ptr<Buffer>`,
  `reinterpret_cast<char *>`, `expectMemory<FamilyType>`,
  `std::vector<size_t>`, `test*BufferUnaligned<FamilyType>`.
* Review observation: in testReadBufferUnaligned, `dstMemory` is released only
  by the trailing `alignedFree(dstMemory)`, so it is not freed when
  `ASSERT_NE(nullptr, buffer)` fails and returns early.

diff --git a/runtime/command_queue/enqueue_read_buffer_rect.h b/runtime/command_queue/enqueue_read_buffer_rect.h
index da2abca39a..cb820698bd 100644
--- a/runtime/command_queue/enqueue_read_buffer_rect.h
+++ b/runtime/command_queue/enqueue_read_buffer_rect.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,11 +83,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
         dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
     }
 
+    void *alignedDstPtr = alignDown(dstPtr, 4);
+    size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
+
     BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
     dc.srcMemObj = buffer;
-    dc.dstPtr = dstPtr;
+    dc.dstPtr = alignedDstPtr;
     dc.srcOffset = bufferOrigin;
     dc.dstOffset = hostOrigin;
+    dc.dstOffset.x += dstPtrOffset;
     dc.size = region;
     dc.srcRowPitch = bufferRowPitch;
     dc.srcSlicePitch = bufferSlicePitch;
diff --git a/runtime/command_queue/enqueue_write_buffer_rect.h b/runtime/command_queue/enqueue_write_buffer_rect.h
index 3fd447f787..7d76304440 100644
--- a/runtime/command_queue/enqueue_write_buffer_rect.h
+++ b/runtime/command_queue/enqueue_write_buffer_rect.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,10 +82,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
         srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
     }
 
+    void *alignedSrcPtr = alignDown(srcPtr, 4);
+    size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
+
     BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
-    dc.srcPtr = srcPtr;
+    dc.srcPtr = alignedSrcPtr;
     dc.dstMemObj = buffer;
     dc.srcOffset = hostOrigin;
+    dc.srcOffset.x += srcPtrOffset;
     dc.dstOffset = bufferOrigin;
     dc.size = region;
     dc.srcRowPitch = hostRowPitch;
diff --git a/unit_tests/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp
index 193053a5e8..5b752e33b0 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -112,3 +112,83 @@ INSTANTIATE_TEST_CASE_P(AUBReadBufferRect_simple,
                         ::testing::Combine(
                             ::testing::Values(0, 1, 2, 3, 4),
                             ::testing::Values(0, 1, 2, 3, 4)));
+
+struct AUBReadBufferRectUnaligned
+    : public CommandEnqueueAUBFixture,
+      public ::testing::Test {
+
+    void SetUp() override {
+        CommandEnqueueAUBFixture::SetUp();
+    }
+
+    void TearDown() override {
+        CommandEnqueueAUBFixture::TearDown();
+    }
+
+    template <typename FamilyType>
+    void testReadBufferUnaligned(size_t offset, size_t size) {
+        MockContext context(&pCmdQ->getDevice());
+
+        char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        const auto bufferSize = sizeof(srcMemory);
+        void *dstMemory = alignedMalloc(bufferSize, MemoryConstants::pageSize);
+        memset(dstMemory, 0, bufferSize);
+        char referenceMemory[bufferSize] = {0};
+
+        auto retVal = CL_INVALID_VALUE;
+
+        auto buffer = std::unique_ptr<Buffer>(Buffer::create(
+            &context,
+            CL_MEM_COPY_HOST_PTR,
+            bufferSize,
+            srcMemory,
+            retVal));
+        ASSERT_NE(nullptr, buffer);
+
+        buffer->forceDisallowCPUCopy = true;
+
+        // Map destination memory to GPU
+        GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize);
+        auto dstMemoryGPUPtr = reinterpret_cast<char *>(allocation->getGpuAddress());
+
+        cl_bool blockingRead = CL_TRUE;
+
+        size_t rowPitch = bufferSize / 4;
+        size_t slicePitch = 4 * rowPitch;
+        size_t bufferOrigin[] = {0, 1, 0};
+        size_t hostOrigin[] = {0, 0, 0};
+        size_t region[] = {size, 1, 1};
+
+        retVal = pCmdQ->enqueueReadBufferRect(
+            buffer.get(),
+            blockingRead,
+            bufferOrigin,
+            hostOrigin,
+            region,
+            rowPitch,
+            slicePitch,
+            rowPitch,
+            slicePitch,
+            ptrOffset(dstMemory, offset),
+            0,
+            nullptr,
+            nullptr);
+
+        EXPECT_EQ(CL_SUCCESS, retVal);
+
+        AUBCommandStreamFixture::expectMemory<FamilyType>(dstMemoryGPUPtr, referenceMemory, offset);
+        AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, offset), &srcMemory[rowPitch * bufferOrigin[1]], size);
+        AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(dstMemoryGPUPtr, size + offset), referenceMemory, bufferSize - offset - size);
+        alignedFree(dstMemory);
+    }
+};
+
+HWTEST_F(AUBReadBufferRectUnaligned, misalignedHostPtr) {
+    const std::vector<size_t> offsets = {0, 1, 2, 3};
+    const std::vector<size_t> sizes = {4, 3, 2, 1};
+    for (auto offset : offsets) {
+        for (auto size : sizes) {
+            testReadBufferUnaligned<FamilyType>(offset, size);
+        }
+    }
+}
diff --git a/unit_tests/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp
index 388dbc0acb..ef0a3668e7 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -113,3 +113,80 @@ INSTANTIATE_TEST_CASE_P(AUBWriteBufferRect_simple,
                         ::testing::Combine(
                             ::testing::Values(0, 1, 2, 3, 4),
                             ::testing::Values(0, 1, 2, 3, 4)));
+
+struct AUBWriteBufferRectUnaligned
+    : public CommandEnqueueAUBFixture,
+      public ::testing::Test {
+
+    void SetUp() override {
+        CommandEnqueueAUBFixture::SetUp();
+    }
+
+    void TearDown() override {
+        CommandEnqueueAUBFixture::TearDown();
+    }
+
+    template <typename FamilyType>
+    void testWriteBufferUnaligned(size_t offset, size_t size) {
+        MockContext context(&pCmdQ->getDevice());
+
+        char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        const auto bufferSize = sizeof(srcMemory);
+        char dstMemory[bufferSize] = {0};
+        char referenceMemory[bufferSize] = {0};
+
+        auto retVal = CL_INVALID_VALUE;
+
+        auto buffer = std::unique_ptr<Buffer>(Buffer::create(
+            &context,
+            CL_MEM_COPY_HOST_PTR,
+            bufferSize,
+            dstMemory,
+            retVal));
+        ASSERT_NE(nullptr, buffer);
+
+        buffer->forceDisallowCPUCopy = true;
+
+        uint8_t *pDestMemory = (uint8_t *)buffer->getGraphicsAllocation()->getGpuAddress();
+
+        cl_bool blockingWrite = CL_TRUE;
+
+        size_t rowPitch = bufferSize / 4;
+        size_t slicePitch = 4 * rowPitch;
+        size_t bufferOrigin[] = {0, 1, 0};
+        size_t hostOrigin[] = {0, 0, 0};
+        size_t region[] = {size, 1, 1};
+
+        retVal = pCmdQ->enqueueWriteBufferRect(
+            buffer.get(),
+            blockingWrite,
+            bufferOrigin,
+            hostOrigin,
+            region,
+            rowPitch,
+            slicePitch,
+            rowPitch,
+            slicePitch,
+            ptrOffset(srcMemory, offset),
+            0,
+            nullptr,
+            nullptr);
+
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        pCmdQ->finish(true);
+
+        AUBCommandStreamFixture::expectMemory<FamilyType>(pDestMemory, referenceMemory, rowPitch);
+        AUBCommandStreamFixture::expectMemory<FamilyType>(pDestMemory + rowPitch * bufferOrigin[1], ptrOffset(srcMemory, offset), size);
+        AUBCommandStreamFixture::expectMemory<FamilyType>(pDestMemory + rowPitch * bufferOrigin[1] + size, referenceMemory, bufferSize - size - rowPitch);
+    }
+};
+
+HWTEST_F(AUBWriteBufferRectUnaligned, misalignedHostPtr) {
+    const std::vector<size_t> offsets = {0, 1, 2, 3};
+    const std::vector<size_t> sizes = {4, 3, 2, 1};
+    for (auto offset : offsets) {
+        for (auto size : sizes) {
+            testWriteBufferUnaligned<FamilyType>(offset, size);
+        }
+    }
+}
diff --git a/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp b/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp
index 8ecc1874b5..bd8fd9abb0 100644
--- a/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp
+++ b/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp
@@ -111,6 +111,7 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa
                                                kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset);
         EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset);
     } else {
+        // dstOffset arg should be 4 bytes in size, if that changes, above if path should be modified
         EXPECT_TRUE(false);
     }
 }