diff --git a/runtime/built_ins/built_ins.cpp b/runtime/built_ins/built_ins.cpp index 3667d01f39..12a8bf093c 100644 --- a/runtime/built_ins/built_ins.cpp +++ b/runtime/built_ins/built_ins.cpp @@ -243,14 +243,14 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispa } else if (operationParams.srcMemObj) { kernelSplit1DBuilder.setArg(0, operationParams.srcMemObj); } else { - kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY); + kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x + operationParams.srcOffset.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY); } if (operationParams.dstSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(1, operationParams.dstPtr, operationParams.dstSvmAlloc); } else if (operationParams.dstMemObj) { kernelSplit1DBuilder.setArg(1, operationParams.dstMemObj); } else { - kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x, operationParams.dstPtr); + kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x + operationParams.dstOffset.x, operationParams.dstPtr); } // Set-up srcOffset diff --git a/runtime/command_queue/enqueue_read_buffer.h b/runtime/command_queue/enqueue_read_buffer.h index 7d072744d0..cb463fb0ce 100644 --- a/runtime/command_queue/enqueue_read_buffer.h +++ b/runtime/command_queue/enqueue_read_buffer.h @@ -84,9 +84,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer( BuiltInOwnershipWrapper builtInLock(builder, this->context); void *dstPtr = ptr; + void *alignedDstPtr = dstPtr; + size_t dstPtrOffset = 0; + + if (!isAligned<4>(dstPtr)) { + alignedDstPtr = alignDown(dstPtr, 4); + dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); + } MemObjSurface bufferSurf(buffer); - HostPtrSurface hostPtrSurf(dstPtr, size); + HostPtrSurface hostPtrSurf(alignedDstPtr, size + dstPtrOffset); Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; if (size != 0) { @@ -94,11 +101,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer( if (!status) { return CL_OUT_OF_RESOURCES; } - dstPtr = 
reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch()); + + hostPtrSurf.getAllocation()->allocationOffset = dstPtrOffset; } BuiltinDispatchInfoBuilder::BuiltinOpParams dc; - dc.dstPtr = dstPtr; + dc.dstPtr = alignedDstPtr; + dc.dstOffset = {dstPtrOffset, 0, 0}; dc.srcMemObj = buffer; dc.srcOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; diff --git a/runtime/command_queue/enqueue_write_buffer.h b/runtime/command_queue/enqueue_write_buffer.h index dc3cfcc6bd..480c0ff7ca 100644 --- a/runtime/command_queue/enqueue_write_buffer.h +++ b/runtime/command_queue/enqueue_write_buffer.h @@ -83,8 +83,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer( BuiltInOwnershipWrapper builtInLock(builder, this->context); void *srcPtr = const_cast<void *>(ptr); + void *alignedSrcPtr = srcPtr; + size_t srcPtrOffset = 0; - HostPtrSurface hostPtrSurf(srcPtr, size, true); + if (!isAligned<4>(srcPtr)) { + alignedSrcPtr = alignDown(srcPtr, 4); + srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); + } + + HostPtrSurface hostPtrSurf(alignedSrcPtr, size + srcPtrOffset, true); MemObjSurface bufferSurf(buffer); Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; @@ -93,11 +100,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer( if (!status) { return CL_OUT_OF_RESOURCES; } - srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch()); + + hostPtrSurf.getAllocation()->allocationOffset = srcPtrOffset; } BuiltinDispatchInfoBuilder::BuiltinOpParams dc; - dc.srcPtr = srcPtr; + dc.srcPtr = alignedSrcPtr; + dc.srcOffset = {srcPtrOffset, 0, 0}; dc.dstMemObj = buffer; dc.dstOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; diff --git a/runtime/command_stream/aub_command_stream_receiver_hw.inl b/runtime/command_stream/aub_command_stream_receiver_hw.inl index 01c44359f7..9e584627c4 100644 --- a/runtime/command_stream/aub_command_stream_receiver_hw.inl +++ b/runtime/command_stream/aub_command_stream_receiver_hw.inl @@ -607,7 +607,7 @@ void 
AUBCommandStreamReceiverHw<GfxFamily>::makeNonResidentExternal(uint64_t gpu template <typename GfxFamily> bool AUBCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation) { - auto cpuAddress = gfxAllocation.getUnderlyingBuffer(); + auto cpuAddress = ptrOffset(gfxAllocation.getUnderlyingBuffer(), static_cast<size_t>(gfxAllocation.allocationOffset)); auto gpuAddress = GmmHelper::decanonize(gfxAllocation.getGpuAddress()); auto size = gfxAllocation.getUnderlyingBufferSize(); if (gfxAllocation.gmm && gfxAllocation.gmm->isRenderCompressed) { diff --git a/unit_tests/aub_tests/command_queue/enqueue_read_buffer_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_read_buffer_aub_tests.cpp index 1a561a32e0..abab7c60b8 100644 --- a/unit_tests/aub_tests/command_queue/enqueue_read_buffer_aub_tests.cpp +++ b/unit_tests/aub_tests/command_queue/enqueue_read_buffer_aub_tests.cpp @@ -143,3 +143,70 @@ HWTEST_F(AUBReadBuffer, reserveCanonicalGpuAddress) { AUBCommandStreamFixture::expectMemory(dstGpuAddress, srcMemory, sizeof(dstMemory)); } + +struct ReadBufferUnalignedHw + : public CommandEnqueueAUBFixture, + public ::testing::WithParamInterface<std::tuple<size_t, size_t>>, + public ::testing::Test { + + void SetUp() override { + CommandEnqueueAUBFixture::SetUp(); + } + + void TearDown() override { + CommandEnqueueAUBFixture::TearDown(); + } +}; + +typedef ReadBufferUnalignedHw AUBReadBufferUnalignedBytes; + +HWTEST_P(AUBReadBufferUnalignedBytes, simple) { + MockContext context(&pCmdQ->getDevice()); + + char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const auto bufferSize = sizeof(srcMemory); + char dstMemory[bufferSize] = {0}; + + auto retVal = CL_INVALID_VALUE; + + auto buffer = std::unique_ptr<Buffer>(Buffer::create( + &context, + CL_MEM_USE_HOST_PTR, + bufferSize, + srcMemory, + retVal)); + ASSERT_NE(nullptr, buffer); + + buffer->forceDisallowCPUCopy = true; + + // Get test params + size_t offset = std::get<0>(GetParam()); + size_t size = std::get<1>(GetParam()); + + // Map destination memory to GPU + GraphicsAllocation 
*allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize); + auto dstMemoryGPUPtr = reinterpret_cast<char *>(allocation->getGpuAddress()); + + // Do unaligned read + retVal = pCmdQ->enqueueReadBuffer( + buffer.get(), + CL_TRUE, + offset, + size, + ptrOffset(dstMemory, offset), + 0, + nullptr, + nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + // Check the memory + AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), ptrOffset(srcMemory, offset), size); +} + +INSTANTIATE_TEST_CASE_P(AUBReadBufferUnalignedBytes_simple, + AUBReadBufferUnalignedBytes, + ::testing::Combine( + ::testing::Values( // offset + 0, 1, 2, 3), + ::testing::Values( // size + 4, 3, 2, 1))); diff --git a/unit_tests/aub_tests/command_queue/enqueue_write_buffer_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_write_buffer_aub_tests.cpp index b4103234a0..ae9b57a363 100644 --- a/unit_tests/aub_tests/command_queue/enqueue_write_buffer_aub_tests.cpp +++ b/unit_tests/aub_tests/command_queue/enqueue_write_buffer_aub_tests.cpp @@ -1,23 +1,8 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (C) 2017-2018 Intel Corporation * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: + * SPDX-License-Identifier: MIT * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. */ #include "runtime/command_stream/command_stream_receiver.h" @@ -123,3 +108,65 @@ INSTANTIATE_TEST_CASE_P(AUBWriteBuffer_simple, 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float))); + +struct WriteBufferUnalignedHw + : public CommandEnqueueAUBFixture, + public ::testing::WithParamInterface<std::tuple<size_t, size_t>>, + public ::testing::Test { + + void SetUp() override { + CommandEnqueueAUBFixture::SetUp(); + } + + void TearDown() override { + CommandEnqueueAUBFixture::TearDown(); + } +}; + +typedef WriteBufferUnalignedHw AUBWriteBufferUnalignedBytes; + +HWTEST_P(AUBWriteBufferUnalignedBytes, simple) { + MockContext context(&pCmdQ->getDevice()); + + char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const auto bufferSize = sizeof(srcMemory); + char dstMemory[bufferSize] = {0}; + + auto retVal = CL_INVALID_VALUE; + + auto buffer = std::unique_ptr<Buffer>(Buffer::create( + &context, + CL_MEM_USE_HOST_PTR, + bufferSize, + dstMemory, + retVal)); + ASSERT_NE(nullptr, buffer); + + buffer->forceDisallowCPUCopy = true; + + // Get test params + size_t offset = std::get<0>(GetParam()); + size_t size = std::get<1>(GetParam()); + + retVal = pCmdQ->enqueueWriteBuffer( + buffer.get(), + CL_TRUE, + offset, + size, + ptrOffset(srcMemory, offset), + 0, + nullptr, + nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + auto bufferGPUPtr = reinterpret_cast<char *>((buffer->getGraphicsAllocation()->getGpuAddress())); + AUBCommandStreamFixture::expectMemory(ptrOffset(bufferGPUPtr, offset), ptrOffset(srcMemory, offset), size); +} + +INSTANTIATE_TEST_CASE_P(AUBWriteBufferUnalignedBytes_simple, + AUBWriteBufferUnalignedBytes, + ::testing::Combine( + ::testing::Values( // offset + 0, 1, 2, 3), + ::testing::Values( // size + 4, 3, 2, 1)));