diff --git a/runtime/command_queue/enqueue_read_buffer.h b/runtime/command_queue/enqueue_read_buffer.h index 25b6507cf9..2b4b5ddf9a 100644 --- a/runtime/command_queue/enqueue_read_buffer.h +++ b/runtime/command_queue/enqueue_read_buffer.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -97,7 +97,7 @@ cl_int CommandQueueHw::enqueueReadBuffer( } MemObjSurface bufferSurf(buffer); - HostPtrSurface hostPtrSurf(alignedDstPtr, size + dstPtrOffset); + HostPtrSurface hostPtrSurf(dstPtr, size); Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; if (size != 0) { @@ -105,8 +105,6 @@ cl_int CommandQueueHw::enqueueReadBuffer( if (!status) { return CL_OUT_OF_RESOURCES; } - - hostPtrSurf.getAllocation()->allocationOffset += dstPtrOffset; } BuiltinDispatchInfoBuilder::BuiltinOpParams dc; diff --git a/runtime/command_queue/enqueue_write_buffer.h b/runtime/command_queue/enqueue_write_buffer.h index ad54e743b4..edf36635e2 100644 --- a/runtime/command_queue/enqueue_write_buffer.h +++ b/runtime/command_queue/enqueue_write_buffer.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -97,7 +97,7 @@ cl_int CommandQueueHw::enqueueWriteBuffer( srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); } - HostPtrSurface hostPtrSurf(alignedSrcPtr, size + srcPtrOffset, true); + HostPtrSurface hostPtrSurf(srcPtr, size, true); MemObjSurface bufferSurf(buffer); Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; @@ -106,8 +106,6 @@ cl_int CommandQueueHw::enqueueWriteBuffer( if (!status) { return CL_OUT_OF_RESOURCES; } - - hostPtrSurf.getAllocation()->allocationOffset += srcPtrOffset; } BuiltinDispatchInfoBuilder::BuiltinOpParams dc; diff --git a/runtime/mem_obj/buffer.inl b/runtime/mem_obj/buffer.inl index 78f627e7e4..24fec2d1ab 100644 --- a/runtime/mem_obj/buffer.inl +++ b/runtime/mem_obj/buffer.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -33,7 +33,15 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode) { auto gmmHelper = executionEnvironment->getGmmHelper(); auto surfaceState = reinterpret_cast(memory); - auto surfaceSize = alignUp(getSize(), 4); + + // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address + auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast(getHostPtr()); + bufferAddress += this->offset; + + auto bufferAddressAligned = alignDown(bufferAddress, 4); + auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned); + + auto surfaceSize = alignUp(getSize() + bufferOffset, 4); SURFACE_STATE_BUFFER_LENGTH Length = {0}; Length.Length = static_cast(surfaceSize - 1); @@ -42,10 +50,6 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode) { surfaceState->setHeight(Length.SurfaceState.Height + 1); surfaceState->setDepth(Length.SurfaceState.Depth + 1); - // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address - auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast(getHostPtr()); - bufferAddress += this->offset; - auto bufferSize = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getUnderlyingBufferSize() : getSize(); if (bufferAddress != 0) { @@ -67,7 +71,7 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode) { surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } - surfaceState->setSurfaceBaseAddress(bufferAddress); + surfaceState->setSurfaceBaseAddress(bufferAddressAligned); Gmm *gmm = graphicsAllocation ? graphicsAllocation->gmm : nullptr; diff --git a/unit_tests/command_queue/enqueue_read_buffer_tests.cpp b/unit_tests/command_queue/enqueue_read_buffer_tests.cpp index b175e2f960..65a913ee37 100644 --- a/unit_tests/command_queue/enqueue_read_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_buffer_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -327,30 +327,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead EXPECT_FALSE(csr.disableL3Cache); } -HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) { - void *ptr = (void *)0x1039; - - cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), - CL_FALSE, - 0, - MemoryConstants::cacheLineSize, - ptr, - 0, - nullptr, - nullptr); - - EXPECT_EQ(CL_SUCCESS, retVal); - auto &csr = pDevice->getUltCommandStreamReceiver(); - - auto allocation = csr.getTemporaryAllocations().peekHead(); - while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) { - allocation = allocation->next; - } - - ASSERT_NE(allocation, nullptr); - EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr); -} - HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(true); diff --git a/unit_tests/command_queue/enqueue_write_buffer_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_tests.cpp index 7971947c7e..09adabab92 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -434,27 +434,3 @@ HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocatio EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } - -HWTEST_F(EnqueueWriteBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenWriteBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) { - void *ptr = (void *)0x1039; - - cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), - CL_FALSE, - 0, - MemoryConstants::cacheLineSize, - ptr, - 0, - nullptr, - nullptr); - - EXPECT_EQ(CL_SUCCESS, retVal); - auto &csr = pDevice->getUltCommandStreamReceiver(); - - auto allocation = csr.getTemporaryAllocations().peekHead(); - while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) { - allocation = allocation->next; - } - - ASSERT_NE(allocation, nullptr); - EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr); -} diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index a206999e88..b1c5c97cc5 100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ b/unit_tests/mem_obj/buffer_tests.cpp @@ -1345,6 +1345,29 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT == surfaceState.getCoherencyType()); } +HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgrammedThenBaseAddressAndLengthAreAlignedToDword) { + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; + + RENDER_SURFACE_STATE surfaceState = {}; + MockContext context; + void *svmPtr = reinterpret_cast(0x1005); + + Buffer::setSurfaceState(device.get(), + &surfaceState, + 5, + svmPtr, + nullptr, + 0); + + EXPECT_EQ(0x1004u, surfaceState.getSurfaceBaseAddress()); + SURFACE_STATE_BUFFER_LENGTH length = {}; + length.SurfaceState.Width = surfaceState.getWidth() - 1; + length.SurfaceState.Height = surfaceState.getHeight() - 1; + length.SurfaceState.Depth = surfaceState.getDepth() - 1; + EXPECT_EQ(alignUp(5u, 4u), length.Length + 1); +} + struct BufferUnmapTest : public DeviceFixture, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); diff --git a/unit_tests/scenarios/CMakeLists.txt b/unit_tests/scenarios/CMakeLists.txt index 602861ed7f..0f83df4d91 100644 --- a/unit_tests/scenarios/CMakeLists.txt +++ b/unit_tests/scenarios/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2017-2018 Intel Corporation +# Copyright (C) 2017-2019 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -9,4 +9,5 @@ set(IGDRCL_SRCS_tests_scenarios ${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_barrier_scenario_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_with_callback_scenario_tests.cpp ) -target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios}) \ No newline at end of file +target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios}) +add_subdirectories() \ No newline at end of file diff --git a/unit_tests/scenarios/windows/CMakeLists.txt b/unit_tests/scenarios/windows/CMakeLists.txt new file mode 100644 index 0000000000..e8d7f49742 --- /dev/null +++ b/unit_tests/scenarios/windows/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# Copyright (C) 2017-2019 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +set(IGDRCL_SRCS_tests_scenarios_windows + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_buffer_scenarios_windows_tests.cpp +) +if(WIN32) + target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios_windows}) +endif() \ No newline at end of file diff --git a/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp b/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp new file mode 100644 index 0000000000..8ecc1874b5 --- /dev/null +++ b/unit_tests/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "runtime/os_interface/windows/os_interface.h" +#include "runtime/os_interface/windows/wddm_device_command_stream.h" + +#include "unit_tests/fixtures/buffer_fixture.h" +#include "unit_tests/helpers/hw_info_helper.h" +#include "unit_tests/helpers/execution_environment_helper.h" +#include "unit_tests/helpers/hw_parse.h" +#include "unit_tests/mocks/mock_command_queue.h" +#include "unit_tests/mocks/mock_device.h" +#include "unit_tests/os_interface/windows/mock_wddm_memory_manager.h" +#include "test.h" + +using namespace OCLRT; + +struct EnqueueBufferWindowsTest : public HardwareParse, + public ::testing::Test { + EnqueueBufferWindowsTest(void) + : buffer(nullptr) { + } + + void SetUp() override { + executionEnvironment = getExecutionEnvironmentImpl(hwInfo); + } + + void TearDown() override { + buffer.reset(nullptr); + } + + template + void initializeFixture() { + auto wddmCsr = new WddmCommandStreamReceiver(*hwInfo, *executionEnvironment); + + executionEnvironment->commandStreamReceivers.resize(1); + executionEnvironment->commandStreamReceivers[0][0].reset(wddmCsr); + + memoryManager = new MockWddmMemoryManager(executionEnvironment->osInterface->get()->getWddm(), *executionEnvironment); + executionEnvironment->memoryManager.reset(memoryManager); + + device = std::unique_ptr(Device::create(hwInfo, executionEnvironment, 0)); + + context = std::make_unique(device.get()); + + const size_t bufferMisalignment = 1; + const size_t bufferSize = 16; + bufferMemory = std::make_unique(alignUp(bufferSize + bufferMisalignment, sizeof(uint32_t))); + cl_int retVal = 0; + + buffer.reset(Buffer::create(context.get(), + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + bufferSize, + reinterpret_cast(bufferMemory.get()) + bufferMisalignment, + retVal)); + + EXPECT_EQ(CL_SUCCESS, retVal); + } + + protected: + HwInfoHelper hwInfoHelper; + HardwareInfo *hwInfo = nullptr; + ExecutionEnvironment *executionEnvironment; + cl_queue_properties properties = {}; + std::unique_ptr bufferMemory; + std::unique_ptr device; + std::unique_ptr context; + std::unique_ptr buffer; + + MockWddmMemoryManager *memoryManager = nullptr; +}; + +HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCalledThenStateBaseAddressAddressIsAlignedAndMatchesKernelDispatchInfoParams) { + initializeFixture(); + auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); + uint32_t memory[2] = {}; + char *misalignedPtr = reinterpret_cast(memory) + 1; + + buffer->forceDisallowCPUCopy = true; + auto retVal = cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 4, misalignedPtr, 0, nullptr, nullptr); + + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(0, cmdQ->lastEnqueuedKernels.size()); + Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; + + parseCommands(*cmdQ); + + if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { + const auto &surfaceStateDst = getSurfaceState(1); + + if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { + auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(reinterpret_cast(alignDown(misalignedPtr, 4)), *pKernelArg); + EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); + + } else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(reinterpret_cast(alignDown(misalignedPtr, 4)), static_cast(*pKernelArg)); + EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); + } + } + + if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + + kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); + EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset); + } else { + EXPECT_TRUE(false); + } +}