// Patch overview (leading text before the first "diff --git" header is ignored by patch tools).
//
// Files touched:
//   1. opencl/source/command_queue/enqueue_read_buffer.h
//   2. opencl/source/command_queue/enqueue_write_buffer.h
//   3. opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp
//
// enqueueReadBuffer / enqueueWriteBuffer:
//   * Removes the assignment
//       mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation
//                                               : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
//     from the svmEntry handling shown in each hunk.
//   * Widens the guard around the ptrDiff()-based pointer rewrite: the host pointer (dstPtr for read,
//     srcPtr for write) is now rebased onto mapAllocation->getGpuAddress() only when memoryType is
//     neither DEVICE_UNIFIED_MEMORY nor SHARED_UNIFIED_MEMORY (previously only DEVICE_UNIFIED_MEMORY
//     was excluded).
//   * NOTE(review): the context comment in the write-buffer hunk says "dst ptr" although the code
//     below it adjusts srcPtr; it is a context line, so it must stay as-is for the patch to match.
//
// Tests added (two HWTEST_F cases on the UnfiedSharedMemoryHWTest fixture, one for
// enqueueWriteBuffer and one for enqueueReadBuffer):
//   * Create a 4096-byte SHARED_UNIFIED_MEMORY allocation and look up its GPU allocation.
//   * Temporarily call setCpuPtrAndGpuAddress() with a pointer shifted by 0x10 from the underlying buffer.
//   * Enqueue the transfer using the USM pointer as the host pointer and expect the queue's
//     srcPtr / dstPtr to equal the pointer built from the originally saved GPU address.
//   * Restore the original cpu ptr / gpu address, delete the buffer and free the USM allocation.
//
// NOTE(review): this copy of the patch appears flattened (hunk line breaks collapsed into spaces) and
// the casts carry no template arguments (e.g. "static_cast(...)", "reinterpret_cast(...)"), presumably
// lost in extraction; confirm against the original commit before trying to apply it.
diff --git a/opencl/source/command_queue/enqueue_read_buffer.h b/opencl/source/command_queue/enqueue_read_buffer.h index ffa401e09c..670ef0c855 100644 --- a/opencl/source/command_queue/enqueue_read_buffer.h +++ b/opencl/source/command_queue/enqueue_read_buffer.h @@ -60,7 +60,6 @@ cl_int CommandQueueHw::enqueueReadBuffer( if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { return CL_INVALID_OPERATION; } - mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); if (isCpuCopyAllowed) { if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { @@ -99,7 +98,7 @@ cl_int CommandQueueHw::enqueueReadBuffer( surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr - if (memoryType != DEVICE_UNIFIED_MEMORY) { + if ((memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY)) { size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer()); dstPtr = reinterpret_cast(mapAllocation->getGpuAddress() + dstOffset); } diff --git a/opencl/source/command_queue/enqueue_write_buffer.h b/opencl/source/command_queue/enqueue_write_buffer.h index 01dad49624..e187c44d07 100644 --- a/opencl/source/command_queue/enqueue_write_buffer.h +++ b/opencl/source/command_queue/enqueue_write_buffer.h @@ -56,7 +56,6 @@ cl_int CommandQueueHw::enqueueWriteBuffer( isCpuCopyAllowed = false; } } - mapAllocation = svmEntry->cpuAllocation ? 
svmEntry->cpuAllocation : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); } } @@ -91,7 +90,7 @@ cl_int CommandQueueHw::enqueueWriteBuffer( surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr - if (memoryType != DEVICE_UNIFIED_MEMORY) { + if ((memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY)) { size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer()); srcPtr = reinterpret_cast(mapAllocation->getGpuAddress() + srcOffset); } diff --git a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 9f565b0ef6..df351b5a30 100644 --- a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -1003,4 +1003,58 @@ HWTEST_F(UnfiedSharedMemoryHWTest, givenDeviceUsmAllocationWhenReadBufferThenCpu gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); delete buffer; clMemFreeINTEL(&mockContext, deviceMemory); +} + +HWTEST_F(UnfiedSharedMemoryHWTest, givenSharedUsmAllocationWhenWriteBufferThenCpuPtrIsNotUsed) { + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, + mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); + auto sharedMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); + auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); + GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); + + char *cpuPtr = static_cast(gpuAllocation->getUnderlyingBuffer()); + auto gpuAddress = gpuAllocation->getGpuAddress(); + void *gpuPtr = reinterpret_cast(gpuAddress); + char *shiftedPtr = cpuPtr + 0x10; + 
gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast(shiftedPtr)); + + cl_mem_flags flags = 0; + auto status = CL_INVALID_PLATFORM; + auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status); + ASSERT_EQ(CL_SUCCESS, status); + + TestCommandQueueHw myCmdQ(&mockContext, mockContext.getDevice(0u), 0); + myCmdQ.enqueueWriteBuffer(buffer, false, 0u, 4096u, sharedMemory, nullptr, 0u, nullptr, nullptr); + EXPECT_EQ(gpuPtr, myCmdQ.srcPtr); + + gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); + delete buffer; + clMemFreeINTEL(&mockContext, sharedMemory); +} + +HWTEST_F(UnfiedSharedMemoryHWTest, givenSharedUsmAllocationWhenReadBufferThenCpuPtrIsNotUsed) { + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, + mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); + auto sharedMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); + auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); + GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); + + char *cpuPtr = static_cast(gpuAllocation->getUnderlyingBuffer()); + auto gpuAddress = gpuAllocation->getGpuAddress(); + void *gpuPtr = reinterpret_cast(gpuAddress); + char *shiftedPtr = cpuPtr + 0x10; + gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast(shiftedPtr)); + + cl_mem_flags flags = 0; + auto status = CL_INVALID_PLATFORM; + auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status); + ASSERT_EQ(CL_SUCCESS, status); + + TestCommandQueueHw myCmdQ(&mockContext, mockContext.getDevice(0u), 0); + myCmdQ.enqueueReadBuffer(buffer, false, 0u, 4096u, sharedMemory, nullptr, 0u, nullptr, nullptr); + EXPECT_EQ(gpuPtr, myCmdQ.dstPtr); + + gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); + delete buffer; + 
clMemFreeINTEL(&mockContext, sharedMemory); } \ No newline at end of file