diff --git a/core/memory_manager/graphics_allocation.h b/core/memory_manager/graphics_allocation.h index 1254eccbf3..ee95109db0 100644 --- a/core/memory_manager/graphics_allocation.h +++ b/core/memory_manager/graphics_allocation.h @@ -217,11 +217,10 @@ class GraphicsAllocation : public IDNode { static constexpr uint32_t defaultBank = 0b1u; static constexpr uint32_t allBanks = 0xffffffff; - - protected: constexpr static uint32_t objectNotResident = std::numeric_limits::max(); constexpr static uint32_t objectNotUsed = std::numeric_limits::max(); + protected: struct UsageInfo { uint32_t taskCount = objectNotUsed; uint32_t residencyTaskCount = objectNotResident; diff --git a/runtime/command_queue/enqueue_read_buffer.h b/runtime/command_queue/enqueue_read_buffer.h index 8abc546059..b44dcd4ec9 100644 --- a/runtime/command_queue/enqueue_read_buffer.h +++ b/runtime/command_queue/enqueue_read_buffer.h @@ -41,6 +41,23 @@ cl_int CommandQueueHw::enqueueReadBuffer( bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr, numEventsInWaitList, eventWaitList); + //check if we are dealing with SVM pointer here for which we already have an allocation + if (!mapAllocation && this->getContext().getSVMAllocsManager()) { + auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr); + if (svmEntry) { + if ((svmEntry->gpuAllocation->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { + return CL_INVALID_OPERATION; + } + + mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation; + if (isCpuCopyAllowed) { + if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { + isCpuCopyAllowed = false; + } + } + } + } + if (isCpuCopyAllowed) { if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr, @@ -70,21 +87,6 @@ cl_int CommandQueueHw::enqueueReadBuffer( GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; - //check if we are dealing with SVM pointer here for which we already have an allocation - if (!mapAllocation && this->getContext().getSVMAllocsManager()) { - auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr); - if (svmEntry) { - if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { - return CL_INVALID_OPERATION; - } - if ((svmEntry->gpuAllocation->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { - return CL_INVALID_OPERATION; - } - - mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation; - } - } - if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); diff --git a/runtime/command_queue/enqueue_write_buffer.h b/runtime/command_queue/enqueue_write_buffer.h index 12276cc8c8..21dbdee0ce 100644 --- a/runtime/command_queue/enqueue_write_buffer.h +++ b/runtime/command_queue/enqueue_write_buffer.h @@ -36,6 +36,24 @@ cl_int CommandQueueHw::enqueueWriteBuffer( bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast(ptr), numEventsInWaitList, eventWaitList); + //check if we are dealing with SVM pointer here for which we already have an allocation + if (!mapAllocation && this->getContext().getSVMAllocsManager()) { + auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr); + if (svmEntry) { + if ((svmEntry->gpuAllocation->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { + return CL_INVALID_OPERATION; + } + + if (isCpuCopyAllowed) { + if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { + isCpuCopyAllowed = false; + } + } + + mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation; + } + } + if (isCpuCopyAllowed) { if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), @@ -66,21 +84,6 @@ cl_int CommandQueueHw::enqueueWriteBuffer( GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; - //check if we are dealing with SVM pointer here for which we already have an allocation - if (!mapAllocation && this->getContext().getSVMAllocsManager()) { - auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr); - if (svmEntry) { - if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { - return CL_INVALID_OPERATION; - } - if ((svmEntry->gpuAllocation->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { - return CL_INVALID_OPERATION; - } - - mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation; - } - } - if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); diff --git a/unit_tests/memory_manager/unified_memory_manager_tests.cpp b/unit_tests/memory_manager/unified_memory_manager_tests.cpp index 6290dbfec5..fd2912fd62 100644 --- a/unit_tests/memory_manager/unified_memory_manager_tests.cpp +++ b/unit_tests/memory_manager/unified_memory_manager_tests.cpp @@ -619,7 +619,7 @@ TEST(UnfiedSharedMemoryTransferCalls, givenHostUSMllocationWhenPointerIsUsedForT ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } -TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenErrorIsReturned) { +TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenUsmAllocationIsReused) { MockContext mockContext; cl_context clContext = &mockContext; @@ -627,6 +627,7 @@ TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTr cl_device_id clDevice = mockContext.getDevice(0u); auto deviceMemory = clDeviceMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); + auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); @@ -636,10 +637,19 @@ TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTr ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); - EXPECT_EQ(CL_INVALID_OPERATION, status); + ASSERT_EQ(CL_SUCCESS, status); + + auto neoQueue = castToObject(commandQueue); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); + EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); + auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); + + EXPECT_EQ(1u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); - ASSERT_EQ(CL_INVALID_OPERATION, status); + ASSERT_EQ(CL_SUCCESS, status); + + EXPECT_EQ(2u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); @@ -648,6 +658,90 @@ TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTr clReleaseCommandQueue(commandQueue); } +TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenCPUPathIsNotChoosen) { + MockContext mockContext; + cl_context clContext = &mockContext; + + auto status = CL_SUCCESS; + cl_device_id clDevice = mockContext.getDevice(0u); + + auto deviceMemory = clDeviceMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); + auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); + + ASSERT_EQ(CL_SUCCESS, status); + auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); + ASSERT_EQ(CL_SUCCESS, status); + + auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); + ASSERT_EQ(CL_SUCCESS, status); + + status = clEnqueueWriteBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); + ASSERT_EQ(CL_SUCCESS, status); + + auto neoQueue = castToObject(commandQueue); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); + EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); + auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); + + EXPECT_EQ(1u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); + + status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); + ASSERT_EQ(CL_SUCCESS, status); + + EXPECT_EQ(2u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); + + status = clReleaseMemObject(buffer); + ASSERT_EQ(CL_SUCCESS, status); + status = clMemFreeINTEL(clContext, deviceMemory); + ASSERT_EQ(CL_SUCCESS, status); + clReleaseCommandQueue(commandQueue); +} + +TEST(UnfiedSharedMemoryTransferCalls, givenHostUsmAllocationWhenPtrIsUsedForTransferCallsThenCPUPathIsChoosen) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableLocalMemory.set(false); + MockContext mockContext; + cl_context clContext = &mockContext; + + if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.clVersionSupport < 20) { + GTEST_SKIP(); + } + + auto status = CL_SUCCESS; + cl_device_id clDevice = mockContext.getDevice(0u); + + auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); + auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); + + ASSERT_EQ(CL_SUCCESS, status); + auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); + ASSERT_EQ(CL_SUCCESS, status); + + auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); + ASSERT_EQ(CL_SUCCESS, status); + + status = clEnqueueWriteBuffer(commandQueue, buffer, true, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); + ASSERT_EQ(CL_SUCCESS, status); + + auto neoQueue = castToObject(commandQueue); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); + EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); + auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); + + EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->gpuAllocation->getTaskCount(osContextId)); + + status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); + ASSERT_EQ(CL_SUCCESS, status); + + EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->gpuAllocation->getTaskCount(osContextId)); + + status = clReleaseMemObject(buffer); + ASSERT_EQ(CL_SUCCESS, status); + status = clMemFreeINTEL(clContext, sharedMemory); + ASSERT_EQ(CL_SUCCESS, status); + clReleaseCommandQueue(commandQueue); +} + TEST(UnfiedSharedMemoryTransferCalls, givenHostAllocationThatIsSmallerThenTransferRequirementsThenErrorIsReturned) { MockContext mockContext; cl_context clContext = &mockContext;