Reuse SVM storage on writeBuffer call.

- Instead of creating a new allocation, check whether one already exists for the pointer and, if so, reuse it. Change-Id: I23bc4ac8b8e59e96fce7d48546b76289bedc157f Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
2020-01-22 14:50:16 +01:00 · 2020-01-22 14:50:16 +01:00 · 35b59b7cbe
parent 0d7cc3a8c0
commit 35b59b7cbe
3 changed files with 126 additions and 2 deletions
--- a/2
+++ b/2
@ -1,5 +1,5 @@
 #!groovy
 dependenciesRevision='31422303aeb91920dbad182d97ef610bf75b5599-1360'
 strategy='EQUAL'
-allowedCD=259
+allowedCD=257
 allowedF=11
--- a/runtime/command_queue/enqueue_write_buffer.h
+++ b/runtime/command_queue/enqueue_write_buffer.h
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2019 Intel Corporation
+ * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -7,6 +7,7 @@

 #pragma once
 #include "core/helpers/string.h"
+#include "core/memory_manager/unified_memory_manager.h"
 #include "runtime/built_ins/built_ins.h"
 #include "runtime/command_queue/command_queue_hw.h"
 #include "runtime/command_stream/command_stream_receiver.h"
@ -65,6 +66,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
    GeneralSurface mapSurface;
    Surface *surfaces[] = {&bufferSurf, nullptr};

+    //check if we are dealing with SVM pointer here for which we already have an allocation
+    if (!mapAllocation && this->getContext().getSVMAllocsManager()) {
+        auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr);
+        if (svmEntry) {
+            mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation;
+        }
+    }
+
    if (mapAllocation) {
        surfaces[1] = &mapSurface;
        mapSurface.setGraphicsAllocation(mapAllocation);
--- a/unit_tests/memory_manager/unified_memory_manager_tests.cpp
+++ b/unit_tests/memory_manager/unified_memory_manager_tests.cpp
@ -5,12 +5,15 @@
 *
 */

+#include "core/memory_manager/allocations_list.h"
 #include "core/unit_tests/helpers/debug_manager_state_restore.h"
 #include "core/unit_tests/page_fault_manager/mock_cpu_page_fault_manager.h"
+#include "runtime/api/api.h"
 #include "runtime/command_stream/command_stream_receiver.h"
 #include "runtime/mem_obj/mem_obj_helper.h"
 #include "test.h"
 #include "unit_tests/mocks/mock_command_queue.h"
+#include "unit_tests/mocks/mock_context.h"
 #include "unit_tests/mocks/mock_execution_environment.h"
 #include "unit_tests/mocks/mock_memory_manager.h"
 #include "unit_tests/mocks/mock_svm_manager.h"
@ -576,3 +579,115 @@ TEST_F(ShareableUnifiedMemoryManagerPropertiesTest, givenShareableUnifiedPropert
    EXPECT_TRUE(memoryManager->shareablePassed);
    svmManager->freeSVMAlloc(ptr);
 }
+
+// Enqueues a non-blocking write whose source pointer comes from clHostMemAllocINTEL and
+// checks that the command stream receiver holds no temporary allocations afterwards and
+// that the USM entry's gpuAllocation reports task count 1 for the queue's OS context.
+TEST(UnfiedSharedMemoryTransferCalls, givenHostUSMllocationWhenPointerIsUsedAsWriteBufferSourceThenUSMAllocationIsReused) {
+    constexpr size_t transferSize = 4096u;
+
+    MockContext mockContext;
+    cl_context clContext = &mockContext;
+
+    auto retVal = CL_SUCCESS;
+
+    // Source of the transfer: host USM memory, together with its SVM manager entry.
+    auto hostPtr = clHostMemAllocINTEL(clContext, nullptr, transferSize, 0u, &retVal);
+    auto svmData = mockContext.getSVMAllocsManager()->getSVMAlloc(hostPtr);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    // Destination of the transfer.
+    auto dstBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, transferSize, nullptr, &retVal);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    cl_device_id device = mockContext.getDevice(0u);
+    auto queue = clCreateCommandQueue(clContext, device, 0u, &retVal);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    retVal = clEnqueueWriteBuffer(queue, dstBuffer, false, 0u, transferSize, hostPtr, 0u, nullptr, nullptr);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    auto pCmdQ = castToObject<CommandQueue>(queue);
+    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
+
+    // Nothing may have been queued on the temporary allocations list by the enqueue.
+    EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+
+    // The USM allocation itself must carry the task count of the single submission.
+    auto contextId = csr.getOsContext().getContextId();
+    EXPECT_EQ(1u, svmData->gpuAllocation->getTaskCount(contextId));
+
+    retVal = clReleaseMemObject(dstBuffer);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+    retVal = clMemFreeINTEL(clContext, hostPtr);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+    clReleaseCommandQueue(queue);
+}
+
+// With the EnableLocalMemory debug flag forced to 0, enqueues a non-blocking write whose
+// source pointer comes from clSharedMemAllocINTEL and checks that no temporary allocation
+// is left on the command stream receiver and that the USM entry's gpuAllocation reports
+// task count 1 for the queue's OS context.
+TEST(UnfiedSharedMemoryTransferCalls, givenSharedUSMllocationWithoutLocalMemoryWhenPointerIsUsedAsWriteBufferSourceThenUSMAllocationIsReused) {
+    constexpr size_t transferSize = 4096u;
+
+    // Restores the debug flags when the test ends.
+    DebugManagerStateRestore restore;
+    DebugManager.flags.EnableLocalMemory.set(0);
+
+    MockContext mockContext;
+    cl_context clContext = &mockContext;
+    cl_device_id device = mockContext.getDevice(0u);
+
+    auto retVal = CL_SUCCESS;
+
+    // Source of the transfer: shared USM memory, together with its SVM manager entry.
+    auto sharedPtr = clSharedMemAllocINTEL(clContext, device, nullptr, transferSize, 0u, &retVal);
+    auto svmData = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedPtr);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    // Destination of the transfer.
+    auto dstBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, transferSize, nullptr, &retVal);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    auto queue = clCreateCommandQueue(clContext, device, 0u, &retVal);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    retVal = clEnqueueWriteBuffer(queue, dstBuffer, false, 0u, transferSize, sharedPtr, 0u, nullptr, nullptr);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+
+    auto pCmdQ = castToObject<CommandQueue>(queue);
+    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
+
+    // Nothing may have been queued on the temporary allocations list by the enqueue.
+    EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+
+    // The USM allocation itself must carry the task count of the single submission.
+    auto contextId = csr.getOsContext().getContextId();
+    EXPECT_EQ(1u, svmData->gpuAllocation->getTaskCount(contextId));
+
+    retVal = clReleaseMemObject(dstBuffer);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+    retVal = clMemFreeINTEL(clContext, sharedPtr);
+    ASSERT_EQ(CL_SUCCESS, retVal);
+    clReleaseCommandQueue(queue);
+}
+
+// With the EnableLocalMemory debug flag forced to 1, enqueues a non-blocking write whose
+// source pointer comes from clSharedMemAllocINTEL and checks that the USM entry's
+// cpuAllocation goes from "never used on this OS context" to task count 1, and that no
+// temporary allocation is left on the command stream receiver.
+TEST(UnfiedSharedMemoryTransferCalls, givenSharedUSMllocationWithLocalMemoryWhenPointerIsUsedAsWriteBufferSourceThenUSMAllocationIsReused) {
+    // Restores the debug flags when the test ends.
+    DebugManagerStateRestore restore;
+    DebugManager.flags.EnableLocalMemory.set(1);
+
+    MockContext mockContext;
+    cl_context clContext = &mockContext;
+    cl_device_id clDevice = mockContext.getDevice(0u);
+
+    auto status = CL_SUCCESS;
+
+    // Source of the transfer: shared USM memory, together with its SVM manager entry.
+    auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status);
+    auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory);
+
+    ASSERT_EQ(CL_SUCCESS, status);
+    auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status);
+    ASSERT_EQ(CL_SUCCESS, status);
+
+    auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status);
+    ASSERT_EQ(CL_SUCCESS, status);
+
+    auto neoQueue = castToObject<CommandQueue>(commandQueue);
+    auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId();
+
+    // Nothing has been submitted on this queue yet, so the allocation must not be marked
+    // as used on its OS context before the enqueue.
+    EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->cpuAllocation->getTaskCount(osContextId));
+
+    status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr);
+    ASSERT_EQ(CL_SUCCESS, status);
+
+    // Nothing may have been queued on the temporary allocations list by the enqueue.
+    auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations();
+    EXPECT_TRUE(temporaryAllocations.peekIsEmpty());
+
+    // Exactly one enqueue was issued, so the reused allocation carries task count 1.
+    EXPECT_EQ(1u, svmAllocation->cpuAllocation->getTaskCount(osContextId));
+
+    status = clReleaseMemObject(buffer);
+    ASSERT_EQ(CL_SUCCESS, status);
+    status = clMemFreeINTEL(clContext, sharedMemory);
+    ASSERT_EQ(CL_SUCCESS, status);
+    clReleaseCommandQueue(commandQueue);
+}