Enabling USM path for write buffer

Signed-off-by: Andrzej Koska <andrzej.koska@intel.com>
Related-To: NEO-5360
This commit is contained in:
Andrzej Koska
2021-01-05 11:39:04 +00:00
committed by Compute-Runtime-Automation
parent ae1e5a91b4
commit 670013e88a
3 changed files with 101 additions and 8 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -51,10 +51,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr,
numEventsInWaitList, eventWaitList);
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
//check if we are dealing with SVM pointer here for which we already have an allocation
if (!mapAllocation && this->getContext().getSVMAllocsManager()) {
auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr);
if (svmEntry) {
memoryType = svmEntry->memoryType;
if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {
return CL_INVALID_OPERATION;
}
@ -97,8 +99,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
surfaces[1] = &mapSurface;
mapSurface.setGraphicsAllocation(mapAllocation);
//get offset between base cpu ptr of map allocation and dst ptr
size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer());
dstPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + dstOffset);
if (memoryType != DEVICE_UNIFIED_MEMORY) {
size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer());
dstPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + dstOffset);
}
} else {
surfaces[1] = &hostPtrSurf;
if (size != 0) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -41,10 +41,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList);
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
//check if we are dealing with SVM pointer here for which we already have an allocation
if (!mapAllocation && this->getContext().getSVMAllocsManager()) {
auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr);
if (svmEntry) {
memoryType = svmEntry->memoryType;
if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {
return CL_INVALID_OPERATION;
}
@ -89,8 +91,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
surfaces[1] = &mapSurface;
mapSurface.setGraphicsAllocation(mapAllocation);
//get offset between base cpu ptr of map allocation and dst ptr
size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer());
srcPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + srcOffset);
if (memoryType != DEVICE_UNIFIED_MEMORY) {
size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer());
srcPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + srcOffset);
}
} else {
surfaces[1] = &hostPtrSurf;
if (size != 0) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -11,10 +11,12 @@
#include "shared/test/unit_test/mocks/mock_device.h"
#include "shared/test/unit_test/mocks/mock_graphics_allocation.h"
#include "shared/test/unit_test/mocks/ult_device_factory.h"
#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h"
#include "shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests_fixture.h"
#include "opencl/source/api/api.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
@ -919,3 +921,86 @@ TEST(UnfiedSharedMemoryTransferCalls, givenSharedUsmAllocationWithLocalMemoryWhe
ASSERT_EQ(CL_SUCCESS, status);
clReleaseCommandQueue(commandQueue);
}
class UnfiedSharedMemoryHWTest : public testing::Test {
public:
MockContext mockContext;
};
template <typename GfxFamily>
class TestCommandQueueHw : public CommandQueueHw<GfxFamily> {
typedef CommandQueueHw<GfxFamily> BaseClass;
public:
TestCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false){};
void *srcPtr = nullptr;
void *dstPtr = nullptr;
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override {
auto svmEntrySrc = this->getContext().getSVMAllocsManager()->getSVMAlloc(multiDispatchInfo.peekBuiltinOpParams().srcPtr);
if (svmEntrySrc) {
srcPtr = multiDispatchInfo.peekBuiltinOpParams().srcPtr;
} else {
srcPtr = multiDispatchInfo.peekBuiltinOpParams().transferAllocation->getUnderlyingBuffer();
}
auto svmEntryDst = this->getContext().getSVMAllocsManager()->getSVMAlloc(multiDispatchInfo.peekBuiltinOpParams().dstPtr);
if (svmEntryDst) {
dstPtr = multiDispatchInfo.peekBuiltinOpParams().dstPtr;
} else {
dstPtr = multiDispatchInfo.peekBuiltinOpParams().transferAllocation->getUnderlyingBuffer();
}
}
};
HWTEST_F(UnfiedSharedMemoryHWTest, givenDeviceUsmAllocationWhenWriteBufferThenCpuPtrIsNotUsed) {
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY,
mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields());
auto deviceMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties);
auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory);
GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex());
char *cpuPtr = static_cast<char *>(gpuAllocation->getUnderlyingBuffer());
auto gpuAddress = gpuAllocation->getGpuAddress();
void *gpuPtr = reinterpret_cast<void *>(gpuAddress);
char *shiftedPtr = cpuPtr + 0x10;
gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast<uint64_t>(shiftedPtr));
cl_mem_flags flags = 0;
auto status = CL_INVALID_PLATFORM;
auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status);
ASSERT_EQ(CL_SUCCESS, status);
TestCommandQueueHw<FamilyType> myCmdQ(&mockContext, mockContext.getDevice(0u), 0);
myCmdQ.enqueueWriteBuffer(buffer, false, 0u, 4096u, deviceMemory, nullptr, 0u, nullptr, nullptr);
EXPECT_EQ(gpuPtr, myCmdQ.srcPtr);
gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress);
delete buffer;
clMemFreeINTEL(&mockContext, deviceMemory);
}
HWTEST_F(UnfiedSharedMemoryHWTest, givenDeviceUsmAllocationWhenReadBufferThenCpuPtrIsNotUsed) {
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY,
mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields());
auto deviceMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties);
auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory);
GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex());
char *cpuPtr = static_cast<char *>(gpuAllocation->getUnderlyingBuffer());
auto gpuAddress = gpuAllocation->getGpuAddress();
void *gpuPtr = reinterpret_cast<void *>(gpuAddress);
char *shiftedPtr = cpuPtr + 0x10;
gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast<uint64_t>(shiftedPtr));
cl_mem_flags flags = 0;
auto status = CL_INVALID_PLATFORM;
auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status);
ASSERT_EQ(CL_SUCCESS, status);
TestCommandQueueHw<FamilyType> myCmdQ(&mockContext, mockContext.getDevice(0u), 0);
myCmdQ.enqueueReadBuffer(buffer, false, 0u, 4096u, deviceMemory, nullptr, 0u, nullptr, nullptr);
EXPECT_EQ(gpuPtr, myCmdQ.dstPtr);
gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress);
delete buffer;
clMemFreeINTEL(&mockContext, deviceMemory);
}