compute-runtime/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp

2368 lines
97 KiB
C++

/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_svm_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/utilities/base_object_utils.h"
#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h"
#include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
using namespace NEO;
struct EnqueueSvmTest : public ClDeviceFixture,
public CommandQueueHwFixture,
public ::testing::Test {
typedef CommandQueueHwFixture CommandQueueFixture;
EnqueueSvmTest() {
}
void SetUp() override {
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
ClDeviceFixture::setUp();
CommandQueueFixture::setUp(pClDevice, 0);
ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
}
void TearDown() override {
if (defaultHwInfo->capabilityTable.ftrSvm == false) {
return;
}
context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
CommandQueueFixture::tearDown();
ClDeviceFixture::tearDown();
}
std::pair<ReleaseableObjectPtr<Buffer>, void *> createBufferAndMapItOnGpu() {
DebugManagerStateRestore restore{};
DebugManager.flags.DisableZeroCopyForBuffers.set(1);
BufferDefaults::context = this->context;
ReleaseableObjectPtr<Buffer> buffer = clUniquePtr(BufferHelper<>::create());
void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, mappedPtr);
return {std::move(buffer), mappedPtr};
}
cl_int retVal = CL_SUCCESS;
void *ptrSVM = nullptr;
};
TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenMappingSvmThenInvalidValueErrorIsReturned) {
void *svmPtr = nullptr;
retVal = this->pCmdQ->enqueueSVMMap(
CL_FALSE, // cl_bool blocking_map
CL_MAP_READ, // cl_map_flags map_flags
svmPtr, // void *svm_ptr
0, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmThenSuccessIsReturned) {
retVal = this->pCmdQ->enqueueSVMMap(
CL_FALSE, // cl_bool blocking_map
CL_MAP_READ, // cl_map_flags map_flags
ptrSVM, // void *svm_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenMappingSvmThenOutOfResourcesIsReturned) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const auto enqueueResult = mockCommandQueueHw.enqueueSVMMap(
CL_TRUE, // cl_bool blocking_map
CL_MAP_READ, // cl_map_flags map_flags
ptrSVM, // void *svm_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // const cL_event *event_wait_list
nullptr, // cl_event *event
false); // bool externalAppCall
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithBlockingThenSuccessIsReturned) {
retVal = this->pCmdQ->enqueueSVMMap(
CL_TRUE, // cl_bool blocking_map
CL_MAP_READ, // cl_map_flags map_flags
ptrSVM, // void *svm_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithEventsThenSuccessIsReturned) {
UserEvent uEvent;
cl_event eventWaitList[] = {&uEvent};
retVal = this->pCmdQ->enqueueSVMMap(
CL_FALSE, // cl_bool blocking_map
CL_MAP_READ, // cl_map_flags map_flags
ptrSVM, // void *svm_ptr
256, // size_t size
1, // cl_uint num_events_in_wait_list
eventWaitList, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenUnmappingSvmThenInvalidValueErrorIsReturned) {
void *svmPtr = nullptr;
retVal = this->pCmdQ->enqueueSVMUnmap(
svmPtr, // void *svm_ptr
0, // cl_uint num_events_in_wait_list
nullptr, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmThenSuccessIsReturned) {
retVal = this->pCmdQ->enqueueSVMUnmap(
ptrSVM, // void *svm_ptr
0, // cl_uint num_events_in_wait_list
nullptr, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenUnmappingSvmThenOutOfResourcesIsReturned) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const auto enqueueResult = mockCommandQueueHw.enqueueSVMUnmap(
ptrSVM,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmWithEventsThenSuccessIsReturned) {
UserEvent uEvent;
cl_event eventWaitList[] = {&uEvent};
retVal = this->pCmdQ->enqueueSVMUnmap(
ptrSVM, // void *svm_ptr
1, // cl_uint num_events_in_wait_list
eventWaitList, // const cL_event *event_wait_list
nullptr, // cl_event *event
false);
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmThenSuccessIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
ASSERT_EQ(1U, this->context->getSVMAllocsManager()->getNumAllocs());
void *svmPtrs[] = {ptrSVM};
retVal = this->pCmdQ->enqueueSVMFree(
1, // cl_uint num_svm_pointers
svmPtrs, // void *svm_pointers[]
nullptr, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[])
nullptr, // void *user_data
0, // cl_uint num_events_in_wait_list
nullptr, // const cl_event *event_wait_list
nullptr // cl_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_EQ(0U, this->context->getSVMAllocsManager()->getNumAllocs());
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackThenSuccessIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
void *svmPtrs[] = {ptrSVM};
bool callbackWasCalled = false;
struct ClbHelper {
ClbHelper(bool &callbackWasCalled)
: callbackWasCalled(callbackWasCalled) {}
static void CL_CALLBACK clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) {
ClbHelper *data = (ClbHelper *)usrData;
data->callbackWasCalled = true;
}
bool &callbackWasCalled;
} userData(callbackWasCalled);
retVal = this->pCmdQ->enqueueSVMFree(
1, // cl_uint num_svm_pointers
svmPtrs, // void *svm_pointers[]
ClbHelper::clb, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[])
&userData, // void *user_data
0, // cl_uint num_events_in_wait_list
nullptr, // const cl_event *event_wait_list
nullptr // cl_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(callbackWasCalled);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackAndEventThenSuccessIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
void *svmPtrs[] = {ptrSVM};
bool callbackWasCalled = false;
struct ClbHelper {
ClbHelper(bool &callbackWasCalled)
: callbackWasCalled(callbackWasCalled) {}
static void CL_CALLBACK clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) {
ClbHelper *data = (ClbHelper *)usrData;
data->callbackWasCalled = true;
}
bool &callbackWasCalled;
} userData(callbackWasCalled);
cl_event event = nullptr;
retVal = this->pCmdQ->enqueueSVMFree(
1, // cl_uint num_svm_pointers
svmPtrs, // void *svm_pointers[]
ClbHelper::clb, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[])
&userData, // void *user_data
0, // cl_uint num_events_in_wait_list
nullptr, // const cl_event *event_wait_list
&event // cl_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(callbackWasCalled);
auto pEvent = (Event *)event;
delete pEvent;
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithBlockingThenSuccessIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
void *svmPtrs[] = {ptrSVM};
UserEvent uEvent;
cl_event eventWaitList[] = {&uEvent};
retVal = this->pCmdQ->enqueueSVMFree(
1, // cl_uint num_svm_pointers
svmPtrs, // void *svm_pointers[]
nullptr, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[])
nullptr, // void *user_data
1, // cl_uint num_events_in_wait_list
eventWaitList, // const cl_event *event_wait_list
nullptr // cl_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, GivenEventAndGpuHangAndBlockingCallAndValidParamsWhenFreeingSvmWithBlockingThenEventIsNotDeletedAndOutOfResourcesIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const cl_uint numOfSvmPointers = 1;
void *svmPtrs[numOfSvmPointers] = {ptrSVM};
UserEvent uEvent;
const cl_uint numOfEvents = 1;
cl_event eventWaitList[numOfEvents] = {&uEvent};
cl_event retEvent = nullptr;
const auto enqueueResult = mockCommandQueueHw.enqueueSVMFree(
numOfSvmPointers,
svmPtrs,
nullptr,
nullptr,
numOfEvents,
eventWaitList,
&retEvent);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
ASSERT_NE(nullptr, retEvent);
castToObjectOrAbort<Event>(retEvent)->release();
}
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenFreeingSvmWithBlockingThenOutOfResourcesIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const cl_uint numOfSvmPointers = 1;
void *svmPtrs[numOfSvmPointers] = {ptrSVM};
UserEvent uEvent;
const cl_uint numOfEvents = 1;
cl_event eventWaitList[numOfEvents] = {&uEvent};
const auto enqueueResult = mockCommandQueueHw.enqueueSVMFree(
numOfSvmPointers,
svmPtrs,
nullptr,
nullptr,
numOfEvents,
eventWaitList,
nullptr);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}
TEST_F(EnqueueSvmTest, GivenNullDstPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
void *pDstSVM = nullptr;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}
TEST_F(EnqueueSvmTest, GivenNullSrcPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = nullptr;
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
TEST_F(EnqueueSvmTest, givenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
char srcHostPtr[260] = {};
void *pDstSVM = ptrSVM;
void *pSrcSVM = srcHostPtr;
cl_event event = nullptr;
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
&event // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
EXPECT_EQ(expectedCmd, actualCmd);
clReleaseEvent(event);
}
TEST_F(EnqueueSvmTest, givenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
char srcHostPtr[260] = {};
void *pDstSVM = ptrSVM;
void *pSrcSVM = srcHostPtr;
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
0, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, givenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) {
char srcHostPtr[260];
void *pSrcSVM = srcHostPtr;
void *pDstSVM = ptrSVM;
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER));
auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
EXPECT_EQ(0u, tempAlloc->countSuccessors());
EXPECT_EQ(pSrcSVM, reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
auto srcAddress = myCmdQ.kernelParams.srcPtr;
auto srcOffset = myCmdQ.kernelParams.srcOffset.x;
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress);
EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset);
EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset);
}
HWTEST_F(EnqueueSvmTest, givenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) {
char dstHostPtr[260];
void *pDstSVM = dstHostPtr;
void *pSrcSVM = ptrSVM;
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER));
auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
EXPECT_EQ(0u, tempAlloc->countSuccessors());
EXPECT_EQ(pDstSVM, reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
auto srcAddress = myCmdQ.kernelParams.srcPtr;
auto srcOffset = myCmdQ.kernelParams.srcOffset.x;
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress);
EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset);
EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset);
}
TEST_F(EnqueueSvmTest, givenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
char dstHostPtr[260];
void *pDstSVM = dstHostPtr;
void *pSrcSVM = ptrSVM;
cl_event event = nullptr;
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
&event // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
EXPECT_EQ(expectedCmd, actualCmd);
clReleaseEvent(event);
}
TEST_F(EnqueueSvmTest, givenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
char dstHostPtr[260];
void *pDstSVM = dstHostPtr;
void *pSrcSVM = ptrSVM;
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
0, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueNonBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) {
char dstHostPtr[] = {0, 0, 0};
char srcHostPtr[] = {1, 2, 3};
void *pDstSVM = dstHostPtr;
void *pSrcSVM = srcHostPtr;
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
3, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER));
auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
EXPECT_EQ(1u, tempAlloc->countSuccessors());
EXPECT_EQ(pSrcSVM, reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
EXPECT_EQ(pDstSVM, reinterpret_cast<void *>(tempAlloc->next->getGpuAddress()));
auto srcAddress = myCmdQ.kernelParams.srcPtr;
auto srcOffset = myCmdQ.kernelParams.srcOffset.x;
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress);
EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset);
EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset);
}
HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) {
char dstHostPtr[] = {0, 0, 0};
char srcHostPtr[] = {1, 2, 3};
void *pDstSVM = dstHostPtr;
void *pSrcSVM = srcHostPtr;
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemcpy(
true, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
3, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER));
auto srcAddress = myCmdQ.kernelParams.srcPtr;
auto srcOffset = myCmdQ.kernelParams.srcOffset.x;
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress);
EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset);
EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset);
}
TEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
char dstHostPtr[260] = {};
char srcHostPtr[260] = {};
void *pDstSVM = dstHostPtr;
void *pSrcSVM = srcHostPtr;
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
0, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSVMMemcpyThenSvmMemcpyCommandIsEnqueued) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_SVM_MEMCPY));
auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
EXPECT_EQ(nullptr, tempAlloc);
auto srcAddress = myCmdQ.kernelParams.srcPtr;
auto srcOffset = myCmdQ.kernelParams.srcOffset.x;
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress);
EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset);
EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset);
context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}
TEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueBlockingSVMMemcpyThenSuccessIsReturned) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
retVal = this->pCmdQ->enqueueSVMMemcpy(
true, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenCopyingMemoryWithBlockingThenSuccessisReturned) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
auto uEvent = makeReleaseable<UserEvent>();
cl_event eventWaitList[] = {uEvent.get()};
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
1, // cl_uint num_events_in_wait_list
eventWaitList, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
uEvent->setStatus(-1);
}
TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryThenSuccessIsReturned) {
void *pDstSVM = ptrSVM;
SVMAllocsManager::SvmAllocationProperties svmProperties;
svmProperties.coherent = true;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields());
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}
TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryWithBlockingThenSuccessIsReturned) {
void *pDstSVM = ptrSVM;
SVMAllocsManager::SvmAllocationProperties svmProperties;
svmProperties.coherent = true;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields());
auto uEvent = makeReleaseable<UserEvent>();
cl_event eventWaitList[] = {uEvent.get()};
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
1, // cl_uint num_events_in_wait_list
eventWaitList, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
uEvent->setStatus(-1);
}
HWTEST_F(EnqueueSvmTest, givenUnalignedAddressWhenEnqueueMemcpyThenDispatchInfoHasAlignedAddressAndProperOffset) {
void *pDstSVM = reinterpret_cast<void *>(0x17);
void *pSrcSVM = ptrSVM;
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
0, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
auto srcAddress = myCmdQ.kernelParams.srcPtr;
auto srcOffset = myCmdQ.kernelParams.srcOffset.x;
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress);
EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset);
EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset);
}
TEST_F(EnqueueSvmTest, GivenNullSvmPtrWhenFillingMemoryThenInvalidValueErrorIsReturned) {
void *svmPtr = nullptr;
const float pattern[1] = {1.2345f};
const size_t patternSize = sizeof(pattern);
retVal = this->pCmdQ->enqueueSVMMemFill(
svmPtr, // void *svm_ptr
pattern, // const void *pattern
patternSize, // size_t pattern_size
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
HWTEST_F(EnqueueSvmTest, givenSvmAllocWhenEnqueueSvmFillThenSuccesIsReturnedAndAddressIsProperlyAligned) {
const float pattern[1] = {1.2345f};
const size_t patternSize = sizeof(pattern);
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
retVal = myCmdQ.enqueueSVMMemFill(
ptrSVM, // void *svm_ptr
pattern, // const void *pattern
patternSize, // size_t pattern_size
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
auto dstAddress = myCmdQ.kernelParams.dstPtr;
auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(ptrSVM, 4), dstAddress);
EXPECT_EQ(ptrDiff(ptrSVM, alignDown(ptrSVM, 4)), dstOffset);
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFillingMemoryWithBlockingThenSuccessIsReturned) {
const float pattern[1] = {1.2345f};
const size_t patternSize = sizeof(pattern);
auto uEvent = makeReleaseable<UserEvent>();
cl_event eventWaitList[] = {uEvent.get()};
retVal = this->pCmdQ->enqueueSVMMemFill(
ptrSVM, // void *svm_ptr
pattern, // const void *pattern
patternSize, // size_t pattern_size
256, // size_t size
1, // cl_uint num_events_in_wait_list
eventWaitList, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
uEvent->setStatus(-1);
}
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenFillingMemoryThenOutOfResourcesIsReturned) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const float pattern[1] = {1.2345f};
const size_t patternSize = sizeof(pattern);
auto uEvent = makeReleaseable<UserEvent>();
const cl_uint numOfEvents = 1;
cl_event eventWaitList[numOfEvents] = {uEvent.get()};
const auto enqueueResult = mockCommandQueueHw.enqueueSVMMemFill(
ptrSVM,
pattern,
patternSize,
256,
numOfEvents,
eventWaitList,
nullptr);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
uEvent->setStatus(-1);
}
TEST_F(EnqueueSvmTest, GivenRepeatCallsWhenFillingMemoryThenSuccessIsReturnedForEachCall) {
const float pattern[1] = {1.2345f};
const size_t patternSize = sizeof(pattern);
retVal = this->pCmdQ->enqueueSVMMemFill(
ptrSVM, // void *svm_ptr
pattern, // const void *pattern
patternSize, // size_t pattern_size
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = this->pCmdQ->enqueueSVMMemFill(
ptrSVM, // void *svm_ptr
pattern, // const void *pattern
patternSize, // size_t pattern_size
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(EnqueueSvmTest, givenEnqueueSVMMemFillWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) {
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
const float pattern[1] = {1.2345f};
const size_t patternSize = sizeof(pattern);
const size_t size = patternSize;
retVal = this->pCmdQ->enqueueSVMMemFill(
ptrSVM,
pattern,
patternSize,
size,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead();
ASSERT_NE(nullptr, patternAllocation);
EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());
}
TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned) {
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
ASSERT_NE(nullptr, svmData);
GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
EXPECT_NE(nullptr, ptrSVM);
std::unique_ptr<MockProgram> program(Program::createBuiltInFromSource<MockProgram>("FillBufferBytes", context, context->getDevices(), &retVal));
program->build(program->getDevices(), nullptr, false);
std::unique_ptr<MockKernel> kernel(Kernel::create<MockKernel>(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context->getDevice(0), &retVal));
kernel->setSvmKernelExecInfo(svmAllocation);
size_t offset = 0;
size_t size = 1;
retVal = this->pCmdQ->enqueueKernel(
kernel.get(),
1,
&offset,
&size,
&size,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size());
}
TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) {
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
ASSERT_NE(nullptr, svmData);
GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
EXPECT_NE(nullptr, ptrSVM);
auto program = clUniquePtr(Program::createBuiltInFromSource<MockProgram>("FillBufferBytes", context, context->getDevices(), &retVal));
program->build(program->getDevices(), nullptr, false);
auto pMultiDeviceKernel = clUniquePtr(MultiDeviceKernel::create<MockKernel>(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), &retVal));
auto kernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
std::vector<Surface *> allSurfaces;
kernel->getResidency(allSurfaces);
EXPECT_EQ(1u, allSurfaces.size());
kernel->setSvmKernelExecInfo(svmAllocation);
auto uEvent = makeReleaseable<UserEvent>();
cl_event eventWaitList[] = {uEvent.get()};
size_t offset = 0;
size_t size = 1;
retVal = this->pCmdQ->enqueueKernel(
kernel,
1,
&offset,
&size,
&size,
1,
eventWaitList,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
kernel->getResidency(allSurfaces);
EXPECT_EQ(3u, allSurfaces.size());
for (auto &surface : allSurfaces)
delete surface;
EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size());
uEvent->setStatus(-1);
}
TEST_F(EnqueueSvmTest, GivenMultipleThreasWhenAllocatingSvmThenOnlyOneAllocationIsCreated) {
std::atomic<int> flag(0);
std::atomic<int> ready(0);
void *svmPtrs[15] = {};
auto allocSvm = [&](uint32_t from, uint32_t to) {
for (uint32_t i = from; i <= to; i++) {
svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtrs[i]);
ASSERT_NE(nullptr, svmData);
auto ga = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
EXPECT_NE(nullptr, ga);
EXPECT_EQ(ga->getUnderlyingBuffer(), svmPtrs[i]);
}
};
auto freeSvm = [&](uint32_t from, uint32_t to) {
for (uint32_t i = from; i <= to; i++) {
context->getSVMAllocsManager()->freeSVMAlloc(svmPtrs[i]);
}
};
auto asyncFcn = [&](bool alloc, uint32_t from, uint32_t to) {
flag++;
while (flag < 3)
;
if (alloc) {
allocSvm(from, to);
}
freeSvm(from, to);
ready++;
};
EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs());
allocSvm(10, 14);
auto t1 = std::unique_ptr<std::thread>(new std::thread(asyncFcn, true, 0, 4));
auto t2 = std::unique_ptr<std::thread>(new std::thread(asyncFcn, true, 5, 9));
auto t3 = std::unique_ptr<std::thread>(new std::thread(asyncFcn, false, 10, 14));
while (ready < 3) {
std::this_thread::yield();
}
EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs());
t1->join();
t2->join();
t3->join();
}
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMigratingMemoryThenSuccessIsReturned) {
const void *svmPtrs[] = {ptrSVM};
retVal = this->pCmdQ->enqueueSVMMigrateMem(
1, // cl_uint num_svm_pointers
svmPtrs, // const void **svm_pointers
nullptr, // const size_t *sizes
0, // const cl_mem_migration_flags flags
0, // cl_uint num_events_in_wait_list
nullptr, // cl_event *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
}
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenMigratingMemoryThenOutOfResourcesIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const void *svmPtrs[] = {ptrSVM};
const auto enqueueResult = mockCommandQueueHw.enqueueSVMMigrateMem(
1, // cl_uint num_svm_pointers
svmPtrs, // const void **svm_pointers
nullptr, // const size_t *sizes
0, // const cl_mem_migration_flags flags
0, // cl_uint num_events_in_wait_list
nullptr, // cl_event *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}
HWTEST_F(EnqueueSvmTest, WhenMigratingMemoryThenSvmMigrateMemCommandTypeIsUsed) {
MockCommandQueueHw<FamilyType> commandQueue{context, pClDevice, nullptr};
const void *svmPtrs[] = {ptrSVM};
retVal = commandQueue.enqueueSVMMigrateMem(
1, // cl_uint num_svm_pointers
svmPtrs, // const void **svm_pointers
nullptr, // const size_t *sizes
0, // const cl_mem_migration_flags flags
0, // cl_uint num_events_in_wait_list
nullptr, // cl_event *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
uint32_t expectedCommandType = CL_COMMAND_SVM_MIGRATE_MEM;
EXPECT_EQ(expectedCommandType, commandQueue.lastCommandType);
}
TEST(CreateSvmAllocTests, givenVariousSvmAllocationPropertiesWhenAllocatingSvmThenSvmIsCorrectlyAllocated) {
if (!defaultHwInfo->capabilityTable.ftrSvm) {
return;
}
DebugManagerStateRestore dbgRestore;
SVMAllocsManager::SvmAllocationProperties svmAllocationProperties;
for (auto isLocalMemorySupported : ::testing::Bool()) {
DebugManager.flags.EnableLocalMemory.set(isLocalMemorySupported);
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
auto mockContext = std::make_unique<MockContext>(mockDevice.get());
for (auto isReadOnly : ::testing::Bool()) {
for (auto isHostPtrReadOnly : ::testing::Bool()) {
svmAllocationProperties.readOnly = isReadOnly;
svmAllocationProperties.hostPtrReadOnly = isHostPtrReadOnly;
auto ptrSVM = mockContext->getSVMAllocsManager()->createSVMAlloc(256, svmAllocationProperties, mockContext->getRootDeviceIndices(), mockContext->getDeviceBitfields());
EXPECT_NE(nullptr, ptrSVM);
mockContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
}
}
}
}
struct EnqueueSvmTestLocalMemory : public ClDeviceFixture,
public ::testing::Test {
void SetUp() override {
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
dbgRestore = std::make_unique<DebugManagerStateRestore>();
DebugManager.flags.EnableLocalMemory.set(1);
ClDeviceFixture::setUp();
context = std::make_unique<MockContext>(pClDevice, true);
size = 256;
svmPtr = context->getSVMAllocsManager()->createSVMAlloc(size, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
ASSERT_NE(nullptr, svmPtr);
mockSvmManager = reinterpret_cast<MockSVMAllocsManager *>(context->getSVMAllocsManager());
}
void TearDown() override {
if (defaultHwInfo->capabilityTable.ftrSvm == false) {
return;
}
context->getSVMAllocsManager()->freeSVMAlloc(svmPtr);
context.reset(nullptr);
ClDeviceFixture::tearDown();
}
cl_int retVal = CL_SUCCESS;
void *svmPtr = nullptr;
size_t size;
MockSVMAllocsManager *mockSvmManager;
std::unique_ptr<DebugManagerStateRestore> dbgRestore;
std::unique_ptr<MockContext> context;
HardwareParse hwParse;
};
HWTEST_F(EnqueueSvmTestLocalMemory, givenWriteInvalidateRegionFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_TRUE,
CL_MAP_WRITE_INVALIDATE_REGION,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
EXPECT_FALSE(svmMap->readOnlyMap);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenGpuHangAndBlockingCallAndWriteInvalidateRegionFlagWhenMappingSvmThenOutOfResourcesIsReturned) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
const auto enqueueResult = queue.enqueueSVMMap(
CL_TRUE,
CL_MAP_WRITE_INVALIDATE_REGION,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, queue.waitForAllEnginesCalledCount);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenMapWriteFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_TRUE,
CL_MAP_WRITE,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
EXPECT_FALSE(svmMap->readOnlyMap);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_TRUE,
CL_MAP_READ,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
EXPECT_TRUE(svmMap->readOnlyMap);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadAndWriteFlagWhenMappingSvmThenDontSetReadOnlyProperty) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
EXPECT_FALSE(svmMap->readOnlyMap);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenSvmAllocWithoutFlagsWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_TRUE,
0,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
EXPECT_FALSE(svmMap->readOnlyMap);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqueueMapValidSvmPtrThenExpectSingleWalker) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
LinearStream &stream = queue.getCS(0x1000);
cl_event event = nullptr;
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_FALSE,
CL_MAP_READ,
regionSvmPtr,
regionSize,
0,
nullptr,
&event,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
ASSERT_NE(nullptr, svmMap);
EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr);
EXPECT_EQ(svmPtr, svmMap->baseSvmPtr);
EXPECT_EQ(regionSize, svmMap->regionSize);
EXPECT_EQ(offset, svmMap->offset);
EXPECT_TRUE(svmMap->readOnlyMap);
queue.flush();
hwParse.parseCommands<FamilyType>(stream);
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(1u, walkerCount);
constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP;
cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
EXPECT_EQ(expectedCmd, actualCmd);
clReleaseEvent(event);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqueueMapSvmPtrTwiceThenExpectSingleWalker) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
LinearStream &stream = queue.getCS(0x1000);
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
retVal = queue.enqueueSVMMap(
CL_FALSE,
CL_MAP_WRITE,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr);
ASSERT_NE(nullptr, svmMap);
EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr);
EXPECT_EQ(svmPtr, svmMap->baseSvmPtr);
EXPECT_EQ(regionSize, svmMap->regionSize);
EXPECT_EQ(offset, svmMap->offset);
EXPECT_FALSE(svmMap->readOnlyMap);
EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations());
cl_event event = nullptr;
retVal = queue.enqueueSVMMap(
CL_FALSE,
CL_MAP_WRITE,
regionSvmPtr,
regionSize,
0,
nullptr,
&event,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations());
queue.flush();
hwParse.parseCommands<FamilyType>(stream);
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(1u, walkerCount);
constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP;
cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
EXPECT_EQ(expectedCmd, actualCmd);
clReleaseEvent(event);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndBlockingCallAndGpuHangOnSecondMapWhenEnqueueMapSvmPtrTwiceThenSecondCallReturnsOutOfresources) {
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready;
uintptr_t offset = 64;
void *regionSvmPtr = ptrOffset(svmPtr, offset);
size_t regionSize = 64;
const auto firstMapResult = mockCommandQueueHw.enqueueSVMMap(
CL_TRUE,
CL_MAP_WRITE,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, firstMapResult);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const auto secondMapResult = mockCommandQueueHw.enqueueSVMMap(
CL_TRUE,
CL_MAP_WRITE,
regionSvmPtr,
regionSize,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_OUT_OF_RESOURCES, secondMapResult);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenNoMappedSvmPtrThenExpectNoUnmapCopyKernel) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
LinearStream &stream = queue.getCS(0x1000);
cl_event event = nullptr;
retVal = queue.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
&event,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
queue.flush();
hwParse.parseCommands<FamilyType>(stream);
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(0u, walkerCount);
constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP;
cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
EXPECT_EQ(expectedCmd, actualCmd);
clReleaseEvent(event);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndGpuHangAndBlockingCallWhenUnmappingThenReturnOutOfResources) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const auto enqueueResult = mockCommandQueueHw.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsReadOnlyThenExpectNoUnmapCopyKernel) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
LinearStream &stream = queue.getCS(0x1000);
retVal = queue.enqueueSVMMap(
CL_FALSE,
CL_MAP_READ,
svmPtr,
size,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations());
auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr);
ASSERT_NE(nullptr, svmMap);
queue.flush();
size_t offset = stream.getUsed();
hwParse.parseCommands<FamilyType>(stream);
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(1u, walkerCount);
hwParse.tearDown();
cl_event event = nullptr;
retVal = queue.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
&event,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations());
queue.flush();
hwParse.parseCommands<FamilyType>(stream, offset);
walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(0u, walkerCount);
constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP;
cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
EXPECT_EQ(expectedCmd, actualCmd);
clReleaseEvent(event);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndBlockingCallAndGpuHangForUnmapWhenUnmapingThenOutOfResourcesIsReturnedFromUnmap) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
std::unique_ptr<ClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
cl_queue_properties props = {};
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready;
const auto enqueueMapResult = mockCommandQueueHw.enqueueSVMMap(
CL_FALSE,
CL_MAP_READ,
svmPtr,
size,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, enqueueMapResult);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const auto enqueueUnmapResult = mockCommandQueueHw.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueUnmapResult);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenNonReadOnlyMapWhenUnmappingThenSetAubTbxWritableBeforeUnmapEnqueue) {
class MyQueue : public MockCommandQueueHw<FamilyType> {
public:
using MockCommandQueueHw<FamilyType>::MockCommandQueueHw;
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
waitUntilCompleteCalled++;
if (allocationToVerify) {
EXPECT_TRUE(allocationToVerify->isAubWritable(GraphicsAllocation::defaultBank));
EXPECT_TRUE(allocationToVerify->isTbxWritable(GraphicsAllocation::defaultBank));
}
}
uint32_t waitUntilCompleteCalled = 0;
GraphicsAllocation *allocationToVerify = nullptr;
};
MyQueue myQueue(context.get(), pClDevice, nullptr);
retVal = myQueue.enqueueSVMMap(CL_TRUE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
myQueue.allocationToVerify = gpuAllocation;
gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank);
gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank);
EXPECT_EQ(1u, myQueue.waitUntilCompleteCalled);
retVal = myQueue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(2u, myQueue.waitUntilCompleteCalled);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenReadOnlyMapWhenUnmappingThenDontResetAubTbxWritable) {
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false);
EXPECT_EQ(CL_SUCCESS, retVal);
auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank);
gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank);
retVal = queue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(gpuAllocation->isAubWritable(GraphicsAllocation::defaultBank));
EXPECT_FALSE(gpuAllocation->isTbxWritable(GraphicsAllocation::defaultBank));
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenExpectMapAndUnmapCopyKernel) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
LinearStream &stream = queue.getCS(0x1000);
cl_event eventMap = nullptr;
retVal = queue.enqueueSVMMap(
CL_FALSE,
CL_MAP_WRITE,
svmPtr,
size,
0,
nullptr,
&eventMap,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations());
auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr);
ASSERT_NE(nullptr, svmMap);
cl_event eventUnmap = nullptr;
retVal = queue.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
&eventUnmap,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations());
queue.flush();
hwParse.parseCommands<FamilyType>(stream);
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
constexpr cl_command_type expectedMapCmd = CL_COMMAND_SVM_MAP;
cl_command_type actualMapCmd = castToObjectOrAbort<Event>(eventMap)->getCommandType();
EXPECT_EQ(expectedMapCmd, actualMapCmd);
constexpr cl_command_type expectedUnmapCmd = CL_COMMAND_SVM_UNMAP;
cl_command_type actualUnmapCmd = castToObjectOrAbort<Event>(eventUnmap)->getCommandType();
EXPECT_EQ(expectedUnmapCmd, actualUnmapCmd);
clReleaseEvent(eventMap);
clReleaseEvent(eventUnmap);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenGpuHangAndBlockingCallAndEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenUnmapReturnsOutOfResources) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.MakeEachEnqueueBlocking.set(true);
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
queue.waitForAllEnginesReturnValue = WaitStatus::Ready;
const auto enqueueMapResult = queue.enqueueSVMMap(
CL_TRUE,
CL_MAP_WRITE,
svmPtr,
size,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, enqueueMapResult);
queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang;
const auto enqueueUnmapResult = queue.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueUnmapResult);
EXPECT_EQ(2, queue.waitForAllEnginesCalledCount);
}
HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAndNoEventIsUsedIsWritableThenExpectMapAndUnmapCopyKernelAnNo) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
MockCommandQueueHw<FamilyType> queue(context.get(), pClDevice, nullptr);
LinearStream &stream = queue.getCS(0x1000);
retVal = queue.enqueueSVMMap(
CL_FALSE,
CL_MAP_WRITE,
svmPtr,
size,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations());
auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr);
ASSERT_NE(nullptr, svmMap);
retVal = queue.enqueueSVMUnmap(
svmPtr,
0,
nullptr,
nullptr,
false);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations());
queue.flush();
hwParse.parseCommands<FamilyType>(stream);
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
}
template <typename GfxFamily>
struct FailCsr : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw;
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
return CL_FALSE;
}
};
HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreMadeResidentThenOnlyNonSvmAllocationsAreAdded) {
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields());
unifiedMemoryProperties.device = pDevice;
auto allocationSize = 4096u;
auto svmManager = this->context->getSVMAllocsManager();
EXPECT_NE(0u, svmManager->getNumAllocs());
auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
EXPECT_NE(nullptr, unifiedMemoryPtr);
EXPECT_EQ(2u, svmManager->getNumAllocs());
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto &residentAllocations = commandStreamReceiver.getResidencyAllocations();
EXPECT_EQ(0u, residentAllocations.size());
svmManager->makeInternalAllocationsResident(commandStreamReceiver, InternalMemoryType::DEVICE_UNIFIED_MEMORY);
//only unified memory allocation is made resident
EXPECT_EQ(1u, residentAllocations.size());
EXPECT_EQ(residentAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr));
svmManager->freeSVMAlloc(unifiedMemoryPtr);
}
HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyExpectedAllocationsAreAdded) {
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields());
unifiedMemoryProperties.device = pDevice;
auto allocationSize = 4096u;
auto svmManager = this->context->getSVMAllocsManager();
EXPECT_NE(0u, svmManager->getNumAllocs());
auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
EXPECT_NE(nullptr, unifiedMemoryPtr);
EXPECT_EQ(2u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
//only unified memory allocation is added to residency container
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr));
svmManager->freeSVMAlloc(unifiedMemoryPtr);
}
HWTEST_F(EnqueueSvmTest, whenInternalAllocationIsTriedToBeAddedTwiceToResidencyContainerThenItIsAdded) {
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields());
unifiedMemoryProperties.device = pDevice;
auto allocationSize = 4096u;
auto svmManager = this->context->getSVMAllocsManager();
EXPECT_NE(0u, svmManager->getNumAllocs());
auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
EXPECT_NE(nullptr, unifiedMemoryPtr);
EXPECT_EQ(2u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
//only unified memory allocation is added to residency container
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr));
svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(2u, residencyContainer.size());
svmManager->freeSVMAlloc(unifiedMemoryPtr);
}
struct createHostUnifiedMemoryAllocationTest : public ::testing::Test {
void SetUp() override {
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
device0 = context.pRootDevice0;
device1 = context.pRootDevice1;
device2 = context.pRootDevice2;
svmManager = context.getSVMAllocsManager();
EXPECT_EQ(0u, svmManager->getNumAllocs());
}
const size_t allocationSize = 4096u;
const uint32_t numDevices = 3u;
MockDefaultContext context;
MockClDevice *device2;
MockClDevice *device1;
MockClDevice *device0;
SVMAllocsManager *svmManager = nullptr;
};
HWTEST_F(createHostUnifiedMemoryAllocationTest,
whenCreatingHostUnifiedMemoryAllocationThenOneAllocDataIsCreatedWithOneGraphicsAllocationPerDevice) {
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields());
EXPECT_EQ(0u, svmManager->getNumAllocs());
auto unifiedMemoryPtr = svmManager->createHostUnifiedMemoryAllocation(allocationSize,
unifiedMemoryProperties);
EXPECT_NE(nullptr, unifiedMemoryPtr);
EXPECT_EQ(1u, svmManager->getNumAllocs());
auto allocData = svmManager->getSVMAlloc(unifiedMemoryPtr);
EXPECT_EQ(numDevices, allocData->gpuAllocations.getGraphicsAllocations().size());
for (uint32_t i = 0; i < allocData->gpuAllocations.getGraphicsAllocations().size(); i++) {
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(i);
EXPECT_EQ(i, alloc->getRootDeviceIndex());
}
svmManager->freeSVMAlloc(unifiedMemoryPtr);
}
HWTEST_F(createHostUnifiedMemoryAllocationTest,
whenCreatingMultiGraphicsAllocationThenGraphicsAllocationPerDeviceIsCreated) {
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields());
auto alignedSize = alignUp<size_t>(allocationSize, MemoryConstants::pageSize64k);
auto memoryManager = context.getMemoryManager();
auto allocationType = AllocationType::BUFFER_HOST_MEMORY;
auto maxRootDeviceIndex = numDevices - 1u;
RootDeviceIndicesContainer rootDeviceIndices;
rootDeviceIndices.reserve(numDevices);
rootDeviceIndices.push_back(0u);
rootDeviceIndices.push_back(1u);
rootDeviceIndices.push_back(2u);
auto rootDeviceIndex = rootDeviceIndices.at(0);
auto deviceBitfield = device0->getDeviceBitfield();
AllocationProperties allocationProperties{rootDeviceIndex,
true,
alignedSize,
allocationType,
deviceBitfield.count() > 1,
deviceBitfield.count() > 1,
deviceBitfield};
allocationProperties.flags.shareable = unifiedMemoryProperties.allocationFlags.flags.shareable;
SvmAllocationData allocData(maxRootDeviceIndex);
void *unifiedMemoryPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, allocData.gpuAllocations);
EXPECT_NE(nullptr, unifiedMemoryPtr);
EXPECT_EQ(numDevices, allocData.gpuAllocations.getGraphicsAllocations().size());
for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) {
auto alloc = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
EXPECT_NE(nullptr, alloc);
EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex());
}
for (auto gpuAllocation : allocData.gpuAllocations.getGraphicsAllocations()) {
memoryManager->freeGraphicsMemory(gpuAllocation);
}
}
HWTEST_F(createHostUnifiedMemoryAllocationTest,
whenCreatingMultiGraphicsAllocationForSpecificRootDeviceIndicesThenOnlyGraphicsAllocationPerSpecificRootDeviceIndexIsCreated) {
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields());
auto alignedSize = alignUp<size_t>(allocationSize, MemoryConstants::pageSize64k);
auto memoryManager = context.getMemoryManager();
auto allocationType = AllocationType::BUFFER_HOST_MEMORY;
auto maxRootDeviceIndex = numDevices - 1u;
RootDeviceIndicesContainer rootDeviceIndices;
rootDeviceIndices.reserve(numDevices);
rootDeviceIndices.push_back(0u);
rootDeviceIndices.push_back(2u);
auto noProgramedRootDeviceIndex = 1u;
auto rootDeviceIndex = rootDeviceIndices.at(0);
auto deviceBitfield = device0->getDeviceBitfield();
AllocationProperties allocationProperties{rootDeviceIndex,
true,
alignedSize,
allocationType,
deviceBitfield.count() > 1,
deviceBitfield.count() > 1,
deviceBitfield};
allocationProperties.flags.shareable = unifiedMemoryProperties.allocationFlags.flags.shareable;
SvmAllocationData allocData(maxRootDeviceIndex);
void *unifiedMemoryPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, allocData.gpuAllocations);
EXPECT_NE(nullptr, unifiedMemoryPtr);
EXPECT_EQ(numDevices, allocData.gpuAllocations.getGraphicsAllocations().size());
for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) {
auto alloc = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
if (rootDeviceIndex == noProgramedRootDeviceIndex) {
EXPECT_EQ(nullptr, alloc);
} else {
EXPECT_NE(nullptr, alloc);
EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex());
}
}
for (auto gpuAllocation : allocData.gpuAllocations.getGraphicsAllocations()) {
memoryManager->freeGraphicsMemory(gpuAllocation);
}
}
struct MemoryAllocationTypeArray {
const InternalMemoryType allocationType[3] = {InternalMemoryType::HOST_UNIFIED_MEMORY,
InternalMemoryType::DEVICE_UNIFIED_MEMORY,
InternalMemoryType::SHARED_UNIFIED_MEMORY};
};
struct UpdateResidencyContainerMultipleDevicesTest : public ::testing::WithParamInterface<std::tuple<InternalMemoryType, InternalMemoryType>>,
public ::testing::Test {
void SetUp() override {
device = context.pRootDevice0;
subDevice0 = context.pSubDevice00;
subDevice1 = context.pSubDevice01;
peerDevice = context.pRootDevice1;
peerSubDevice0 = context.pSubDevice10;
peerSubDevice1 = context.pSubDevice11;
svmManager = context.getSVMAllocsManager();
EXPECT_EQ(0u, svmManager->getNumAllocs());
}
MockUnrestrictiveContextMultiGPU context;
MockClDevice *device;
ClDevice *subDevice0 = nullptr;
ClDevice *subDevice1 = nullptr;
MockClDevice *peerDevice;
ClDevice *peerSubDevice0 = nullptr;
ClDevice *peerSubDevice1 = nullptr;
SVMAllocsManager *svmManager = nullptr;
const uint32_t numRootDevices = 2;
const uint32_t maxRootDeviceIndex = numRootDevices - 1;
};
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
givenNoAllocationsCreatedThenNoInternalAllocationsAreAddedToResidencyContainer) {
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(0u, residencyContainer.size());
}
HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, givenAllocationThenItIsAddedToContainerOnlyIfMaskMatches) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(),
static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
InternalMemoryType type = std::get<0>(GetParam());
uint32_t mask = std::get<1>(GetParam());
SvmAllocationData allocData(maxRootDeviceIndex);
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = type;
allocData.device = &device->getDevice();
svmManager->insertSVMAlloc(allocData);
EXPECT_EQ(1u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
mask);
if (mask == static_cast<uint32_t>(type)) {
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
} else {
EXPECT_EQ(0u, residencyContainer.size());
}
}
HWTEST_P(UpdateResidencyContainerMultipleDevicesTest,
whenUsingRootDeviceIndexGreaterThanMultiGraphicsAllocationSizeThenNoAllocationsAreAdded) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(),
static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
SvmAllocationData allocData(maxRootDeviceIndex);
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData.device = &device->getDevice();
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(),
(void *)pCmdBufferPeer, sizeof(pCmdBufferPeer));
SvmAllocationData allocDataPeer(maxRootDeviceIndex);
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocDataPeer.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocDataPeer.device = &peerDevice->getDevice();
svmManager->insertSVMAlloc(allocData);
svmManager->insertSVMAlloc(allocDataPeer);
EXPECT_EQ(2u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(numRootDevices + 1,
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
}
MemoryAllocationTypeArray memoryTypeArray;
INSTANTIATE_TEST_SUITE_P(UpdateResidencyContainerMultipleDevicesTests,
UpdateResidencyContainerMultipleDevicesTest,
::testing::Combine(
::testing::ValuesIn(memoryTypeArray.allocationType),
::testing::ValuesIn(memoryTypeArray.allocationType)));
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
whenInternalAllocationsAreAddedToResidencyContainerThenOnlyAllocationsFromSameDeviceAreAdded) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(),
static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
SvmAllocationData allocData(maxRootDeviceIndex);
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData.device = &device->getDevice();
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(),
(void *)pCmdBufferPeer, sizeof(pCmdBufferPeer));
SvmAllocationData allocDataPeer(maxRootDeviceIndex);
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocDataPeer.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocDataPeer.device = &peerDevice->getDevice();
svmManager->insertSVMAlloc(allocData);
svmManager->insertSVMAlloc(allocDataPeer);
EXPECT_EQ(2u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
}
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
givenSharedAllocationWithNullDevicePointerThenAllocationIsAddedToResidencyContainer) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
SvmAllocationData allocData(maxRootDeviceIndex);
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY;
allocData.device = nullptr;
svmManager->insertSVMAlloc(allocData);
EXPECT_EQ(1u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::SHARED_UNIFIED_MEMORY);
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
}
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
givenSharedAllocationWithNonNullDevicePointerAndDifferentDeviceToOnePassedToResidencyCallThenAllocationIsNotAddedToResidencyContainer) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(peerDevice->getDevice().getRootDeviceIndex(),
static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
SvmAllocationData allocData(maxRootDeviceIndex);
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY;
allocData.device = &peerDevice->getDevice();
svmManager->insertSVMAlloc(allocData);
EXPECT_EQ(1u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::SHARED_UNIFIED_MEMORY);
EXPECT_EQ(0u, residencyContainer.size());
}
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
givenAllocationsFromSubDevicesBelongingToTheSameTargetDeviceThenTheyAreAddedToTheResidencyContainer) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(),
static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
SvmAllocationData allocData0(maxRootDeviceIndex);
allocData0.gpuAllocations.addAllocation(&gfxAllocation);
allocData0.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData0.device = &subDevice0->getDevice();
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
(void *)pCmdBufferPeer, sizeof(pCmdBufferPeer));
SvmAllocationData allocData1(maxRootDeviceIndex);
allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocData1.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData1.device = &subDevice1->getDevice();
svmManager->insertSVMAlloc(allocData0);
svmManager->insertSVMAlloc(allocData1);
EXPECT_EQ(2u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(2u, residencyContainer.size());
}
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
givenAllocationsFromSubDevicesNotBelongingToTheSameTargetDeviceThenTheyAreNotAddedToTheResidencyContainer) {
uint32_t pCmdBuffer[1024];
MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(),
static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
SvmAllocationData allocData0(maxRootDeviceIndex);
allocData0.gpuAllocations.addAllocation(&gfxAllocation);
allocData0.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData0.device = &subDevice0->getDevice();
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
(void *)pCmdBufferPeer, sizeof(pCmdBufferPeer));
SvmAllocationData allocData1(maxRootDeviceIndex);
allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocData1.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData1.device = &subDevice1->getDevice();
svmManager->insertSVMAlloc(allocData0);
svmManager->insertSVMAlloc(allocData1);
EXPECT_EQ(2u, svmManager->getNumAllocs());
ResidencyContainer residencyContainer;
EXPECT_EQ(0u, residencyContainer.size());
svmManager->addInternalAllocationsToResidencyContainer(peerDevice->getDevice().getRootDeviceIndex(),
residencyContainer,
InternalMemoryType::DEVICE_UNIFIED_MEMORY);
EXPECT_EQ(0u, residencyContainer.size());
}
HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
char dstHostPtr[260];
void *pDstSVM = dstHostPtr;
void *pSrcSVM = ptrSVM;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
retVal = cmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
char srcHostPtr[260];
void *pDstSVM = ptrSVM;
void *pSrcSVM = srcHostPtr;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
retVal = cmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}
HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
char dstHostPtr[260];
char srcHostPtr[260];
void *pDstSVM = dstHostPtr;
void *pSrcSVM = srcHostPtr;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
auto failCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
retVal = cmdQ.enqueueSVMMemcpy(
false, // cl_bool blocking_copy
pDstSVM, // void *dst_ptr
pSrcSVM, // const void *src_ptr
256, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}
TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemcpyThenAllocIsDecommitted) {
auto mockMemoryManager = std::make_unique<MockMemoryManager>();
mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager());
auto memoryManager = context->getMemoryManager();
context->memoryManager = mockMemoryManager.get();
auto srcSvm = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
mockMemoryManager->getPageFaultManager()->insertAllocation(srcSvm, 256, context->getSVMAllocsManager(), context->getSpecialQueue(pDevice->getRootDeviceIndex()), {});
mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue(pDevice->getRootDeviceIndex()), {});
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
this->pCmdQ->enqueueSVMMemcpy(false, ptrSVM, srcSvm, 256, 0, nullptr, nullptr);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 2);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 2);
context->getSVMAllocsManager()->freeSVMAlloc(srcSvm);
context->memoryManager = memoryManager;
}
TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommitted) {
char pattern[256];
auto mockMemoryManager = std::make_unique<MockMemoryManager>();
mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager());
auto memoryManager = context->getMemoryManager();
context->memoryManager = mockMemoryManager.get();
mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue(0u), {});
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 0);
pCmdQ->enqueueSVMMemFill(ptrSVM, &pattern, 256, 256, 0, nullptr, nullptr);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 1);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
EXPECT_EQ(static_cast<MockPageFaultManager *>(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 1);
context->memoryManager = memoryManager;
}
HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToSvmAllocWhenCallingSvmMemcpyThenReuseMappedAllocations) {
constexpr size_t size = 1u;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
{
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
std::ignore = buffer;
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
ptrSVM, // void *dst_ptr
mappedPtr, // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
{
auto notMappedPtr = std::make_unique<char[]>(size);
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
ptrSVM, // void *dst_ptr
notMappedPtr.get(), // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
}
}
HWTEST_F(EnqueueSvmTest, givenCopyFromSvmAllocToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
constexpr size_t size = 1u;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
{
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
std::ignore = buffer;
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
mappedPtr, // void *dst_ptr
ptrSVM, // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
{
auto notMappedPtr = std::make_unique<char[]>(size);
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
notMappedPtr.get(), // void *dst_ptr
ptrSVM, // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
}
}
HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
constexpr size_t size = 1u;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
{
auto [buffer1, mappedPtr1] = createBufferAndMapItOnGpu();
auto [buffer2, mappedPtr2] = createBufferAndMapItOnGpu();
std::ignore = buffer1;
std::ignore = buffer2;
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
mappedPtr2, // void *dst_ptr
mappedPtr1, // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
{
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
std::ignore = buffer;
auto notMappedPtr = std::make_unique<char[]>(size);
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
mappedPtr, // void *dst_ptr
notMappedPtr.get(), // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
}
{
auto notMappedPtr = std::make_unique<char[]>(size);
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
std::ignore = buffer;
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
retVal = this->pCmdQ->enqueueSVMMemcpy(
false, // cl_bool blocking_copy
notMappedPtr.get(), // void *dst_ptr
mappedPtr, // const void *src_ptr
size, // size_t size
0, // cl_uint num_events_in_wait_list
nullptr, // cl_evebt *event_wait_list
nullptr // cL_event *event
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(2u, csr.createAllocationForHostSurfaceCalled);
}
}