/*
 * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_cpu_page_fault_manager.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_svm_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/utilities/base_object_utils.h"

#include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"

#include <atomic>
#include <memory>
#include <thread>
#include <utility>
#include <vector>

using namespace NEO;

// Argument order of the command-queue SVM entry points exercised in this file
// (names follow the matching OpenCL API parameters):
//   enqueueSVMMap(blocking_map, map_flags, svm_ptr, size,
//                 num_events_in_wait_list, event_wait_list, event, externalAppCall)
//   enqueueSVMUnmap(svm_ptr, num_events_in_wait_list, event_wait_list, event,
//                   <bool, presumably externalAppCall as well - confirm>)
//   enqueueSVMFree(num_svm_pointers, svm_pointers, pfn_free_func, user_data,
//                  num_events_in_wait_list, event_wait_list, event)
//   enqueueSVMMemcpy(blocking_copy, dst_ptr, src_ptr, size,
//                    num_events_in_wait_list, event_wait_list, event)
//   enqueueSVMMemFill(svm_ptr, pattern, pattern_size, size,
//                     num_events_in_wait_list, event_wait_list, event)
//   enqueueSVMMigrateMem(num_svm_pointers, svm_pointers, sizes, flags,
//                        num_events_in_wait_list, event_wait_list, event)

// Fixture providing a device, a hardware command queue and one 256-byte SVM
// allocation (ptrSVM) that is created in SetUp and freed in TearDown.
struct EnqueueSvmTest : public ClDeviceFixture,
                        public CommandQueueHwFixture,
                        public ::testing::Test {
    using CommandQueueFixture = CommandQueueHwFixture;

    EnqueueSvmTest() {
    }

    void SetUp() override {
        REQUIRE_SVM_OR_SKIP(defaultHwInfo);
        ClDeviceFixture::setUp();
        CommandQueueFixture::setUp(pClDevice, 0);
        ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    }

    void TearDown() override {
        // Nothing was set up when SVM is not supported, so there is nothing to release.
        if (!defaultHwInfo->capabilityTable.ftrSvm) {
            return;
        }
        context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
        CommandQueueFixture::tearDown();
        ClDeviceFixture::tearDown();
    }

    // Creates a buffer with zero-copy disabled and maps it for reading with a blocking
    // enqueueMapBuffer. Returns the owning buffer pointer together with the mapped pointer.
    std::pair<ReleaseableObjectPtr<Buffer>, void *> createBufferAndMapItOnGpu() {
        DebugManagerStateRestore restore{};
        DebugManager.flags.DisableZeroCopyForBuffers.set(1);

        BufferDefaults::context = this->context;
        ReleaseableObjectPtr<Buffer> buffer = clUniquePtr(BufferHelper<>::create());
        void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_NE(nullptr, mappedPtr);
        return {std::move(buffer), mappedPtr};
    }

    cl_int retVal = CL_SUCCESS;
    void *ptrSVM = nullptr;
};

namespace {
// Expects that the copy parameters recorded by a mock queue hold pSrc / pDst aligned
// down to 4 bytes, with the remaining byte distance stored in the x offsets.
template <typename MockQueue>
void expectAlignedCopyParams(const MockQueue &queue, void *pSrc, void *pDst) {
    EXPECT_EQ(alignDown(pSrc, 4), queue.kernelParams.srcPtr);
    EXPECT_EQ(ptrDiff(pSrc, alignDown(pSrc, 4)), queue.kernelParams.srcOffset.x);
    EXPECT_EQ(alignDown(pDst, 4), queue.kernelParams.dstPtr);
    EXPECT_EQ(ptrDiff(pDst, alignDown(pDst, 4)), queue.kernelParams.dstOffset.x);
}
} // namespace

// Mapping a null SVM pointer is rejected.
TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenMappingSvmThenInvalidValueErrorIsReturned) {
    void *svmPtr = nullptr;
    retVal = this->pCmdQ->enqueueSVMMap(CL_FALSE, CL_MAP_READ, svmPtr, 0, 0, nullptr, nullptr, false);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Non-blocking map of the fixture allocation succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmThenSuccessIsReturned) {
    retVal = this->pCmdQ->enqueueSVMMap(CL_FALSE, CL_MAP_READ, ptrSVM, 256, 0, nullptr, nullptr, false);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// With every enqueue forced to block and the mock wait reporting a GPU hang,
// the map call returns CL_OUT_OF_RESOURCES after exactly one wait.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenMappingSvmThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr<MockClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const auto enqueueResult = mockCommandQueueHw.enqueueSVMMap(CL_TRUE, CL_MAP_READ, ptrSVM, 256, 0, nullptr, nullptr, false);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}

// Blocking map of the fixture allocation succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithBlockingThenSuccessIsReturned) {
    retVal = this->pCmdQ->enqueueSVMMap(CL_TRUE, CL_MAP_READ, ptrSVM, 256, 0, nullptr, nullptr, false);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Map with a one-element wait list (a user event) succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithEventsThenSuccessIsReturned) {
    UserEvent uEvent;
    cl_event eventWaitList[] = {&uEvent};
    retVal = this->pCmdQ->enqueueSVMMap(CL_FALSE, CL_MAP_READ, ptrSVM, 256, 1, eventWaitList, nullptr, false);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Unmapping a null SVM pointer is rejected.
TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenUnmappingSvmThenInvalidValueErrorIsReturned) {
    void *svmPtr = nullptr;
    retVal = this->pCmdQ->enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Unmapping the fixture allocation succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmThenSuccessIsReturned) {
    retVal = this->pCmdQ->enqueueSVMUnmap(ptrSVM, 0, nullptr, nullptr, false);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// GPU hang reported by the mock wait turns a forced-blocking unmap into CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenUnmappingSvmThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr<MockClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const auto enqueueResult = mockCommandQueueHw.enqueueSVMUnmap(ptrSVM, 0, nullptr, nullptr, false);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}

// Unmap with a one-element wait list (a user event) succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmWithEventsThenSuccessIsReturned) {
    UserEvent uEvent;
    cl_event eventWaitList[] = {&uEvent};
    retVal = this->pCmdQ->enqueueSVMUnmap(ptrSVM, 1, eventWaitList, nullptr, false);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// enqueueSVMFree without callback drops the allocation count from 1 to 0.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmThenSuccessIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    ASSERT_EQ(1U, this->context->getSVMAllocsManager()->getNumAllocs());

    void *svmPtrs[] = {ptrSVM};
    retVal = this->pCmdQ->enqueueSVMFree(1, svmPtrs, nullptr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_EQ(0U, this->context->getSVMAllocsManager()->getNumAllocs());
}

// The user-supplied free callback is invoked with the user data pointer.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackThenSuccessIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    void *svmPtrs[] = {ptrSVM};
    bool callbackWasCalled = false;
    struct ClbHelper {
        ClbHelper(bool &callbackWasCalled)
            : callbackWasCalled(callbackWasCalled) {}

        static void CL_CALLBACK clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) {
            auto *data = static_cast<ClbHelper *>(usrData);
            data->callbackWasCalled = true;
        }

        bool &callbackWasCalled;
    } userData(callbackWasCalled);

    retVal = this->pCmdQ->enqueueSVMFree(1, svmPtrs, ClbHelper::clb, &userData, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(callbackWasCalled);
}

// Same as above, additionally requesting an output event which the test destroys itself.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackAndEventThenSuccessIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    void *svmPtrs[] = {ptrSVM};
    bool callbackWasCalled = false;
    struct ClbHelper {
        ClbHelper(bool &callbackWasCalled)
            : callbackWasCalled(callbackWasCalled) {}

        static void CL_CALLBACK clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) {
            auto *data = static_cast<ClbHelper *>(usrData);
            data->callbackWasCalled = true;
        }

        bool &callbackWasCalled;
    } userData(callbackWasCalled);
    cl_event event = nullptr;

    retVal = this->pCmdQ->enqueueSVMFree(1, svmPtrs, ClbHelper::clb, &userData, 0, nullptr, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(callbackWasCalled);

    // The returned event is deleted directly (not released), exactly as before.
    auto pEvent = static_cast<Event *>(event);
    delete pEvent;
}

// enqueueSVMFree with a user event in the wait list succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithBlockingThenSuccessIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    void *svmPtrs[] = {ptrSVM};
    UserEvent uEvent;
    cl_event eventWaitList[] = {&uEvent};
    retVal = this->pCmdQ->enqueueSVMFree(1, svmPtrs, nullptr, nullptr, 1, eventWaitList, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// On GPU hang the call fails with CL_OUT_OF_RESOURCES but the requested output event
// is still returned and must be released by the caller.
HWTEST_F(EnqueueSvmTest, GivenEventAndGpuHangAndBlockingCallAndValidParamsWhenFreeingSvmWithBlockingThenEventIsNotDeletedAndOutOfResourcesIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr<MockClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const cl_uint numOfSvmPointers = 1;
    void *svmPtrs[numOfSvmPointers] = {ptrSVM};

    UserEvent uEvent;
    const cl_uint numOfEvents = 1;
    cl_event eventWaitList[numOfEvents] = {&uEvent};

    cl_event retEvent = nullptr;
    const auto enqueueResult = mockCommandQueueHw.enqueueSVMFree(numOfSvmPointers, svmPtrs, nullptr, nullptr, numOfEvents, eventWaitList, &retEvent);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);

    ASSERT_NE(nullptr, retEvent);
    castToObjectOrAbort<Event>(retEvent)->release();
}

// GPU hang without an output event: only the error code and the wait count are checked.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenFreeingSvmWithBlockingThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr<MockClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const cl_uint numOfSvmPointers = 1;
    void *svmPtrs[numOfSvmPointers] = {ptrSVM};

    UserEvent uEvent;
    const cl_uint numOfEvents = 1;
    cl_event eventWaitList[numOfEvents] = {&uEvent};

    const auto enqueueResult = mockCommandQueueHw.enqueueSVMFree(numOfSvmPointers, svmPtrs, nullptr, nullptr, numOfEvents, eventWaitList, nullptr);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}

// Null destination pointer is rejected.
TEST_F(EnqueueSvmTest, GivenNullDstPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    void *pDstSVM = nullptr;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

// Null source pointer is rejected.
TEST_F(EnqueueSvmTest, GivenNullSrcPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Host source -> SVM destination: the returned event reports CL_COMMAND_SVM_MEMCPY.
TEST_F(EnqueueSvmTest, givenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char srcHostPtr[260] = {};
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = srcHostPtr;
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}

// Host source with zero size succeeds.
TEST_F(EnqueueSvmTest, givenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char srcHostPtr[260] = {};
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = srcHostPtr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Host source -> SVM destination is recorded by the mock queue as a write-buffer command,
// with a single temporary allocation whose GPU address equals the host source pointer.
HWTEST_F(EnqueueSvmTest, givenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) {
    char srcHostPtr[260] = {}; // zero-initialized so the copy never reads indeterminate bytes
    void *pSrcSVM = srcHostPtr;
    void *pDstSVM = ptrSVM;
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER));

    auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
    ASSERT_NE(nullptr, tempAlloc);
    EXPECT_EQ(0u, tempAlloc->countSuccessors());
    EXPECT_EQ(pSrcSVM, reinterpret_cast<void *>(tempAlloc->getGpuAddress()));

    expectAlignedCopyParams(myCmdQ, pSrcSVM, pDstSVM);
}

// SVM source -> host destination is recorded by the mock queue as a read-buffer command,
// with a single temporary allocation whose GPU address equals the host destination pointer.
HWTEST_F(EnqueueSvmTest, givenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) {
    char dstHostPtr[260] = {};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER));

    auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
    ASSERT_NE(nullptr, tempAlloc);
    EXPECT_EQ(0u, tempAlloc->countSuccessors());
    EXPECT_EQ(pDstSVM, reinterpret_cast<void *>(tempAlloc->getGpuAddress()));

    expectAlignedCopyParams(myCmdQ, pSrcSVM, pDstSVM);
}

// SVM source -> host destination: the returned event reports CL_COMMAND_SVM_MEMCPY.
TEST_F(EnqueueSvmTest, givenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char dstHostPtr[260] = {};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}

// Host destination with zero size succeeds.
TEST_F(EnqueueSvmTest, givenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char dstHostPtr[260] = {};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Host -> host, non-blocking: recorded as write-buffer; two temporary allocations are
// chained, the head matching the source and its successor matching the destination.
HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueNonBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) {
    char dstHostPtr[] = {0, 0, 0};
    char srcHostPtr[] = {1, 2, 3};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = srcHostPtr;
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 3, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER));

    auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
    ASSERT_NE(nullptr, tempAlloc);
    EXPECT_EQ(1u, tempAlloc->countSuccessors());
    ASSERT_NE(nullptr, tempAlloc->next);
    EXPECT_EQ(pSrcSVM, reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
    EXPECT_EQ(pDstSVM, reinterpret_cast<void *>(tempAlloc->next->getGpuAddress()));

    expectAlignedCopyParams(myCmdQ, pSrcSVM, pDstSVM);
}

// Host -> host, blocking: recorded as write-buffer with aligned copy parameters.
HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) {
    char dstHostPtr[] = {0, 0, 0};
    char srcHostPtr[] = {1, 2, 3};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = srcHostPtr;
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(true, pDstSVM, pSrcSVM, 3, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER));

    expectAlignedCopyParams(myCmdQ, pSrcSVM, pDstSVM);
}

// Host -> host with zero size succeeds.
TEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char dstHostPtr[260] = {};
    char srcHostPtr[260] = {};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = srcHostPtr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// SVM -> SVM, non-blocking: recorded as SVM memcpy and no temporary allocation is created.
HWTEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSVMMemcpyThenSvmMemcpyCommandIsEnqueued) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(myCmdQ.lastCommandType, static_cast<cl_command_type>(CL_COMMAND_SVM_MEMCPY));

    auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead();
    EXPECT_EQ(nullptr, tempAlloc);

    expectAlignedCopyParams(myCmdQ, pSrcSVM, pDstSVM);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

// SVM -> SVM, blocking copy succeeds.
TEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueBlockingSVMMemcpyThenSuccessIsReturned) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    retVal = this->pCmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

// SVM -> SVM copy queued behind a user event; the event is set to status -1 afterwards.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenCopyingMemoryWithBlockingThenSuccessisReturned) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    auto uEvent = makeReleaseable<UserEvent>();
    cl_event eventWaitList[] = {uEvent.get()};
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 1, eventWaitList, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
    uEvent->setStatus(-1);
}

// Copy from a source allocated with the coherent property succeeds.
TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryThenSuccessIsReturned) {
    void *pDstSVM = ptrSVM;
    SVMAllocsManager::SvmAllocationProperties svmProperties;
    svmProperties.coherent = true;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields());
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

// Coherent source, copy queued behind a user event that is later set to status -1.
TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryWithBlockingThenSuccessIsReturned) {
    void *pDstSVM = ptrSVM;
    SVMAllocsManager::SvmAllocationProperties svmProperties;
    svmProperties.coherent = true;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields());
    auto uEvent = makeReleaseable<UserEvent>();
    cl_event eventWaitList[] = {uEvent.get()};
    retVal = this->pCmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 1, eventWaitList, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
    uEvent->setStatus(-1);
}

// Destination address 0x17 is not 4-byte aligned; with size 0 the call succeeds and the
// recorded parameters hold the aligned-down address plus the byte offset.
HWTEST_F(EnqueueSvmTest, givenUnalignedAddressWhenEnqueueMemcpyThenDispatchInfoHasAlignedAddressAndProperOffset) {
    void *pDstSVM = reinterpret_cast<void *>(0x17);
    void *pSrcSVM = ptrSVM;
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    expectAlignedCopyParams(myCmdQ, pSrcSVM, pDstSVM);
}

// Filling through a null SVM pointer is rejected.
TEST_F(EnqueueSvmTest, GivenNullSvmPtrWhenFillingMemoryThenInvalidValueErrorIsReturned) {
    void *svmPtr = nullptr;
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    retVal = this->pCmdQ->enqueueSVMMemFill(svmPtr, pattern, patternSize, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Fill of the fixture allocation succeeds and records a 4-byte aligned destination.
HWTEST_F(EnqueueSvmTest, givenSvmAllocWhenEnqueueSvmFillThenSuccesIsReturnedAndAddressIsProperlyAligned) {
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemFill(ptrSVM, pattern, patternSize, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(alignDown(ptrSVM, 4), myCmdQ.kernelParams.dstPtr);
    EXPECT_EQ(ptrDiff(ptrSVM, alignDown(ptrSVM, 4)), myCmdQ.kernelParams.dstOffset.x);
}

// Fill queued behind a user event; the event is set to status -1 afterwards.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFillingMemoryWithBlockingThenSuccessIsReturned) {
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    auto uEvent = makeReleaseable<UserEvent>();
    cl_event eventWaitList[] = {uEvent.get()};
    retVal = this->pCmdQ->enqueueSVMMemFill(ptrSVM, pattern, patternSize, 256, 1, eventWaitList, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    uEvent->setStatus(-1);
}

// GPU hang reported by the mock wait turns a forced-blocking fill into CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenFillingMemoryThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr<MockClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);

    auto uEvent = makeReleaseable<UserEvent>();
    const cl_uint numOfEvents = 1;
    cl_event eventWaitList[numOfEvents] = {uEvent.get()};

    const auto enqueueResult = mockCommandQueueHw.enqueueSVMMemFill(ptrSVM, pattern, patternSize, 256, numOfEvents, eventWaitList, nullptr);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);

    uEvent->setStatus(-1);
}

// Two identical fills in a row both succeed.
TEST_F(EnqueueSvmTest, GivenRepeatCallsWhenFillingMemoryThenSuccessIsReturnedForEachCall) {
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    retVal = this->pCmdQ->enqueueSVMMemFill(ptrSVM, pattern, patternSize, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = this->pCmdQ->enqueueSVMMemFill(ptrSVM, pattern, patternSize, 256, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// After a fill, the CSR reuse list is non-empty and its head has type FILL_PATTERN.
TEST_F(EnqueueSvmTest, givenEnqueueSVMMemFillWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) {
    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());

    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    const size_t size = patternSize;
    retVal = this->pCmdQ->enqueueSVMMemFill(ptrSVM, pattern, patternSize, size, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());

    GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead();
    ASSERT_NE(nullptr, patternAllocation);

    EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());
}

// A kernel with one SVM allocation registered via setSvmKernelExecInfo enqueues successfully.
TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned) {
    auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
    ASSERT_NE(nullptr, svmData);
    GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    EXPECT_NE(nullptr, ptrSVM);

    std::unique_ptr<MockProgram> program(Program::createBuiltInFromSource<MockProgram>("FillBufferBytes", context, context->getDevices(), &retVal));
    ASSERT_NE(nullptr, program.get());
    program->build(program->getDevices(), nullptr, false);
    std::unique_ptr<MockKernel> kernel(Kernel::create<MockKernel>(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context->getDevice(0), &retVal));
    ASSERT_NE(nullptr, kernel.get());

    kernel->setSvmKernelExecInfo(svmAllocation);

    size_t offset = 0;
    size_t size = 1;
    retVal = this->pCmdQ->enqueueKernel(kernel.get(), 1, &offset, &size, &size, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size());
}

// Residency list of a kernel: 1 surface before the SVM allocation is registered; a second
// getResidency call into the same vector after the blocked enqueue brings the total to 3.
TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) {
    auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
    ASSERT_NE(nullptr, svmData);
    GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    EXPECT_NE(nullptr, ptrSVM);

    auto program = clUniquePtr(Program::createBuiltInFromSource<MockProgram>("FillBufferBytes", context, context->getDevices(), &retVal));
    ASSERT_NE(nullptr, program.get());
    program->build(program->getDevices(), nullptr, false);
    auto pMultiDeviceKernel = clUniquePtr(MultiDeviceKernel::create<MockKernel>(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), &retVal));
    ASSERT_NE(nullptr, pMultiDeviceKernel.get());
    auto kernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));

    std::vector<Surface *> allSurfaces;
    kernel->getResidency(allSurfaces);
    EXPECT_EQ(1u, allSurfaces.size());

    kernel->setSvmKernelExecInfo(svmAllocation);

    auto uEvent = makeReleaseable<UserEvent>();
    cl_event eventWaitList[] = {uEvent.get()};
    size_t offset = 0;
    size_t size = 1;
    retVal = this->pCmdQ->enqueueKernel(kernel, 1, &offset, &size, &size, 1, eventWaitList, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    kernel->getResidency(allSurfaces);
    EXPECT_EQ(3u, allSurfaces.size());

    // The surfaces handed out by getResidency are deleted by the test.
    for (auto &surface : allSurfaces) {
        delete surface;
    }

    EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size());

    uEvent->setStatus(-1);
}

// Two threads allocate and free five SVM pointers each while a third frees five that were
// allocated up front; afterwards only the fixture allocation remains.
TEST_F(EnqueueSvmTest, GivenMultipleThreasWhenAllocatingSvmThenOnlyOneAllocationIsCreated) {
    std::atomic<int> flag(0);
    std::atomic<int> ready(0);
    void *svmPtrs[15] = {};

    auto allocSvm = [&](uint32_t from, uint32_t to) {
        for (uint32_t i = from; i <= to; i++) {
            svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
            auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtrs[i]);
            ASSERT_NE(nullptr, svmData);
            auto ga = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
            EXPECT_NE(nullptr, ga);
            EXPECT_EQ(ga->getUnderlyingBuffer(), svmPtrs[i]);
        }
    };

    auto freeSvm = [&](uint32_t from, uint32_t to) {
        for (uint32_t i = from; i <= to; i++) {
            context->getSVMAllocsManager()->freeSVMAlloc(svmPtrs[i]);
        }
    };

    auto asyncFcn = [&](bool alloc, uint32_t from, uint32_t to) {
        // Spin until all three workers have arrived so they hit the manager concurrently.
        flag++;
        while (flag < 3) {
        }
        if (alloc) {
            allocSvm(from, to);
        }
        freeSvm(from, to);
        ready++;
    };

    EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs());

    allocSvm(10, 14);

    std::thread t1(asyncFcn, true, 0, 4);
    std::thread t2(asyncFcn, true, 5, 9);
    std::thread t3(asyncFcn, false, 10, 14);

    while (ready < 3) {
        std::this_thread::yield();
    }

    EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs());
    t1.join();
    t2.join();
    t3.join();
}

// Migrating the fixture allocation with null sizes and zero flags succeeds.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMigratingMemoryThenSuccessIsReturned) {
    const void *svmPtrs[] = {ptrSVM};
    retVal = this->pCmdQ->enqueueSVMMigrateMem(1, svmPtrs, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// GPU hang reported by the mock wait turns a forced-blocking migrate into CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenMigratingMemoryThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr<MockClDevice> device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const void *svmPtrs[] = {ptrSVM};
    const auto enqueueResult = mockCommandQueueHw.enqueueSVMMigrateMem(1, svmPtrs, nullptr, 0, 0, nullptr, nullptr);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}

// The mock queue records CL_COMMAND_SVM_MIGRATE_MEM as the last command type.
HWTEST_F(EnqueueSvmTest, WhenMigratingMemoryThenSvmMigrateMemCommandTypeIsUsed) {
    MockCommandQueueHw<FamilyType> commandQueue{context, pClDevice, nullptr};
    const void *svmPtrs[] = {ptrSVM};
    retVal = commandQueue.enqueueSVMMigrateMem(1, svmPtrs, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    uint32_t expectedCommandType = CL_COMMAND_SVM_MIGRATE_MEM;
    EXPECT_EQ(expectedCommandType, commandQueue.lastCommandType);
}

// For every combination of EnableLocalMemory, readOnly and hostPtrReadOnly, a 256-byte
// SVM allocation can be created and freed.
TEST(CreateSvmAllocTests, givenVariousSvmAllocationPropertiesWhenAllocatingSvmThenSvmIsCorrectlyAllocated) {
    if (!defaultHwInfo->capabilityTable.ftrSvm) {
        return;
    }

    DebugManagerStateRestore dbgRestore;
    SVMAllocsManager::SvmAllocationProperties svmAllocationProperties;

    for (auto isLocalMemorySupported : ::testing::Bool()) {
        DebugManager.flags.EnableLocalMemory.set(isLocalMemorySupported);
        auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
        auto mockContext = std::make_unique<MockContext>(mockDevice.get());

        for (auto isReadOnly : ::testing::Bool()) {
            for (auto isHostPtrReadOnly : ::testing::Bool()) {
                svmAllocationProperties.readOnly = isReadOnly;
                svmAllocationProperties.hostPtrReadOnly = isHostPtrReadOnly;

                auto ptrSVM = mockContext->getSVMAllocsManager()->createSVMAlloc(256, svmAllocationProperties, mockContext->getRootDeviceIndices(), mockContext->getDeviceBitfields());
                EXPECT_NE(nullptr, ptrSVM);
                mockContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
            }
        }
    }
}

// NOTE(review): the fixture below continues beyond this section of the file, so its text is
// carried over token-for-token; its template arguments appear to be missing - confirm
// against the original file before building.
struct EnqueueSvmTestLocalMemory : public ClDeviceFixture, public ::testing::Test { void SetUp() override { REQUIRE_SVM_OR_SKIP(defaultHwInfo); dbgRestore = std::make_unique(); DebugManager.flags.EnableLocalMemory.set(1); ClDeviceFixture::setUp(); context = std::make_unique(pClDevice, true); size = 256; svmPtr = context->getSVMAllocsManager()->createSVMAlloc(size, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svmPtr); mockSvmManager = reinterpret_cast(context->getSVMAllocsManager()); } void TearDown() override { if 
(defaultHwInfo->capabilityTable.ftrSvm == false) { return; } context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); context.reset(nullptr); ClDeviceFixture::tearDown(); } cl_int retVal = CL_SUCCESS; void *svmPtr = nullptr; size_t size; MockSVMAllocsManager *mockSvmManager; std::unique_ptr dbgRestore; std::unique_ptr context; HardwareParse hwParse; }; HWTEST_F(EnqueueSvmTestLocalMemory, givenWriteInvalidateRegionFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenGpuHangAndBlockingCallAndWriteInvalidateRegionFlagWhenMappingSvmThenOutOfResourcesIsReturned) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; const auto enqueueResult = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, queue.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapWriteFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } 
// NOTE(review): template argument lists (after std::unique_ptr, getCommandCount, castToObjectOrAbort, `template struct`, ...) look
// stripped in this copy - confirm against the upstream file.
// Contents of the next line:
//  - givenMapReadFlag...: CL_MAP_READ produces a read-only map record.
//  - givenMapReadAndWriteFlag...: CL_MAP_READ | CL_MAP_WRITE produces a non-read-only record.
//  - givenSvmAllocWithoutFlags...: map flags 0 produce a non-read-only record.
//    NOTE(review): the test name ends in "ReadOnlyFlagIsTrue" but the assertion is EXPECT_FALSE - confirm which one is intended.
//  In all three the map record is now asserted non-null before it is dereferenced, matching the sibling map tests.
//  - start of givenEnabledLocalMemoryWhenEnqueueMapValidSvmPtrThenExpectSingleWalker.
HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_READ, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_TRUE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadAndWriteFlagWhenMappingSvmThenDontSetReadOnlyProperty) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenSvmAllocWithoutFlagsWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, 0, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqueueMapValidSvmPtrThenExpectSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event event = nullptr; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_READ, regionSvmPtr, regionSize, 
// Contents of the next line:
//  - rest of the single-walker map test: the map record must carry the region pointer, base pointer, size and offset used for the
//    call and be read-only; after flush the parsed stream holds one command and the event reports CL_COMMAND_SVM_MAP.
//  - givenEnabledLocalMemoryWhenEnqueueMapSvmPtrTwiceThenExpectSingleWalker: mapping the same region twice keeps one map
//    operation and one parsed command.
0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr); EXPECT_EQ(svmPtr, svmMap->baseSvmPtr); EXPECT_EQ(regionSize, svmMap->regionSize); EXPECT_EQ(offset, svmMap->offset); EXPECT_TRUE(svmMap->readOnlyMap); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqueueMapSvmPtrTwiceThenExpectSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr); EXPECT_EQ(svmPtr, svmMap->baseSvmPtr); EXPECT_EQ(regionSize, svmMap->regionSize); EXPECT_EQ(offset, svmMap->offset); EXPECT_FALSE(svmMap->readOnlyMap); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); cl_event event = nullptr; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP; cl_command_type actualCmd = 
// Contents of the next line:
//  - end of the map-twice test (event command type check and release).
//  - ...GpuHangOnSecondMap...: first blocking map with WaitStatus::Ready succeeds, second with GpuHang returns CL_OUT_OF_RESOURCES.
//  - givenEnabledLocalMemoryWhenNoMappedSvmPtrThenExpectNoUnmapCopyKernel: unmapping a pointer that was never mapped succeeds,
//    yields zero parsed commands and an event of type CL_COMMAND_SVM_UNMAP.
//  - start of ...GpuHangAndBlockingCallWhenUnmappingThenReturnOutOfResources.
castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndBlockingCallAndGpuHangOnSecondMapWhenEnqueueMapSvmPtrTwiceThenSecondCallReturnsOutOfresources) { std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; const auto firstMapResult = mockCommandQueueHw.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, firstMapResult); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto secondMapResult = mockCommandQueueHw.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, secondMapResult); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenNoMappedSvmPtrThenExpectNoUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event event = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(0u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndGpuHangAndBlockingCallWhenUnmappingThenReturnOutOfResources) { DebugManagerStateRestore stateRestore; 
// Contents of the next line:
//  - rest of the GPU-hang unmap test: CL_OUT_OF_RESOURCES with exactly one wait call.
//  - givenEnabledLocalMemoryWhenMappedSvmRegionIsReadOnlyThenExpectNoUnmapCopyKernel: a CL_MAP_READ map yields one parsed command;
//    the stream offset is recorded so only the unmap portion is parsed afterwards, which must contain zero commands.
//  - start of ...BlockingCallAndGpuHangForUnmap...
DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = mockCommandQueueHw.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsReadOnlyThenExpectNoUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); queue.flush(); size_t offset = stream.getUsed(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); hwParse.tearDown(); cl_event event = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream, offset); walkerCount = hwParse.getCommandCount(); EXPECT_EQ(0u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndBlockingCallAndGpuHangForUnmapWhenUnmapingThenOutOfResourcesIsReturnedFromUnmap) { DebugManagerStateRestore dbgRestore; 
// Contents of the next line:
//  - rest of the hang-on-unmap test: the map succeeds with WaitStatus::Ready, the unmap returns CL_OUT_OF_RESOURCES with GpuHang.
//  - givenNonReadOnlyMapWhenUnmappingThenSetAubTbxWritableBeforeUnmapEnqueue: a local queue subclass counts enqueueHandlerHook
//    calls and, once allocationToVerify is set, checks inside the hook that the allocation is AUB/TBX writable; the test clears
//    both flags after the map and before the unmap.
DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready; const auto enqueueMapResult = mockCommandQueueHw.enqueueSVMMap( CL_FALSE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, enqueueMapResult); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueUnmapResult = mockCommandQueueHw.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueUnmapResult); } HWTEST_F(EnqueueSvmTestLocalMemory, givenNonReadOnlyMapWhenUnmappingThenSetAubTbxWritableBeforeUnmapEnqueue) { class MyQueue : public MockCommandQueueHw { public: using MockCommandQueueHw::MockCommandQueueHw; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { waitUntilCompleteCalled++; if (allocationToVerify) { EXPECT_TRUE(allocationToVerify->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(allocationToVerify->isTbxWritable(GraphicsAllocation::defaultBank)); } } uint32_t waitUntilCompleteCalled = 0; GraphicsAllocation *allocationToVerify = nullptr; }; MyQueue myQueue(context.get(), pClDevice, nullptr); retVal = myQueue.enqueueSVMMap(CL_TRUE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); myQueue.allocationToVerify = gpuAllocation; gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); EXPECT_EQ(1u, myQueue.waitUntilCompleteCalled); retVal = myQueue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false); 
// Contents of the next line:
//  - end of the non-read-only unmap test: the hook must have run twice in total.
//  - givenReadOnlyMapWhenUnmappingThenDontResetAubTbxWritable: after a CL_MAP_READ map, flags cleared by the test stay cleared
//    across the unmap.
//  - start of ...MappedSvmRegionIsWritableThenExpectMapAndUnmapCopyKernel: writable map + unmap yield two parsed commands.
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, myQueue.waitUntilCompleteCalled); } HWTEST_F(EnqueueSvmTestLocalMemory, givenReadOnlyMapWhenUnmappingThenDontResetAubTbxWritable) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); retVal = queue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(gpuAllocation->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_FALSE(gpuAllocation->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenExpectMapAndUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event eventMap = nullptr; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, &eventMap, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); cl_event eventUnmap = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &eventUnmap, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); constexpr cl_command_type expectedMapCmd = CL_COMMAND_SVM_MAP; cl_command_type actualMapCmd = castToObjectOrAbort(eventMap)->getCommandType(); EXPECT_EQ(expectedMapCmd, 
// Contents of the next line:
//  - end of the map+unmap test (both events carry the expected command types and are released).
//  - ...WritableThenUnmapReturnsOutOfResources: blocking map succeeds, unmap with GpuHang returns CL_OUT_OF_RESOURCES, two waits total.
//  - ...NoEventIsUsedIsWritable...: same map+unmap flow without events still yields two parsed commands.
//  - start of FailCsr, a CSR whose host-surface allocation hook always reports failure.
actualMapCmd); constexpr cl_command_type expectedUnmapCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualUnmapCmd = castToObjectOrAbort(eventUnmap)->getCommandType(); EXPECT_EQ(expectedUnmapCmd, actualUnmapCmd); clReleaseEvent(eventMap); clReleaseEvent(eventUnmap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenGpuHangAndBlockingCallAndEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenUnmapReturnsOutOfResources) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); MockCommandQueueHw queue(context.get(), pClDevice, nullptr); queue.waitForAllEnginesReturnValue = WaitStatus::Ready; const auto enqueueMapResult = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, enqueueMapResult); queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueUnmapResult = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueUnmapResult); EXPECT_EQ(2, queue.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAndNoEventIsUsedIsWritableThenExpectMapAndUnmapCopyKernelAnNo) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); } template struct FailCsr : public CommandStreamReceiverHw { using 
// Contents of the next line:
//  - rest of FailCsr: createAllocationForHostSurface now returns the bool literal `false` (it previously returned the OpenCL
//    macro CL_FALSE from a bool function; the value is unchanged).
//  - whenInternalAllocationsAreMadeResident...: with the fixture's SVM allocation plus one device unified-memory allocation,
//    makeInternalAllocationsResident(DEVICE_UNIFIED_MEMORY) adds exactly the unified-memory allocation to the CSR residency list.
//  - start of whenInternalAllocationsAreAddedToResidencyContainerThenOnlyExpectedAllocationsAreAdded.
CommandStreamReceiverHw::CommandStreamReceiverHw; bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { return false; } }; HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreMadeResidentThenOnlyNonSvmAllocationsAreAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields()); unifiedMemoryProperties.device = pDevice; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &residentAllocations = commandStreamReceiver.getResidencyAllocations(); EXPECT_EQ(0u, residentAllocations.size()); svmManager->makeInternalAllocationsResident(commandStreamReceiver, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is made resident EXPECT_EQ(1u, residentAllocations.size()); EXPECT_EQ(residentAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyExpectedAllocationsAreAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields()); unifiedMemoryProperties.device = pDevice; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer 
// Contents of the next line:
//  - rest of the residency-container test: one entry, whose GPU address equals the unified-memory pointer.
//  - whenInternalAllocationIsTriedToBeAddedTwice...: calling addInternalAllocationsToResidencyContainer twice leaves two entries.
//  - start of the createHostUnifiedMemoryAllocationTest fixture (takes three root devices and the SVM manager from its context).
residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(EnqueueSvmTest, whenInternalAllocationIsTriedToBeAddedTwiceToResidencyContainerThenItIsAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields()); unifiedMemoryProperties.device = pDevice; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(2u, residencyContainer.size()); svmManager->freeSVMAlloc(unifiedMemoryPtr); } struct createHostUnifiedMemoryAllocationTest : public ::testing::Test { void SetUp() override { REQUIRE_SVM_OR_SKIP(defaultHwInfo); device0 = context.pRootDevice0; device1 = context.pRootDevice1; device2 = context.pRootDevice2; svmManager = 
// Contents of the next line:
//  - rest of the fixture (SetUp expects an empty SVM manager; allocationSize = 4096, numDevices = 3).
//  - whenCreatingHostUnifiedMemoryAllocation...: one alloc-data entry holding numDevices graphics allocations whose root device
//    index equals their position. The looked-up alloc data is now asserted non-null before use so a failed lookup fails the
//    test instead of dereferencing null.
//  - start of whenCreatingMultiGraphicsAllocationThenGraphicsAllocationPerDeviceIsCreated.
context.getSVMAllocsManager(); EXPECT_EQ(0u, svmManager->getNumAllocs()); } const size_t allocationSize = 4096u; const uint32_t numDevices = 3u; MockDefaultContext context; MockClDevice *device2; MockClDevice *device1; MockClDevice *device0; SVMAllocsManager *svmManager = nullptr; }; HWTEST_F(createHostUnifiedMemoryAllocationTest, whenCreatingHostUnifiedMemoryAllocationThenOneAllocDataIsCreatedWithOneGraphicsAllocationPerDevice) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields()); EXPECT_EQ(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(1u, svmManager->getNumAllocs()); auto allocData = svmManager->getSVMAlloc(unifiedMemoryPtr); ASSERT_NE(nullptr, allocData); EXPECT_EQ(numDevices, allocData->gpuAllocations.getGraphicsAllocations().size()); for (uint32_t i = 0; i < allocData->gpuAllocations.getGraphicsAllocations().size(); i++) { auto alloc = allocData->gpuAllocations.getGraphicsAllocation(i); EXPECT_EQ(i, alloc->getRootDeviceIndex()); } svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(createHostUnifiedMemoryAllocationTest, whenCreatingMultiGraphicsAllocationThenGraphicsAllocationPerDeviceIsCreated) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields()); auto alignedSize = alignUp(allocationSize, MemoryConstants::pageSize64k); auto memoryManager = context.getMemoryManager(); auto allocationType = AllocationType::BUFFER_HOST_MEMORY; auto maxRootDeviceIndex = numDevices - 1u; RootDeviceIndicesContainer rootDeviceIndices; rootDeviceIndices.reserve(numDevices); rootDeviceIndices.push_back(0u); rootDeviceIndices.push_back(1u); rootDeviceIndices.push_back(2u); auto rootDeviceIndex = rootDeviceIndices.at(0); auto 
// Contents of the next line:
//  - rest of the per-device multi-allocation test: for root device indices {0,1,2} every slot holds a non-null allocation with a
//    matching root device index; the test frees each graphics allocation itself.
//  - start of the specific-root-device-indices variant, which requests only indices {0,2}.
deviceBitfield = device0->getDeviceBitfield(); AllocationProperties allocationProperties{rootDeviceIndex, true, alignedSize, allocationType, deviceBitfield.count() > 1, deviceBitfield.count() > 1, deviceBitfield}; allocationProperties.flags.shareable = unifiedMemoryProperties.allocationFlags.flags.shareable; SvmAllocationData allocData(maxRootDeviceIndex); void *unifiedMemoryPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, allocData.gpuAllocations); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(numDevices, allocData.gpuAllocations.getGraphicsAllocations().size()); for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) { auto alloc = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex); EXPECT_NE(nullptr, alloc); EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex()); } for (auto gpuAllocation : allocData.gpuAllocations.getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(gpuAllocation); } } HWTEST_F(createHostUnifiedMemoryAllocationTest, whenCreatingMultiGraphicsAllocationForSpecificRootDeviceIndicesThenOnlyGraphicsAllocationPerSpecificRootDeviceIndexIsCreated) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields()); auto alignedSize = alignUp(allocationSize, MemoryConstants::pageSize64k); auto memoryManager = context.getMemoryManager(); auto allocationType = AllocationType::BUFFER_HOST_MEMORY; auto maxRootDeviceIndex = numDevices - 1u; RootDeviceIndicesContainer rootDeviceIndices; rootDeviceIndices.reserve(numDevices); rootDeviceIndices.push_back(0u); rootDeviceIndices.push_back(2u); auto noProgramedRootDeviceIndex = 1u; auto rootDeviceIndex = rootDeviceIndices.at(0); auto deviceBitfield = device0->getDeviceBitfield(); AllocationProperties allocationProperties{rootDeviceIndex, true, alignedSize, allocationType, 
// Contents of the next line:
//  - rest of the specific-indices test: the container still has numDevices slots, slot 1 is null, slots 0 and 2 are populated.
//  - MemoryAllocationTypeArray: the three InternalMemoryType values used as test parameters.
//  - UpdateResidencyContainerMultipleDevicesTest fixture: caches two root devices and their sub-devices from a multi-GPU mock
//    context and expects the SVM manager to start empty.
deviceBitfield.count() > 1, deviceBitfield.count() > 1, deviceBitfield}; allocationProperties.flags.shareable = unifiedMemoryProperties.allocationFlags.flags.shareable; SvmAllocationData allocData(maxRootDeviceIndex); void *unifiedMemoryPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, allocData.gpuAllocations); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(numDevices, allocData.gpuAllocations.getGraphicsAllocations().size()); for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) { auto alloc = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex); if (rootDeviceIndex == noProgramedRootDeviceIndex) { EXPECT_EQ(nullptr, alloc); } else { EXPECT_NE(nullptr, alloc); EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex()); } } for (auto gpuAllocation : allocData.gpuAllocations.getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(gpuAllocation); } } struct MemoryAllocationTypeArray { const InternalMemoryType allocationType[3] = {InternalMemoryType::HOST_UNIFIED_MEMORY, InternalMemoryType::DEVICE_UNIFIED_MEMORY, InternalMemoryType::SHARED_UNIFIED_MEMORY}; }; struct UpdateResidencyContainerMultipleDevicesTest : public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { device = context.pRootDevice0; subDevice0 = context.pSubDevice00; subDevice1 = context.pSubDevice01; peerDevice = context.pRootDevice1; peerSubDevice0 = context.pSubDevice10; peerSubDevice1 = context.pSubDevice11; svmManager = context.getSVMAllocsManager(); EXPECT_EQ(0u, svmManager->getNumAllocs()); } MockUnrestrictiveContextMultiGPU context; MockClDevice *device; ClDevice *subDevice0 = nullptr; ClDevice *subDevice1 = nullptr; MockClDevice *peerDevice; ClDevice *peerSubDevice0 = nullptr; ClDevice *peerSubDevice1 = nullptr; SVMAllocsManager *svmManager = nullptr; const uint32_t numRootDevices = 2; const uint32_t maxRootDeviceIndex = numRootDevices - 1; }; 
// Tests for SVMAllocsManager::addInternalAllocationsToResidencyContainer using the UpdateResidencyContainerMultipleDevicesTest fixture.
// NOTE(review): template arguments of static_cast look stripped in this copy - confirm against the upstream file.
// Contents of the next line:
//  - givenNoAllocationsCreated...: with nothing inserted, the residency container stays empty.
//  - givenAllocationThenItIsAddedToContainerOnlyIfMaskMatches (parameterized): inserts one mock allocation whose memoryType is the
//    first tuple element and queries with the second element as mask; the allocation is expected in the container only when the
//    mask equals the type's numeric value.
//  - start of whenUsingRootDeviceIndexGreaterThanMultiGraphicsAllocationSize... (declared with HWTEST_P, but its body does not
//    read GetParam()).
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenNoAllocationsCreatedThenNoInternalAllocationsAreAddedToResidencyContainer) { ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); } HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, givenAllocationThenItIsAddedToContainerOnlyIfMaskMatches) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); InternalMemoryType type = std::get<0>(GetParam()); uint32_t mask = std::get<1>(GetParam()); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = type; allocData.device = &device->getDevice(); svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, mask); if (mask == static_cast(type)) { EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } else { EXPECT_EQ(0u, residencyContainer.size()); } } HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, whenUsingRootDeviceIndexGreaterThanMultiGraphicsAllocationSizeThenNoAllocationsAreAdded) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = &device->getDevice(); uint32_t pCmdBufferPeer[1024]; 
// Contents of the next line:
//  - rest of the out-of-range test: querying with root device index numRootDevices + 1 adds nothing; querying with the first
//    device's index adds only that device's allocation.
//  - memoryTypeArray + INSTANTIATE_TEST_SUITE_P: instantiates the parameterized tests over every (type, mask) pair drawn from
//    the three InternalMemoryType values.
//  - start of ...ThenOnlyAllocationsFromSameDeviceAreAdded.
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer)); SvmAllocationData allocDataPeer(maxRootDeviceIndex); allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer); allocDataPeer.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocDataPeer.device = &peerDevice->getDevice(); svmManager->insertSVMAlloc(allocData); svmManager->insertSVMAlloc(allocDataPeer); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(numRootDevices + 1, residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } MemoryAllocationTypeArray memoryTypeArray; INSTANTIATE_TEST_SUITE_P(UpdateResidencyContainerMultipleDevicesTests, UpdateResidencyContainerMultipleDevicesTest, ::testing::Combine( ::testing::ValuesIn(memoryTypeArray.allocationType), ::testing::ValuesIn(memoryTypeArray.allocationType))); HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyAllocationsFromSameDeviceAreAdded) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = &device->getDevice(); uint32_t pCmdBufferPeer[1024]; MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, 
// Contents of the next line:
//  - rest of the same-device test: with one allocation per root device inserted, only the queried device's allocation is added.
//  - givenSharedAllocationWithNullDevicePointer...: a SHARED_UNIFIED_MEMORY allocation whose `device` is nullptr is added.
//  - start of givenSharedAllocationWithNonNullDevicePointerAndDifferentDevice...
sizeof(pCmdBufferPeer)); SvmAllocationData allocDataPeer(maxRootDeviceIndex); allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer); allocDataPeer.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocDataPeer.device = &peerDevice->getDevice(); svmManager->insertSVMAlloc(allocData); svmManager->insertSVMAlloc(allocDataPeer); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenSharedAllocationWithNullDevicePointerThenAllocationIsAddedToResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; allocData.device = nullptr; svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenSharedAllocationWithNonNullDevicePointerAndDifferentDeviceToOnePassedToResidencyCallThenAllocationIsNotAddedToResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(peerDevice->getDevice().getRootDeviceIndex(), 
// Contents of the next line:
//  - rest of the different-device shared test: a shared allocation owned by the peer device is not added when the first device's
//    root index is queried.
//  - givenAllocationsFromSubDevicesBelongingToTheSameTargetDevice...: two device allocations owned by the two sub-devices, both
//    created with the first device's root index, are both added (container size 2).
static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; allocData.device = &peerDevice->getDevice(); svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenAllocationsFromSubDevicesBelongingToTheSameTargetDeviceThenTheyAreAddedToTheResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData0(maxRootDeviceIndex); allocData0.gpuAllocations.addAllocation(&gfxAllocation); allocData0.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData0.device = &subDevice0->getDevice(); uint32_t pCmdBufferPeer[1024]; MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer)); SvmAllocationData allocData1(maxRootDeviceIndex); allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer); allocData1.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData1.device = &subDevice1->getDevice(); svmManager->insertSVMAlloc(allocData0); svmManager->insertSVMAlloc(allocData1); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(2u, residencyContainer.size()); } 
// NOTE(review): in this copy of the file some casts/templates appear without
// their explicit template arguments (e.g. `static_cast(pCmdBuffer)`,
// `std::make_unique>(...)`); the code is reproduced exactly as found - confirm
// against the repository copy.

// Two device-memory allocations created with `device`'s root index and owned
// by subDevice0 / subDevice1 are not added when the query is issued with
// `peerDevice`'s root index.
HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenAllocationsFromSubDevicesNotBelongingToTheSameTargetDeviceThenTheyAreNotAddedToTheResidencyContainer) {
    uint32_t pCmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(),
                                         static_cast(pCmdBuffer), sizeof(pCmdBuffer));
    SvmAllocationData allocData0(maxRootDeviceIndex);
    allocData0.gpuAllocations.addAllocation(&gfxAllocation);
    allocData0.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
    allocData0.device = &subDevice0->getDevice();

    uint32_t pCmdBufferPeer[1024];
    MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
                                             (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer));
    SvmAllocationData allocData1(maxRootDeviceIndex);
    allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
    allocData1.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
    allocData1.device = &subDevice1->getDevice();

    svmManager->insertSVMAlloc(allocData0);
    svmManager->insertSVMAlloc(allocData1);
    EXPECT_EQ(2u, svmManager->getNumAllocs());

    ResidencyContainer residencyContainer;
    EXPECT_EQ(0u, residencyContainer.size());
    svmManager->addInternalAllocationsToResidencyContainer(peerDevice->getDevice().getRootDeviceIndex(),
                                                           residencyContainer,
                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
    EXPECT_EQ(0u, residencyContainer.size());
}

// Copies from the fixture's SVM allocation into a plain stack buffer while the
// queue's GPGPU command stream receiver is replaced by `failCsr`, and expects
// CL_OUT_OF_RESOURCES.
// `failCsr` is presumably a CSR whose host-pointer allocation creation fails
// (per the test name); its type is defined outside this view - TODO confirm.
HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char dstHostPtr[260];
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    MockCommandQueueHw cmdQ(context, pClDevice, nullptr);
    auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    // Remember the original CSR so it can be put back before cmdQ is destroyed.
    CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
    cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
    retVal = cmdQ.enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
    cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// Same scenario with the roles swapped: the source is a plain stack buffer and
// the destination is the fixture's SVM allocation; CL_OUT_OF_RESOURCES is expected.
// NOTE(review): the test name mentions "SizeZero" but the call passes a size
// of 256 - confirm which is intended.
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char srcHostPtr[260];
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = srcHostPtr;
    MockCommandQueueHw cmdQ(context, pClDevice, nullptr);
    auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    // Remember the original CSR so it can be put back before cmdQ is destroyed.
    CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
    cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
    retVal = cmdQ.enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
    cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// Both source and destination are plain stack buffers; with `failCsr`
// installed the copy is expected to return CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char dstHostPtr[260];
    char srcHostPtr[260];
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = srcHostPtr;
    MockCommandQueueHw cmdQ(context, pClDevice, nullptr);
    auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    // Remember the original CSR so it can be put back before cmdQ is destroyed.
    CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ.getGpgpuCommandStreamReceiver();
    cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get();
    retVal = cmdQ.enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
    cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}
// NOTE(review): in this copy of the file some casts/templates appear without
// their explicit template arguments (e.g. `std::make_unique()`,
// `static_cast(mockMemoryManager->getPageFaultManager())`); the code is
// reproduced exactly as found - confirm against the repository copy.

// Registers two SVM allocations (a fresh source and the fixture's ptrSVM) with
// a mock page fault manager, enqueues an SVM memcpy between them, and checks
// the mock's counters: protectMemoryCalled == 2, transferToGpuCalled == 2,
// while allowMemoryAccessCalled and transferToCpuCalled stay 0.
TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemcpyThenAllocIsDecommitted) {
    auto mockMemoryManager = std::make_unique();
    mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager());
    // Swap the context's memory manager for the mock; the original is restored at the end.
    auto memoryManager = context->getMemoryManager();
    context->memoryManager = mockMemoryManager.get();

    auto srcSvm = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    mockMemoryManager->getPageFaultManager()->insertAllocation(srcSvm, 256, context->getSVMAllocsManager(), context->getSpecialQueue(pDevice->getRootDeviceIndex()), {});
    mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue(pDevice->getRootDeviceIndex()), {});
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);

    this->pCmdQ->enqueueSVMMemcpy(false, ptrSVM, srcSvm, 256, 0, nullptr, nullptr);

    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 2);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 2);

    context->getSVMAllocsManager()->freeSVMAlloc(srcSvm);
    context->memoryManager = memoryManager;
}

// Registers the fixture's ptrSVM with a mock page fault manager, enqueues an
// SVM mem fill on it, and checks the mock's counters: protectMemoryCalled and
// transferToGpuCalled go from 0 to 1, while allowMemoryAccessCalled and
// transferToCpuCalled stay 0.
TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommitted) {
    char pattern[256];
    auto mockMemoryManager = std::make_unique();
    mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager());
    // Swap the context's memory manager for the mock; the original is restored at the end.
    auto memoryManager = context->getMemoryManager();
    context->memoryManager = mockMemoryManager.get();

    mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue(0u), {});
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 0);

    pCmdQ->enqueueSVMMemFill(ptrSVM, &pattern, 256, 256, 0, nullptr, nullptr);

    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 1);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0);
    EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 1);

    context->memoryManager = memoryManager;
}

// Copy into the fixture's SVM allocation. With a source pointer returned by
// createBufferAndMapItOnGpu() the CSR's createAllocationForHostSurfaceCalled
// counter stays at 0; with a freshly heap-allocated (not mapped) source it
// becomes 1.
HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToSvmAllocWhenCallingSvmMemcpyThenReuseMappedAllocations) {
    constexpr size_t size = 1u;
    auto &csr = pDevice->getUltCommandStreamReceiver();

    {
        // Source is a mapped buffer pointer: no host-surface allocation is expected.
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,     // cl_bool blocking_copy
            ptrSVM,    // void *dst_ptr
            mappedPtr, // const void *src_ptr
            size,      // size_t size
            0,         // cl_uint num_events_in_wait_list
            nullptr,   // cl_event *event_wait_list
            nullptr    // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Source is an unrelated heap pointer: one host-surface allocation is expected.
        auto notMappedPtr = std::make_unique(size);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool blocking_copy
            ptrSVM,             // void *dst_ptr
            notMappedPtr.get(), // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
    }
}

// Copy out of the fixture's SVM allocation. With a destination pointer
// returned by createBufferAndMapItOnGpu() the CSR's
// createAllocationForHostSurfaceCalled counter stays at 0; with a freshly
// heap-allocated (not mapped) destination it becomes 1.
HWTEST_F(EnqueueSvmTest, givenCopyFromSvmAllocToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
    constexpr size_t size = 1u;
    auto &csr = pDevice->getUltCommandStreamReceiver();

    {
        // Destination is a mapped buffer pointer: no host-surface allocation is expected.
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,     // cl_bool blocking_copy
            mappedPtr, // void *dst_ptr
            ptrSVM,    // const void *src_ptr
            size,      // size_t size
            0,         // cl_uint num_events_in_wait_list
            nullptr,   // cl_event *event_wait_list
            nullptr    // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Destination is an unrelated heap pointer: one host-surface allocation is expected.
        auto notMappedPtr = std::make_unique(size);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool blocking_copy
            notMappedPtr.get(), // void *dst_ptr
            ptrSVM,             // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
    }
}

// Copies where neither side is the fixture's SVM allocation. The CSR's
// createAllocationForHostSurfaceCalled counter is checked after each case:
//   mapped -> mapped       : stays 0
//   not mapped -> mapped   : becomes 1
//   mapped -> not mapped   : becomes 2
HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
    constexpr size_t size = 1u;
    auto &csr = pDevice->getUltCommandStreamReceiver();

    {
        // Both pointers come from mapped buffers.
        auto [buffer1, mappedPtr1] = createBufferAndMapItOnGpu();
        auto [buffer2, mappedPtr2] = createBufferAndMapItOnGpu();
        std::ignore = buffer1;
        std::ignore = buffer2;
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,      // cl_bool blocking_copy
            mappedPtr2, // void *dst_ptr
            mappedPtr1, // const void *src_ptr
            size,       // size_t size
            0,          // cl_uint num_events_in_wait_list
            nullptr,    // cl_event *event_wait_list
            nullptr     // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Mapped destination, not-mapped source.
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        auto notMappedPtr = std::make_unique(size);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool blocking_copy
            mappedPtr,          // void *dst_ptr
            notMappedPtr.get(), // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Not-mapped destination, mapped source; the counter carries over from the previous case.
        auto notMappedPtr = std::make_unique(size);
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool blocking_copy
            notMappedPtr.get(), // void *dst_ptr
            mappedPtr,          // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(2u, csr.createAllocationForHostSurfaceCalled);
    }
}