diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index e5c0a93ca7..6888f655f5 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -153,7 +153,9 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { size_t size = gpuAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device, pSurfaceState, false, false, size, gpuAllocation->getUnderlyingBuffer(), 0, gpuAllocation, 0, 0, pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); - device.getMemoryManager()->getPageFaultManager()->moveAllocationToGpuDomain(reinterpret_cast(gpuAllocation->getGpuAddress())); + if (device.getMemoryManager()->getPageFaultManager()) { + device.getMemoryManager()->getPageFaultManager()->moveAllocationToGpuDomain(reinterpret_cast(gpuAllocation->getGpuAddress())); + } } else { cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); diff --git a/opencl/source/gtpin/gtpin_helpers.cpp b/opencl/source/gtpin/gtpin_helpers.cpp index e16ab2721a..4ba29ce4c8 100644 --- a/opencl/source/gtpin/gtpin_helpers.cpp +++ b/opencl/source/gtpin/gtpin_helpers.cpp @@ -9,6 +9,7 @@ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_info.h" +#include "shared/source/helpers/validators.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" @@ -27,38 +28,45 @@ using namespace gtpin; namespace NEO { GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context, uint32_t reqSize, resource_handle_t *pResource) { - cl_int diag = CL_SUCCESS; - Context *pContext = castToObject((cl_context)context); - if ((pContext == nullptr) || (pResource == nullptr)) { + cl_int retVal = CL_SUCCESS; + Context *pContext = castToObject(reinterpret_cast(context)); + if (isAnyNullptr(pContext, pResource)) { return 
GTPIN_DI_ERROR_INVALID_ARGUMENT; } size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize); - auto &gtpinHelper = pContext->getDevice(0)->getGTPinGfxCoreHelper(); - if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { - void *unifiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, pContext->getDevice(0), 0, size, 0, &diag); - auto allocationsManager = pContext->getSVMAllocsManager(); - auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); - *pResource = (resource_handle_t)graphicsAllocation; - } else { - void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize); - if (hostPtr == nullptr) { + auto clDevice = pContext->getDevice(0); + auto &gtpinHelper = clDevice->getGTPinGfxCoreHelper(); + if (gtpinHelper.canUseSharedAllocation(clDevice->getHardwareInfo())) { + void *unifiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, clDevice, 0, size, 0, &retVal); + if (retVal != CL_SUCCESS) { return GTPIN_DI_ERROR_ALLOCATION_FAILED; } - cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag); - *pResource = (resource_handle_t)buffer; + auto allocationsManager = pContext->getSVMAllocsManager(); + auto allocData = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); + *pResource = reinterpret_cast(allocData); + } else { + void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize); + cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, retVal); + if (retVal != CL_SUCCESS) { + return GTPIN_DI_ERROR_ALLOCATION_FAILED; + } + *pResource = reinterpret_cast(buffer); } return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) { - Context *pContext = 
castToObject((cl_context)context); - if ((pContext == nullptr) || (resource == nullptr)) { + Context *pContext = castToObject(reinterpret_cast(context)); + if (isAnyNullptr(pContext, resource)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } - auto &gtpinHelper = pContext->getDevice(0)->getGTPinGfxCoreHelper(); - if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { + + auto clDevice = pContext->getDevice(0); + auto &gtpinHelper = clDevice->getGTPinGfxCoreHelper(); + if (gtpinHelper.canUseSharedAllocation(clDevice->getHardwareInfo())) { auto allocData = reinterpret_cast(resource); - clMemFreeINTEL(pContext, allocData->cpuAllocation->getUnderlyingBuffer()); + auto graphicsAllocation = allocData->gpuAllocations.getGraphicsAllocation(clDevice->getRootDeviceIndex()); + clMemFreeINTEL(pContext, reinterpret_cast(graphicsAllocation->getGpuAddress())); } else { auto pMemObj = castToObject(resource); if (pMemObj == nullptr) { @@ -71,17 +79,20 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, resource_handle_t resource, uint8_t **pAddress) { - cl_mem buffer = (cl_mem)resource; - Context *pContext = castToObject((cl_context)context); - if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) { + Context *pContext = castToObject(reinterpret_cast(context)); + if (isAnyNullptr(pContext, resource, pAddress)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } - auto &gtpinHelper = pContext->getDevice(0)->getGTPinGfxCoreHelper(); - if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { + + auto clDevice = pContext->getDevice(0); + auto &gtpinHelper = clDevice->getGTPinGfxCoreHelper(); + if (gtpinHelper.canUseSharedAllocation(clDevice->getHardwareInfo())) { auto allocData = reinterpret_cast(resource); - *pAddress = reinterpret_cast(allocData->cpuAllocation->getUnderlyingBuffer()); + auto graphicsAllocation = 
allocData->gpuAllocations.getGraphicsAllocation(clDevice->getRootDeviceIndex()); + *pAddress = reinterpret_cast(graphicsAllocation->getGpuAddress()); + } else { - auto pMemObj = castToObject(buffer); + auto pMemObj = castToObject(resource); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } @@ -91,12 +102,14 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, r } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) { - Context *pContext = castToObject((cl_context)context); - if ((pContext == nullptr) || (resource == nullptr)) { + Context *pContext = castToObject(reinterpret_cast(context)); + if (isAnyNullptr(pContext, resource)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } - auto &gtpinHelper = pContext->getDevice(0)->getGTPinGfxCoreHelper(); - if (!gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { + + auto clDevice = pContext->getDevice(0); + auto &gtpinHelper = clDevice->getGTPinGfxCoreHelper(); + if (!gtpinHelper.canUseSharedAllocation(clDevice->getHardwareInfo())) { auto pMemObj = castToObject(resource); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 3056e003f8..0ed3449eb8 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -432,35 +432,40 @@ TEST_F(GTPinTests, givenInvalidArgumentsThenBufferUnMapFails) { TEST_F(GTPinTests, givenValidRequestForHugeMemoryAllocationThenBufferAllocateFails) { - InjectedFunction allocBufferFunc = [this](size_t failureIndex) { - resource_handle_t res; - cl_context ctxt = (cl_context)((Context *)pContext); - uint32_t hugeSize = 400u; // Will be handled as huge memory allocation - retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, hugeSize, &res); - if (MemoryManagement::nonfailingAllocation != 
failureIndex) { - EXPECT_EQ(GTPIN_DI_ERROR_ALLOCATION_FAILED, retFromGtPin); - } else { - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - EXPECT_NE(nullptr, res); - retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - } - }; + DebugManagerStateRestore restorer; + for (auto &allocationInUSMShared : ::testing::Bool()) { + DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(allocationInUSMShared); + InjectedFunction allocBufferFunc = [this](size_t failureIndex) { + resource_handle_t res; + cl_context ctxt = (cl_context)((Context *)pContext); + uint32_t hugeSize = 400u; // Will be handled as huge memory allocation + retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, hugeSize, &res); + if (MemoryManagement::nonfailingAllocation != failureIndex) { + EXPECT_EQ(GTPIN_DI_ERROR_ALLOCATION_FAILED, retFromGtPin); + } else { + EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); + EXPECT_NE(nullptr, res); + retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); + EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); + } + }; + isGTPinInitialized = false; + gtpinCallbacks.onContextCreate = onContextCreate; + gtpinCallbacks.onContextDestroy = onContextDestroy; + gtpinCallbacks.onKernelCreate = onKernelCreate; + gtpinCallbacks.onKernelSubmit = onKernelSubmit; + gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; + gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; + retFromGtPin = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr); + EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); + ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); + ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); + ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); + EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); + EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); - gtpinCallbacks.onContextCreate 
= onContextCreate; - gtpinCallbacks.onContextDestroy = onContextDestroy; - gtpinCallbacks.onKernelCreate = onKernelCreate; - gtpinCallbacks.onKernelSubmit = onKernelSubmit; - gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; - gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; - retFromGtPin = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); - ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); - EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); - EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); - - injectFailures(allocBufferFunc); + injectFailures(allocBufferFunc); + } } TEST_F(GTPinTests, givenValidRequestForMemoryAllocationThenBufferAllocateAndDeallocateSucceeds) { @@ -2160,7 +2165,7 @@ TEST_F(GTPinTestsWithLocalMemory, whenPlatformHasNoSvmSupportThenGtPinBufferCant } } -HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinWithSupportForSharedAllocationWhenGtPinHelperFunctionsAreCalledThenCheckIfSharedAllocationCabBeUsed) { +HWTEST_F(GTPinTests, givenGtPinWithSupportForSharedAllocationWhenGtPinHelperFunctionsAreCalledThenCheckIfSharedAllocationCanBeUsed) { auto &gtpinHelper = pDevice->getGTPinGfxCoreHelper(); if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { GTEST_SKIP(); @@ -2180,39 +2185,39 @@ HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinWithSupportForSharedAllocationWhen pDevice->gtpinGfxCoreHelper.swap(backup); resource_handle_t resource = nullptr; - cl_context ctxt = (cl_context)((Context *)pContext); + gtpin::context_handle_t gtPinContext = reinterpret_cast(static_cast(pContext)); mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled = false; - gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource); + gtpinCreateBuffer(gtPinContext, 256, &resource); EXPECT_TRUE(mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled); 
mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled = false; uint8_t *address = nullptr; - gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address); + gtpinMapBuffer(gtPinContext, resource, &address); EXPECT_TRUE(mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled); mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled = false; - gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource); + gtpinUnmapBuffer(gtPinContext, resource); EXPECT_TRUE(mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled); mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled = false; - gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource); + gtpinFreeBuffer(gtPinContext, resource); EXPECT_TRUE(mockGTPinGfxCoreHelperHw->canUseSharedAllocationCalled); pDevice->gtpinGfxCoreHelper.swap(backup); } -HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedThenAllocateBufferInSharedMemory) { +TEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedThenAllocateBufferInSharedMemory) { auto &gtpinHelper = pDevice->getGTPinGfxCoreHelper(); if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { GTEST_SKIP(); } resource_handle_t resource = nullptr; - cl_context ctxt = (cl_context)((Context *)pContext); + gtpin::context_handle_t gtPinContext = reinterpret_cast(static_cast(pContext)); GTPIN_DI_STATUS status = GTPIN_DI_SUCCESS; - status = gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource); + status = gtpinCreateBuffer(gtPinContext, 256, &resource); EXPECT_EQ(GTPIN_DI_SUCCESS, status); EXPECT_NE(nullptr, resource); @@ -2227,14 +2232,51 @@ HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBuf EXPECT_NE(AllocationType::UNIFIED_SHARED_MEMORY, gpuAllocation->getAllocationType()); uint8_t *address = nullptr; - status = gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address); + status = gtpinMapBuffer(gtPinContext, resource, &address); 
EXPECT_EQ(GTPIN_DI_SUCCESS, status); EXPECT_EQ(allocData->cpuAllocation->getUnderlyingBuffer(), address); - status = gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource); + status = gtpinUnmapBuffer(gtPinContext, resource); EXPECT_EQ(GTPIN_DI_SUCCESS, status); - status = gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource); + status = gtpinFreeBuffer(gtPinContext, resource); + EXPECT_EQ(GTPIN_DI_SUCCESS, status); +} + +TEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedInSingleStorageThenAllocateBufferWithoutCpuAllocation) { + DebugManagerStateRestore restorer; + DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(0); + auto &gtpinHelper = pDevice->getGTPinGfxCoreHelper(); + if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { + GTEST_SKIP(); + } + + resource_handle_t resource = nullptr; + gtpin::context_handle_t gtPinContext = reinterpret_cast(static_cast(pContext)); + GTPIN_DI_STATUS status = GTPIN_DI_SUCCESS; + + status = gtpinCreateBuffer(gtPinContext, 256, &resource); + EXPECT_EQ(GTPIN_DI_SUCCESS, status); + EXPECT_NE(nullptr, resource); + + auto allocData = reinterpret_cast(resource); + + auto cpuAllocation = allocData->cpuAllocation; + EXPECT_EQ(nullptr, cpuAllocation); + + auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); + ASSERT_NE(nullptr, gpuAllocation); + EXPECT_NE(AllocationType::UNIFIED_SHARED_MEMORY, gpuAllocation->getAllocationType()); + + uint8_t *address = nullptr; + status = gtpinMapBuffer(gtPinContext, resource, &address); + EXPECT_EQ(GTPIN_DI_SUCCESS, status); + EXPECT_EQ(gpuAllocation->getGpuAddress(), castToUint64(address)); + + status = gtpinUnmapBuffer(gtPinContext, resource); + EXPECT_EQ(GTPIN_DI_SUCCESS, status); + + status = gtpinFreeBuffer(gtPinContext, resource); EXPECT_EQ(GTPIN_DI_SUCCESS, status); } @@ -2365,15 +2407,6 @@ HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtpinNot } 
uint8_t data[128]; }; - - struct MockResidentTestsPageFaultManager : public MockPageFaultManager { - void moveAllocationToGpuDomain(void *ptr) override { - moveAllocationToGpuDomainCalledTimes++; - migratedAddress = ptr; - } - uint32_t moveAllocationToGpuDomainCalledTimes = 0; - void *migratedAddress = nullptr; - }; static std::unique_ptr allocDataHandle; static std::unique_ptr mockGAHandle; @@ -2394,21 +2427,34 @@ HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtpinNot gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; - GTPIN_DI_STATUS status = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, status); + DebugManagerStateRestore restorer; + for (auto &allocateDualStorageUSM : ::testing::Bool()) { + DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(allocateDualStorageUSM); + isGTPinInitialized = false; + if (allocateDualStorageUSM) { + memoryManager->pageFaultManager = std::make_unique(); + } else { + memoryManager->pageFaultManager.reset(); + } - MockKernelWithInternals mockkernel(*pDevice); - MockCommandQueue mockCmdQueue; - cl_context ctxt = (cl_context)((Context *)pContext); - currContext = (gtpin::context_handle_t)(ctxt); - mockCmdQueue.device = pDevice; + GTPIN_DI_STATUS status = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr); + EXPECT_EQ(GTPIN_DI_SUCCESS, status); - gtpinNotifyKernelSubmit(mockkernel.mockMultiDeviceKernel, &mockCmdQueue); - EXPECT_EQ(reinterpret_cast(pDevice->getExecutionEnvironment()->memoryManager->getPageFaultManager())->moveAllocationToGpuDomainCalledTimes, 1u); + MockKernelWithInternals mockkernel(*pDevice); + MockCommandQueue mockCmdQueue; + cl_context ctxt = (cl_context)((Context *)pContext); + currContext = (gtpin::context_handle_t)(ctxt); + mockCmdQueue.device = pDevice; - mockCmdQueue.device = nullptr; - mockGAHandle.reset(); - allocDataHandle.reset(); + 
gtpinNotifyKernelSubmit(mockkernel.mockMultiDeviceKernel, &mockCmdQueue); + if (allocateDualStorageUSM) { + EXPECT_EQ(static_cast(pDevice->getExecutionEnvironment()->memoryManager->getPageFaultManager())->moveAllocationToGpuDomainCalledTimes, 1u); + } + + mockCmdQueue.device = nullptr; + mockGAHandle.reset(); + allocDataHandle.reset(); + } pDevice->gtpinGfxCoreHelper.swap(backup); } diff --git a/shared/source/helpers/validators.h b/shared/source/helpers/validators.h index 239b67d220..3d2577ef36 100644 --- a/shared/source/helpers/validators.h +++ b/shared/source/helpers/validators.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -19,4 +19,9 @@ template bool areNotNullptr(T t, RT... rt) { return (t != nullptr) && areNotNullptr(rt...); } + +template +bool isAnyNullptr(T t, RT... rt) { + return !areNotNullptr(t, rt...); +} } // namespace NEO