Add GTPin feature to allocate buffer in shared memory

Related-To: NEO-5667

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2021-03-25 13:57:58 +00:00
committed by Compute-Runtime-Automation
parent 671d916c70
commit 255e85c124
7 changed files with 279 additions and 39 deletions

View File

@ -7,7 +7,7 @@
#include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/surface.h"
#include "shared/source/utilities/spinlock.h" #include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/command_queue.h"
@ -30,13 +30,15 @@ using namespace gtpin;
namespace NEO { namespace NEO {
using GTPinLockType = std::recursive_mutex;
extern gtpin::ocl::gtpin_events_t GTPinCallbacks; extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
igc_init_t *pIgcInit = nullptr; igc_init_t *pIgcInit = nullptr;
std::atomic<int> sequenceCount(1); std::atomic<int> sequenceCount(1);
CommandQueue *pCmdQueueForFlushTask = nullptr; CommandQueue *pCmdQueueForFlushTask = nullptr;
std::deque<gtpinkexec_t> kernelExecQueue; std::deque<gtpinkexec_t> kernelExecQueue;
SpinLock kernelExecQueueLock; GTPinLockType kernelExecQueueLock;
void gtpinNotifyContextCreate(cl_context context) { void gtpinNotifyContextCreate(cl_context context) {
if (isGTPinInitialized) { if (isGTPinInitialized) {
@ -131,7 +133,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
kExec.gtpinResource = (cl_mem)resource; kExec.gtpinResource = (cl_mem)resource;
kExec.commandBuffer = commandBuffer; kExec.commandBuffer = commandBuffer;
kExec.pCommandQueue = (CommandQueue *)pCmdQueue; kExec.pCommandQueue = (CommandQueue *)pCmdQueue;
std::unique_lock<SpinLock> lock{kernelExecQueueLock}; std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
kernelExecQueue.push_back(kExec); kernelExecQueue.push_back(kExec);
lock.unlock(); lock.unlock();
// Patch SSH[gtpinBTI] with GT-Pin resource // Patch SSH[gtpinBTI] with GT-Pin resource
@ -142,10 +144,19 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily); GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
cl_mem buffer = (cl_mem)resource; if (gtpinHelper.canUseSharedAllocation(device.getHardwareInfo())) {
auto pBuffer = castToObjectOrAbort<Buffer>(buffer); auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device, auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); size_t size = gpuAllocation->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, pSurfaceState, false, false, size, gpuAllocation->getUnderlyingBuffer(), 0, gpuAllocation, 0, 0,
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
pKernel->setUnifiedMemoryExecInfo(gpuAllocation);
} else {
cl_mem buffer = (cl_mem)resource;
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
}
} }
} }
@ -157,7 +168,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
if (isGTPinInitialized) { if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock}; std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
size_t numElems = kernelExecQueue.size(); size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems; n++) { for (size_t n = 0; n < numElems; n++) {
if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) { if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) {
@ -173,7 +184,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
if (isGTPinInitialized) { if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock}; std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
size_t numElems = kernelExecQueue.size(); size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems;) { for (size_t n = 0; n < numElems;) {
if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) { if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
@ -191,15 +202,23 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
void gtpinNotifyMakeResident(void *pKernel, void *pCSR) { void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
if (isGTPinInitialized) { if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock}; std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
Context &context = static_cast<Kernel *>(pKernel)->getContext();
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
size_t numElems = kernelExecQueue.size(); size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems; n++) { for (size_t n = 0; n < numElems; n++) {
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
// It's time for kernel to make resident its GT-Pin resource // It's time for kernel to make resident its GT-Pin resource
CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast<CommandStreamReceiver *>(pCSR); CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast<CommandStreamReceiver *>(pCSR);
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource; GraphicsAllocation *pGfxAlloc = nullptr;
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer); if (gtpinHelper.canUseSharedAllocation(context.getDevice(0)->getHardwareInfo())) {
GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex()); auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[n].gtpinResource);
pGfxAlloc = allocData->gpuAllocations.getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
} else {
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
}
pCommandStreamReceiver->makeResident(*pGfxAlloc); pCommandStreamReceiver->makeResident(*pGfxAlloc);
kernelExecQueue[n].isResourceResident = true; kernelExecQueue[n].isResourceResident = true;
break; break;
@ -210,7 +229,7 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) { void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
if (isGTPinInitialized) { if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock}; std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
size_t numElems = kernelExecQueue.size(); size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems; n++) { for (size_t n = 0; n < numElems; n++) {
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2017-2020 Intel Corporation * Copyright (C) 2017-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -8,8 +8,12 @@
#include "gtpin_helpers.h" #include "gtpin_helpers.h"
#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/api/api.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h" #include "opencl/source/context/context.h"
#include "opencl/source/gtpin/gtpin_hw_helper.h"
#include "opencl/source/helpers/validators.h" #include "opencl/source/helpers/validators.h"
#include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/buffer.h"
@ -27,27 +31,39 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context
return GTPIN_DI_ERROR_INVALID_ARGUMENT; return GTPIN_DI_ERROR_INVALID_ARGUMENT;
} }
size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize); size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize);
void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize); GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
if (hostPtr == nullptr) { if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
return GTPIN_DI_ERROR_ALLOCATION_FAILED; void *unfiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, pContext->getDevice(0), 0, size, 0, &diag);
auto allocationsManager = pContext->getSVMAllocsManager();
auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemorySharedAllocation);
*pResource = (resource_handle_t)graphicsAllocation;
} else {
void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize);
if (hostPtr == nullptr) {
return GTPIN_DI_ERROR_ALLOCATION_FAILED;
}
cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag);
*pResource = (resource_handle_t)buffer;
} }
cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag);
*pResource = (resource_handle_t)buffer;
return GTPIN_DI_SUCCESS; return GTPIN_DI_SUCCESS;
} }
// Releases a GT-Pin buffer handle that was produced by gtpinCreateBuffer.
//
// context  - GT-Pin context handle; must cast to a valid Context.
// resource - handle to release; either an SvmAllocationData pointer (shared
//            allocation path) or a cl_mem Buffer (host memory path).
//
// Returns GTPIN_DI_ERROR_INVALID_ARGUMENT when the context or the resource is
// null, or when the host memory path is taken and the handle is not a MemObj;
// GTPIN_DI_SUCCESS otherwise.
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) {
    Context *pContext = castToObject<Context>((cl_context)context);
    if ((pContext == nullptr) || (resource == nullptr)) {
        return GTPIN_DI_ERROR_INVALID_ARGUMENT;
    }
    // The release path must be selected with the same predicate that
    // gtpinCreateBuffer uses to pick the allocation path; otherwise the handle
    // would be reinterpreted as the wrong type.
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
    if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
        auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
        clMemFreeINTEL(pContext, allocData->cpuAllocation->getUnderlyingBuffer());
    } else {
        auto pMemObj = castToObject<MemObj>(resource);
        if (pMemObj == nullptr) {
            return GTPIN_DI_ERROR_INVALID_ARGUMENT;
        }
        // The host pointer was allocated separately from the Buffer object, so
        // it is freed explicitly before the Buffer reference is dropped.
        alignedFree(pMemObj->getHostPtr());
        pMemObj->release();
    }
    return GTPIN_DI_SUCCESS;
}
@ -57,23 +73,31 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, r
if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) { if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) {
return GTPIN_DI_ERROR_INVALID_ARGUMENT; return GTPIN_DI_ERROR_INVALID_ARGUMENT;
} }
auto pMemObj = castToObject<MemObj>(buffer); GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
if (pMemObj == nullptr) { if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
return GTPIN_DI_ERROR_INVALID_ARGUMENT; auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
*pAddress = reinterpret_cast<uint8_t *>(allocData->cpuAllocation->getUnderlyingBuffer());
} else {
auto pMemObj = castToObject<MemObj>(buffer);
if (pMemObj == nullptr) {
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
}
*pAddress = reinterpret_cast<uint8_t *>(pMemObj->getHostPtr());
} }
*pAddress = (uint8_t *)pMemObj->getHostPtr();
return GTPIN_DI_SUCCESS; return GTPIN_DI_SUCCESS;
} }
// Validates a GT-Pin unmap request. No unmapping work is performed here; the
// function only checks its arguments.
//
// Returns GTPIN_DI_ERROR_INVALID_ARGUMENT when the context or the resource is
// null, or when shared allocations are not in use and the handle does not cast
// to a MemObj; GTPIN_DI_SUCCESS otherwise.
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) {
    Context *pContext = castToObject<Context>((cl_context)context);
    if ((resource == nullptr) || (pContext == nullptr)) {
        return GTPIN_DI_ERROR_INVALID_ARGUMENT;
    }
    const auto &hwInfo = pContext->getDevice(0)->getHardwareInfo();
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(hwInfo.platform.eRenderCoreFamily);
    if (gtpinHelper.canUseSharedAllocation(hwInfo)) {
        // Shared allocation handles are not MemObj instances, so there is nothing further to validate.
        return GTPIN_DI_SUCCESS;
    }
    const bool isValidMemObj = (castToObject<MemObj>(resource) != nullptr);
    return isValidMemObj ? GTPIN_DI_SUCCESS : GTPIN_DI_ERROR_INVALID_ARGUMENT;
}

View File

@ -17,6 +17,7 @@ class GTPinHwHelper {
virtual uint32_t getGenVersion() = 0; virtual uint32_t getGenVersion() = 0;
virtual bool addSurfaceState(Kernel *pKernel) = 0; virtual bool addSurfaceState(Kernel *pKernel) = 0;
virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0; virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0;
virtual bool canUseSharedAllocation(const HardwareInfo &hwInfo) const = 0;
protected: protected:
GTPinHwHelper(){}; GTPinHwHelper(){};
@ -32,8 +33,9 @@ class GTPinHwHelperHw : public GTPinHwHelper {
uint32_t getGenVersion() override; uint32_t getGenVersion() override;
bool addSurfaceState(Kernel *pKernel) override; bool addSurfaceState(Kernel *pKernel) override;
void *getSurfaceState(Kernel *pKernel, size_t bti) override; void *getSurfaceState(Kernel *pKernel, size_t bti) override;
bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override;
private: protected:
GTPinHwHelperHw(){}; GTPinHwHelperHw(){};
}; };
} // namespace NEO } // namespace NEO

View File

@ -56,4 +56,14 @@ void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti) {
return pSurfaceState; return pSurfaceState;
} }
// Tells whether GT-Pin buffers may be placed in a shared allocation.
//
// hwInfo - hardware description whose capabilityTable.ftrSvm is consulted.
//
// Returns true only when the GTPinAllocateBufferInSharedMemory debug flag is
// explicitly set to a non-zero value (i.e. neither -1 nor 0) and the platform
// reports ftrSvm.
template <typename GfxFamily>
bool GTPinHwHelperHw<GfxFamily>::canUseSharedAllocation(const HardwareInfo &hwInfo) const {
    const auto flagValue = DebugManager.flags.GTPinAllocateBufferInSharedMemory.get();
    // -1 means "flag not set"; in that case, as with 0, the shared path stays disabled.
    const bool requestedByFlag = (flagValue != -1) && (flagValue != 0);
    return requestedByFlag && hwInfo.capabilityTable.ftrSvm;
}
} // namespace NEO } // namespace NEO

View File

@ -11,7 +11,9 @@
#include "shared/source/helpers/file_io.h" #include "shared/source/helpers/file_io.h"
#include "shared/source/helpers/hash.h" #include "shared/source/helpers/hash.h"
#include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_context.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/test_files.h" #include "shared/test/common/helpers/test_files.h"
#include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_device.h"
@ -52,7 +54,8 @@ using namespace gtpin;
namespace NEO { namespace NEO {
extern std::deque<gtpinkexec_t> kernelExecQueue; extern std::deque<gtpinkexec_t> kernelExecQueue;
} extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE];
} // namespace NEO
namespace ULT { namespace ULT {
@ -149,6 +152,11 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
public: public:
void SetUp() override { void SetUp() override {
DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(false);
SetUpImpl();
}
void SetUpImpl() {
platformsImpl->clear(); platformsImpl->clear();
MemoryManagementFixture::SetUp(); MemoryManagementFixture::SetUp();
constructPlatform(); constructPlatform();
@ -195,6 +203,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
gtpin::ocl::gtpin_events_t gtpinCallbacks; gtpin::ocl::gtpin_events_t gtpinCallbacks;
MockMemoryManagerWithFailures *memoryManager = nullptr; MockMemoryManagerWithFailures *memoryManager = nullptr;
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max(); uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
DebugManagerStateRestore restore;
}; };
typedef Test<GTPinFixture> GTPinTests; typedef Test<GTPinFixture> GTPinTests;
@ -2495,4 +2504,178 @@ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed
EXPECT_EQ(kernelExecQueue[0].taskCount, stamp.taskCount); EXPECT_EQ(kernelExecQueue[0].taskCount, stamp.taskCount);
} }
// Fixture variant that turns on the EnableLocalMemory and
// GTPinAllocateBufferInSharedMemory debug flags before running the common
// GTPinFixture setup. The flags are put back by the DebugManagerStateRestore
// member inherited from GTPinFixture, so no second restore object is declared
// here.
class GTPinFixtureWithLocalMemory : public GTPinFixture {
  public:
    void SetUp() override {
        DebugManager.flags.EnableLocalMemory.set(true);
        DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(true);
        // SetUpImpl is called instead of GTPinFixture::SetUp because the
        // latter sets GTPinAllocateBufferInSharedMemory to false first.
        GTPinFixture::SetUpImpl();
    }
    void TearDown() override {
        GTPinFixture::TearDown();
    }
};
using GTPinTestsWithLocalMemory = Test<GTPinFixtureWithLocalMemory>;
// Verifies that a missing ftrSvm capability vetoes the shared allocation path
// even when the debug flag explicitly requests it.
TEST_F(GTPinTestsWithLocalMemory, whenPlatformHasNoSvmSupportThenGtPinBufferCantBeAllocatedInSharedMemory) {
    // Force the flag on so that the result depends on ftrSvm alone; with the
    // flag left at -1 the helper returns false on every platform.
    DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(1);
    // Work on a copy so the device's own hardware info is left untouched.
    HardwareInfo hwInfo = pDevice->getHardwareInfo();
    hwInfo.capabilityTable.ftrSvm = false;
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(hwInfo.platform.eRenderCoreFamily);
    EXPECT_FALSE(gtpinHelper.canUseSharedAllocation(hwInfo));
}
// Exercises create / map / unmap / free of a GT-Pin buffer when the shared
// allocation path is enabled, and checks that the returned handle is an
// SvmAllocationData with both a CPU and a GPU allocation.
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedThenAllocateBufferInSharedMemory) {
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) {
        GTEST_SKIP();
    }
    resource_handle_t resource = nullptr;
    cl_context ctxt = (cl_context)((Context *)pContext);
    GTPIN_DI_STATUS status = GTPIN_DI_SUCCESS;
    status = gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource);
    // Fatal assertions: the handle is dereferenced right below.
    ASSERT_EQ(GTPIN_DI_SUCCESS, status);
    ASSERT_NE(nullptr, resource);
    auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
    auto cpuAllocation = allocData->cpuAllocation;
    ASSERT_NE(nullptr, cpuAllocation);
    EXPECT_NE(GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, cpuAllocation->getAllocationType());
    auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_NE(GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, gpuAllocation->getAllocationType());
    // Mapping must expose the CPU allocation's underlying buffer.
    uint8_t *address = nullptr;
    status = gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
    EXPECT_EQ(allocData->cpuAllocation->getUnderlyingBuffer(), address);
    status = gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
    status = gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
}
// Builds and submits a kernel with GT-Pin initialized and the shared
// allocation path enabled, then checks that the GT-Pin surface state was
// programmed from the GPU allocation and that gtpinNotifyMakeResident makes
// that allocation resident.
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsAllocatedInSharedMemoryThenSetSurfaceStateForTheBufferAndMakeItResident) {
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) {
        GTEST_SKIP();
    }

    gtpinCallbacks.onContextCreate = OnContextCreate;
    gtpinCallbacks.onContextDestroy = OnContextDestroy;
    gtpinCallbacks.onKernelCreate = OnKernelCreate;
    gtpinCallbacks.onKernelSubmit = OnKernelSubmit;
    gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate;
    gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete;
    GTPIN_DI_STATUS status = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);

    cl_kernel kernel = nullptr;
    cl_program pProgram = nullptr;
    cl_device_id device = (cl_device_id)pDevice;
    size_t sourceSize = 0;
    std::string testFile;
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = 0;
    cl_context context = nullptr;

    KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
    testFile.append(clFiles);
    testFile.append("CopyBuffer_simd16.cl");
    auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
    EXPECT_NE(0u, sourceSize);
    ASSERT_NE(nullptr, pSource);

    context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, context);

    cmdQ = clCreateCommandQueue(context, device, properties, &retVal);
    ASSERT_NE(nullptr, cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const char *sources[1] = {pSource.get()};
    pProgram = clCreateProgramWithSource(
        context,
        1,
        sources,
        &sourceSize,
        &retVal);
    ASSERT_NE(nullptr, pProgram);

    retVal = clBuildProgram(
        pProgram,
        1,
        &device,
        nullptr,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Fatal assertion: the kernel handle is dereferenced immediately below.
    kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal);
    ASSERT_NE(nullptr, kernel);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pMultiDeviceKernel = static_cast<MultiDeviceKernel *>(kernel);
    auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
    ASSERT_NE(nullptr, pKernel);
    auto pCmdQueue = castToObject<CommandQueue>(cmdQ);
    auto &csr = pCmdQueue->getGpgpuCommandStreamReceiver();

    // Zero the GT-Pin surface state (the last binding table entry) so that the
    // comparison below only passes if the submit notification programmed it.
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    constexpr size_t renderSurfaceSize = sizeof(RENDER_SURFACE_STATE);
    size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
    void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
    ASSERT_NE(nullptr, pSurfaceState);
    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(pSurfaceState);
    memset(pSurfaceState, 0, renderSurfaceSize);

    gtpinNotifyKernelSubmit(kernel, pCmdQueue);

    // The submit notification is expected to have queued an entry; indexing an
    // empty deque would be undefined behavior, so fail the test instead.
    ASSERT_FALSE(kernelExecQueue.empty());
    auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[0].gtpinResource);
    ASSERT_NE(nullptr, allocData);
    auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
    ASSERT_NE(nullptr, gpuAllocation);

    // Build the reference surface state with the same arguments the driver uses.
    RENDER_SURFACE_STATE expectedSurfaceState;
    memset(&expectedSurfaceState, 0, renderSurfaceSize);
    {
        void *addressToPatch = gpuAllocation->getUnderlyingBuffer();
        size_t sizeToPatch = gpuAllocation->getUnderlyingBufferSize();
        Buffer::setSurfaceState(&pDevice->getDevice(), &expectedSurfaceState, false, false,
                                sizeToPatch, addressToPatch, 0, gpuAllocation, 0, 0,
                                pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
    }
    EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfaceState, renderSurfaceSize));

    EXPECT_FALSE(gpuAllocation->isResident(csr.getOsContext().getContextId()));
    gtpinNotifyMakeResident(pKernel, &csr);
    EXPECT_TRUE(gpuAllocation->isResident(csr.getOsContext().getContextId()));

    // Mark the entry as flushed so the completion notification can consider it.
    kernelExecQueue[0].isTaskCountValid = true;
    gtpinNotifyTaskCompletion(kernelExecQueue[0].taskCount);

    retVal = clReleaseKernel(kernel);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseContext(context);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
} // namespace ULT } // namespace ULT

View File

@ -228,4 +228,5 @@ OverrideSlmAllocationSize = -1
OverrideSlmSize = -1 OverrideSlmSize = -1
UseCyclesPerSecondTimer = 0 UseCyclesPerSecondTimer = 0
WaitLoopCount = -1 WaitLoopCount = -1
DebuggerLogBitmask = 0 DebuggerLogBitmask = 0
GTPinAllocateBufferInSharedMemory = -1

View File

@ -216,6 +216,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseBindlessMode, -1, "Use precompiled builtins i
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force different slm size than default in kB") DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force different slm size than default in kB")
DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second") DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second")
DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop") DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop")
DECLARE_DEBUG_VARIABLE(int32_t, GTPinAllocateBufferInSharedMemory, -1, "Force GTPin to allocate buffer in shared memory")
/*DRIVER TOGGLES*/ /*DRIVER TOGGLES*/
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")