Add GTPin feature to allocate buffer in shared memory

Related-To: NEO-5667

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2021-03-25 13:57:58 +00:00
committed by Compute-Runtime-Automation
parent 671d916c70
commit 255e85c124
7 changed files with 279 additions and 39 deletions

View File

@ -7,7 +7,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/utilities/spinlock.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h"
@ -30,13 +30,15 @@ using namespace gtpin;
namespace NEO {
using GTPinLockType = std::recursive_mutex;
extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
igc_init_t *pIgcInit = nullptr;
std::atomic<int> sequenceCount(1);
CommandQueue *pCmdQueueForFlushTask = nullptr;
std::deque<gtpinkexec_t> kernelExecQueue;
SpinLock kernelExecQueueLock;
GTPinLockType kernelExecQueueLock;
void gtpinNotifyContextCreate(cl_context context) {
if (isGTPinInitialized) {
@ -131,7 +133,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
kExec.gtpinResource = (cl_mem)resource;
kExec.commandBuffer = commandBuffer;
kExec.pCommandQueue = (CommandQueue *)pCmdQueue;
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
kernelExecQueue.push_back(kExec);
lock.unlock();
// Patch SSH[gtpinBTI] with GT-Pin resource
@ -142,12 +144,21 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
if (gtpinHelper.canUseSharedAllocation(device.getHardwareInfo())) {
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
size_t size = gpuAllocation->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, pSurfaceState, false, false, size, gpuAllocation->getUnderlyingBuffer(), 0, gpuAllocation, 0, 0,
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
pKernel->setUnifiedMemoryExecInfo(gpuAllocation);
} else {
cl_mem buffer = (cl_mem)resource;
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
}
}
}
void gtpinNotifyPreFlushTask(void *pCmdQueue) {
if (isGTPinInitialized) {
@ -157,7 +168,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems; n++) {
if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) {
@ -173,7 +184,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems;) {
if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
@ -191,15 +202,23 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
Context &context = static_cast<Kernel *>(pKernel)->getContext();
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems; n++) {
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
// It's time for kernel to make resident its GT-Pin resource
CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast<CommandStreamReceiver *>(pCSR);
GraphicsAllocation *pGfxAlloc = nullptr;
if (gtpinHelper.canUseSharedAllocation(context.getDevice(0)->getHardwareInfo())) {
auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[n].gtpinResource);
pGfxAlloc = allocData->gpuAllocations.getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
} else {
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
}
pCommandStreamReceiver->makeResident(*pGfxAlloc);
kernelExecQueue[n].isResourceResident = true;
break;
@ -210,7 +229,7 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
if (isGTPinInitialized) {
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
size_t numElems = kernelExecQueue.size();
for (size_t n = 0; n < numElems; n++) {
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -8,8 +8,12 @@
#include "gtpin_helpers.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/source/api/api.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/gtpin/gtpin_hw_helper.h"
#include "opencl/source/helpers/validators.h"
#include "opencl/source/mem_obj/buffer.h"
@ -27,27 +31,39 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
}
size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize);
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
void *unfiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, pContext->getDevice(0), 0, size, 0, &diag);
auto allocationsManager = pContext->getSVMAllocsManager();
auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemorySharedAllocation);
*pResource = (resource_handle_t)graphicsAllocation;
} else {
void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize);
if (hostPtr == nullptr) {
return GTPIN_DI_ERROR_ALLOCATION_FAILED;
}
cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag);
*pResource = (resource_handle_t)buffer;
}
return GTPIN_DI_SUCCESS;
}
// Releases a GT-Pin resource handle for the given context.
//
// context  - GT-Pin context handle; must cast to a valid Context.
// resource - handle to free. It is interpreted either as an SvmAllocationData pointer
//            (shared-allocation path) or as a cl_mem (host-memory buffer path).
//
// Returns GTPIN_DI_SUCCESS on success and GTPIN_DI_ERROR_INVALID_ARGUMENT when the context
// or the resource handle cannot be resolved.
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) {
    Context *pContext = castToObject<Context>((cl_context)context);
    if ((pContext == nullptr) || (resource == nullptr)) {
        return GTPIN_DI_ERROR_INVALID_ARGUMENT;
    }
    // Use the very same predicate as gtpinCreateBuffer/gtpinMapBuffer/gtpinUnmapBuffer to decide
    // what kind of object the handle refers to. Keying this on local-memory support instead would
    // misinterpret the handle whenever the two conditions disagree.
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
    if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
        auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
        // The shared allocation is released through its CPU pointer, so it has to be present.
        if (allocData->cpuAllocation == nullptr) {
            return GTPIN_DI_ERROR_INVALID_ARGUMENT;
        }
        clMemFreeINTEL(pContext, allocData->cpuAllocation->getUnderlyingBuffer());
    } else {
        auto pMemObj = castToObject<MemObj>(resource);
        if (pMemObj == nullptr) {
            return GTPIN_DI_ERROR_INVALID_ARGUMENT;
        }
        // Free the host pointer held by the buffer before dropping the buffer itself.
        alignedFree(pMemObj->getHostPtr());
        pMemObj->release();
    }
    return GTPIN_DI_SUCCESS;
}
@ -57,24 +73,32 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, r
if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) {
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
}
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
*pAddress = reinterpret_cast<uint8_t *>(allocData->cpuAllocation->getUnderlyingBuffer());
} else {
auto pMemObj = castToObject<MemObj>(buffer);
if (pMemObj == nullptr) {
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
}
*pAddress = (uint8_t *)pMemObj->getHostPtr();
*pAddress = reinterpret_cast<uint8_t *>(pMemObj->getHostPtr());
}
return GTPIN_DI_SUCCESS;
}
// Validates an unmap request for a GT-Pin resource. No unmapping work is performed here;
// the function only checks that the arguments refer to usable objects.
//
// context  - GT-Pin context handle; must cast to a valid Context.
// resource - handle being unmapped; must be non-null.
//
// Returns GTPIN_DI_SUCCESS when the arguments are valid, GTPIN_DI_ERROR_INVALID_ARGUMENT otherwise.
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) {
    Context *pContext = castToObject<Context>((cl_context)context);
    if ((pContext == nullptr) || (resource == nullptr)) {
        return GTPIN_DI_ERROR_INVALID_ARGUMENT;
    }
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
        // Only the cl_mem flavour of the handle can be verified through castToObject;
        // a shared-allocation handle is accepted as soon as it is non-null.
        auto pMemObj = castToObject<MemObj>(resource);
        if (pMemObj == nullptr) {
            return GTPIN_DI_ERROR_INVALID_ARGUMENT;
        }
    }
    return GTPIN_DI_SUCCESS;
}
} // namespace NEO

View File

@ -17,6 +17,7 @@ class GTPinHwHelper {
virtual uint32_t getGenVersion() = 0;
virtual bool addSurfaceState(Kernel *pKernel) = 0;
virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0;
virtual bool canUseSharedAllocation(const HardwareInfo &hwInfo) const = 0;
protected:
GTPinHwHelper(){};
@ -32,8 +33,9 @@ class GTPinHwHelperHw : public GTPinHwHelper {
uint32_t getGenVersion() override;
bool addSurfaceState(Kernel *pKernel) override;
void *getSurfaceState(Kernel *pKernel, size_t bti) override;
bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override;
private:
protected:
GTPinHwHelperHw(){};
};
} // namespace NEO

View File

@ -56,4 +56,14 @@ void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti) {
return pSurfaceState;
}
// Tells whether GT-Pin buffers may be placed in shared memory on the given hardware.
// This holds only when the GTPinAllocateBufferInSharedMemory debug flag is set to a value
// other than -1 and 0, and the capability table of hwInfo reports ftrSvm.
template <typename GfxFamily>
bool GTPinHwHelperHw<GfxFamily>::canUseSharedAllocation(const HardwareInfo &hwInfo) const {
    const auto flagValue = DebugManager.flags.GTPinAllocateBufferInSharedMemory.get();
    // -1 leaves the feature at its default, which is "off"; 0 switches it off explicitly.
    const bool enabledByFlag = (flagValue != -1) && (flagValue != 0);
    return enabledByFlag && hwInfo.capabilityTable.ftrSvm;
}
} // namespace NEO

View File

@ -11,7 +11,9 @@
#include "shared/source/helpers/file_io.h"
#include "shared/source/helpers/hash.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/test_files.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_device.h"
@ -52,7 +54,8 @@ using namespace gtpin;
namespace NEO {
extern std::deque<gtpinkexec_t> kernelExecQueue;
}
extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE];
} // namespace NEO
namespace ULT {
@ -149,6 +152,11 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
public:
void SetUp() override {
DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(false);
SetUpImpl();
}
void SetUpImpl() {
platformsImpl->clear();
MemoryManagementFixture::SetUp();
constructPlatform();
@ -195,6 +203,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
gtpin::ocl::gtpin_events_t gtpinCallbacks;
MockMemoryManagerWithFailures *memoryManager = nullptr;
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
DebugManagerStateRestore restore;
};
typedef Test<GTPinFixture> GTPinTests;
@ -2495,4 +2504,178 @@ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed
EXPECT_EQ(kernelExecQueue[0].taskCount, stamp.taskCount);
}
// GTPinFixture variant that turns on EnableLocalMemory and GTPinAllocateBufferInSharedMemory
// before running the common fixture set-up.
class GTPinFixtureWithLocalMemory : public GTPinFixture {
  public:
    void SetUp() override {
        // Flags are set first and SetUpImpl() is called directly, because GTPinFixture::SetUp()
        // would reset GTPinAllocateBufferInSharedMemory to false.
        DebugManager.flags.EnableLocalMemory.set(true);
        DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(true);
        GTPinFixture::SetUpImpl();
    }
    void TearDown() override {
        GTPinFixture::TearDown();
    }
    // No DebugManagerStateRestore member here: GTPinFixture already owns one named `restore`,
    // and redeclaring it in this class would only shadow the inherited member.
};
using GTPinTestsWithLocalMemory = Test<GTPinFixtureWithLocalMemory>;
// Checks that a platform without SVM support never qualifies for shared-memory GT-Pin buffers,
// both when the debug flag forces the feature on and when it is left at its default.
TEST_F(GTPinTestsWithLocalMemory, whenPlatformHasNoSvmSupportThenGtPinBufferCantBeAllocatedInSharedMemory) {
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);

    // Work on a copy with SVM switched off so the check runs on every platform,
    // instead of silently asserting nothing on SVM-capable hardware.
    auto hwInfoWithoutSvm = pDevice->getHardwareInfo();
    hwInfoWithoutSvm.capabilityTable.ftrSvm = false;

    // Feature forced on: the missing SVM capability alone has to veto it.
    DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(1);
    EXPECT_FALSE(gtpinHelper.canUseSharedAllocation(hwInfoWithoutSvm));

    // Default flag value: still not allowed.
    DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(-1);
    EXPECT_FALSE(gtpinHelper.canUseSharedAllocation(hwInfoWithoutSvm));
}
// Exercises the create / map / unmap / free driver services on the shared-allocation path and
// checks that the returned handle is an SvmAllocationData with CPU and GPU allocations.
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedThenAllocateBufferInSharedMemory) {
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) {
        GTEST_SKIP();
    }

    resource_handle_t resource = nullptr;
    cl_context ctxt = (cl_context)((Context *)pContext);

    GTPIN_DI_STATUS status = GTPIN_DI_SUCCESS;
    status = gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource);
    // Fatal assertions: the handle is dereferenced right below, so continuing after a failed
    // creation would crash the test binary instead of reporting a failure.
    ASSERT_EQ(GTPIN_DI_SUCCESS, status);
    ASSERT_NE(nullptr, resource);

    auto allocData = reinterpret_cast<SvmAllocationData *>(resource);

    auto cpuAllocation = allocData->cpuAllocation;
    ASSERT_NE(nullptr, cpuAllocation);
    EXPECT_NE(GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, cpuAllocation->getAllocationType());

    auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_NE(GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, gpuAllocation->getAllocationType());

    // Mapping must hand back the CPU-side address of the shared allocation.
    uint8_t *address = nullptr;
    status = gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
    EXPECT_EQ(allocData->cpuAllocation->getUnderlyingBuffer(), address);

    status = gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);

    status = gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
}
// Covers the shared-allocation path of kernel submission: after gtpinNotifyKernelSubmit the
// GT-Pin binding-table slot must contain the surface state that Buffer::setSurfaceState builds
// for the GPU allocation, and gtpinNotifyMakeResident must make that allocation resident.
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsAllocatedInSharedMemoryThenSetSurfaceStateForTheBufferAndMakeItResident) {
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
// Nothing to verify when the helper does not allow shared allocations on this configuration.
if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) {
GTEST_SKIP();
}
// Register the test callbacks and initialize GT-Pin.
gtpinCallbacks.onContextCreate = OnContextCreate;
gtpinCallbacks.onContextDestroy = OnContextDestroy;
gtpinCallbacks.onKernelCreate = OnKernelCreate;
gtpinCallbacks.onKernelSubmit = OnKernelSubmit;
gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate;
gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete;
GTPIN_DI_STATUS status = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr);
EXPECT_EQ(GTPIN_DI_SUCCESS, status);
cl_kernel kernel = nullptr;
cl_program pProgram = nullptr;
cl_device_id device = (cl_device_id)pDevice;
size_t sourceSize = 0;
std::string testFile;
cl_command_queue cmdQ = nullptr;
cl_queue_properties properties = 0;
cl_context context = nullptr;
// Load the CopyBuffer_simd16 kernel source from the test files directory.
KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
testFile.append(clFiles);
testFile.append("CopyBuffer_simd16.cl");
auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
EXPECT_NE(0u, sourceSize);
EXPECT_NE(nullptr, pSource);
// Create context, queue, program and kernel through the public OpenCL API.
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, context);
cmdQ = clCreateCommandQueue(context, device, properties, &retVal);
ASSERT_NE(nullptr, cmdQ);
EXPECT_EQ(CL_SUCCESS, retVal);
const char *sources[1] = {pSource.get()};
pProgram = clCreateProgramWithSource(
context,
1,
sources,
&sourceSize,
&retVal);
ASSERT_NE(nullptr, pProgram);
retVal = clBuildProgram(
pProgram,
1,
&device,
nullptr,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal);
EXPECT_NE(nullptr, kernel);
EXPECT_EQ(CL_SUCCESS, retVal);
auto pMultiDeviceKernel = static_cast<MultiDeviceKernel *>(kernel);
auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
auto pCmdQueue = castToObject<CommandQueue>(cmdQ);
auto &csr = pCmdQueue->getGpgpuCommandStreamReceiver();
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
constexpr size_t renderSurfaceSize = sizeof(RENDER_SURFACE_STATE);
// The slot inspected is the last binding table entry of the kernel.
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
EXPECT_NE(nullptr, pSurfaceState);
RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(pSurfaceState);
// Zero the slot so the later comparison reflects only what the submit notification writes.
memset(pSurfaceState, 0, renderSurfaceSize);
gtpinNotifyKernelSubmit(kernel, pCmdQueue);
// The queued entry carries the GT-Pin resource, read here as SvmAllocationData.
// NOTE(review): presumably the resource is created by the OnKernelSubmit callback - confirm.
auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[0].gtpinResource);
EXPECT_NE(nullptr, allocData);
auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
EXPECT_NE(nullptr, gpuAllocation);
// Build the reference surface state with the same Buffer::setSurfaceState arguments.
RENDER_SURFACE_STATE expectedSurfaceState;
memset(&expectedSurfaceState, 0, renderSurfaceSize);
{
void *addressToPatch = gpuAllocation->getUnderlyingBuffer();
size_t sizeToPatch = gpuAllocation->getUnderlyingBufferSize();
Buffer::setSurfaceState(&pDevice->getDevice(), &expectedSurfaceState, false, false,
sizeToPatch, addressToPatch, 0, gpuAllocation, 0, 0,
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
}
EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfaceState, renderSurfaceSize));
// Residency must flip from false to true across the make-resident notification.
EXPECT_FALSE(gpuAllocation->isResident(csr.getOsContext().getContextId()));
gtpinNotifyMakeResident(pKernel, &csr);
EXPECT_TRUE(gpuAllocation->isResident(csr.getOsContext().getContextId()));
// Mark the entry's task count as valid before signalling completion.
// NOTE(review): presumably this lets gtpinNotifyTaskCompletion retire the queue entry - confirm.
kernelExecQueue[0].isTaskCountValid = true;
gtpinNotifyTaskCompletion(kernelExecQueue[0].taskCount);
// Release the OpenCL objects created above.
retVal = clReleaseKernel(kernel);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseProgram(pProgram);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseCommandQueue(cmdQ);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseContext(context);
EXPECT_EQ(CL_SUCCESS, retVal);
}
} // namespace ULT

View File

@ -229,3 +229,4 @@ OverrideSlmSize = -1
UseCyclesPerSecondTimer = 0
WaitLoopCount = -1
DebuggerLogBitmask = 0
GTPinAllocateBufferInSharedMemory = -1

View File

@ -216,6 +216,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseBindlessMode, -1, "Use precompiled builtins i
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force different slm size than default in kB")
DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second")
DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop")
DECLARE_DEBUG_VARIABLE(int32_t, GTPinAllocateBufferInSharedMemory, -1, "Force GTPin to allocate buffer in shared memory")
/*DRIVER TOGGLES*/
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")