mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add GTPin feature to allocate buffer in shared memory
Related-To: NEO-5667 Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
671d916c70
commit
255e85c124
@ -7,7 +7,7 @@
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
#include "shared/source/utilities/spinlock.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
@ -30,13 +30,15 @@ using namespace gtpin;
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using GTPinLockType = std::recursive_mutex;
|
||||
|
||||
extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
|
||||
|
||||
igc_init_t *pIgcInit = nullptr;
|
||||
std::atomic<int> sequenceCount(1);
|
||||
CommandQueue *pCmdQueueForFlushTask = nullptr;
|
||||
std::deque<gtpinkexec_t> kernelExecQueue;
|
||||
SpinLock kernelExecQueueLock;
|
||||
GTPinLockType kernelExecQueueLock;
|
||||
|
||||
void gtpinNotifyContextCreate(cl_context context) {
|
||||
if (isGTPinInitialized) {
|
||||
@ -131,7 +133,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
||||
kExec.gtpinResource = (cl_mem)resource;
|
||||
kExec.commandBuffer = commandBuffer;
|
||||
kExec.pCommandQueue = (CommandQueue *)pCmdQueue;
|
||||
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
|
||||
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
|
||||
kernelExecQueue.push_back(kExec);
|
||||
lock.unlock();
|
||||
// Patch SSH[gtpinBTI] with GT-Pin resource
|
||||
@ -142,12 +144,21 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
||||
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
|
||||
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
|
||||
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
|
||||
if (gtpinHelper.canUseSharedAllocation(device.getHardwareInfo())) {
|
||||
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
|
||||
auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
size_t size = gpuAllocation->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device, pSurfaceState, false, false, size, gpuAllocation->getUnderlyingBuffer(), 0, gpuAllocation, 0, 0,
|
||||
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
|
||||
pKernel->setUnifiedMemoryExecInfo(gpuAllocation);
|
||||
} else {
|
||||
cl_mem buffer = (cl_mem)resource;
|
||||
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
|
||||
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
|
||||
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gtpinNotifyPreFlushTask(void *pCmdQueue) {
|
||||
if (isGTPinInitialized) {
|
||||
@ -157,7 +168,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
|
||||
|
||||
void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
|
||||
if (isGTPinInitialized) {
|
||||
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
|
||||
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
|
||||
size_t numElems = kernelExecQueue.size();
|
||||
for (size_t n = 0; n < numElems; n++) {
|
||||
if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) {
|
||||
@ -173,7 +184,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
|
||||
|
||||
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
|
||||
if (isGTPinInitialized) {
|
||||
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
|
||||
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
|
||||
size_t numElems = kernelExecQueue.size();
|
||||
for (size_t n = 0; n < numElems;) {
|
||||
if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
|
||||
@ -191,15 +202,23 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
|
||||
|
||||
void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
|
||||
if (isGTPinInitialized) {
|
||||
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
|
||||
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
|
||||
Context &context = static_cast<Kernel *>(pKernel)->getContext();
|
||||
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
size_t numElems = kernelExecQueue.size();
|
||||
for (size_t n = 0; n < numElems; n++) {
|
||||
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
|
||||
// It's time for kernel to make resident its GT-Pin resource
|
||||
CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast<CommandStreamReceiver *>(pCSR);
|
||||
GraphicsAllocation *pGfxAlloc = nullptr;
|
||||
if (gtpinHelper.canUseSharedAllocation(context.getDevice(0)->getHardwareInfo())) {
|
||||
auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[n].gtpinResource);
|
||||
pGfxAlloc = allocData->gpuAllocations.getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
|
||||
} else {
|
||||
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
|
||||
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
|
||||
GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
|
||||
pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
|
||||
}
|
||||
pCommandStreamReceiver->makeResident(*pGfxAlloc);
|
||||
kernelExecQueue[n].isResourceResident = true;
|
||||
break;
|
||||
@ -210,7 +229,7 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
|
||||
|
||||
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
|
||||
if (isGTPinInitialized) {
|
||||
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
|
||||
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
|
||||
size_t numElems = kernelExecQueue.size();
|
||||
for (size_t n = 0; n < numElems; n++) {
|
||||
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -8,8 +8,12 @@
|
||||
#include "gtpin_helpers.h"
|
||||
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "opencl/source/api/api.h"
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/gtpin/gtpin_hw_helper.h"
|
||||
#include "opencl/source/helpers/validators.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
@ -27,27 +31,39 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context
|
||||
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize);
|
||||
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
|
||||
void *unfiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, pContext->getDevice(0), 0, size, 0, &diag);
|
||||
auto allocationsManager = pContext->getSVMAllocsManager();
|
||||
auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemorySharedAllocation);
|
||||
*pResource = (resource_handle_t)graphicsAllocation;
|
||||
} else {
|
||||
void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize);
|
||||
if (hostPtr == nullptr) {
|
||||
return GTPIN_DI_ERROR_ALLOCATION_FAILED;
|
||||
}
|
||||
cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag);
|
||||
*pResource = (resource_handle_t)buffer;
|
||||
}
|
||||
return GTPIN_DI_SUCCESS;
|
||||
}
|
||||
|
||||
// Releases a GT-Pin resource handle.
//
// context  - GT-Pin context handle; must cast to a valid Context.
// resource - handle to release; must be non-null.
//
// Returns GTPIN_DI_ERROR_INVALID_ARGUMENT when the context or resource is null,
// or when the resource on the buffer path is not a valid MemObj; otherwise
// GTPIN_DI_SUCCESS.
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) {
    Context *pContext = castToObject<Context>((cl_context)context);
    if ((pContext == nullptr) || (resource == nullptr)) {
        return GTPIN_DI_ERROR_INVALID_ARGUMENT;
    }

    // Select the release path with the same predicate gtpinCreateBuffer,
    // gtpinMapBuffer and gtpinUnmapBuffer use. Keying this on local-memory
    // support instead could reinterpret a cl_mem handle as SvmAllocationData
    // (or the reverse) whenever the two predicates disagree.
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
    if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
        // Shared-memory path: the handle is treated as SvmAllocationData and
        // the allocation is freed through its CPU-side underlying buffer.
        auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
        clMemFreeINTEL(pContext, allocData->cpuAllocation->getUnderlyingBuffer());
    } else {
        // Buffer path: the handle is a MemObj; free its host pointer and
        // release the object.
        auto pMemObj = castToObject<MemObj>(resource);
        if (pMemObj == nullptr) {
            return GTPIN_DI_ERROR_INVALID_ARGUMENT;
        }
        alignedFree(pMemObj->getHostPtr());
        pMemObj->release();
    }
    return GTPIN_DI_SUCCESS;
}
|
||||
|
||||
@ -57,24 +73,32 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, r
|
||||
if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) {
|
||||
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
|
||||
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
|
||||
*pAddress = reinterpret_cast<uint8_t *>(allocData->cpuAllocation->getUnderlyingBuffer());
|
||||
} else {
|
||||
auto pMemObj = castToObject<MemObj>(buffer);
|
||||
if (pMemObj == nullptr) {
|
||||
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
*pAddress = (uint8_t *)pMemObj->getHostPtr();
|
||||
*pAddress = reinterpret_cast<uint8_t *>(pMemObj->getHostPtr());
|
||||
}
|
||||
return GTPIN_DI_SUCCESS;
|
||||
}
|
||||
|
||||
// Validates an unmap request for a GT-Pin resource. No unmapping work is
// performed here; the function only checks its arguments.
//
// context  - GT-Pin context handle; must cast to a valid Context.
// resource - handle being unmapped; must be non-null.
//
// Returns GTPIN_DI_ERROR_INVALID_ARGUMENT when the context or resource is null,
// or when shared allocation is not in use and the resource is not a valid
// MemObj; otherwise GTPIN_DI_SUCCESS.
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) {
    Context *pContext = castToObject<Context>((cl_context)context);
    if ((pContext == nullptr) || (resource == nullptr)) {
        return GTPIN_DI_ERROR_INVALID_ARGUMENT;
    }

    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
        // Only on the buffer path is the handle a MemObj that can be validated.
        auto pMemObj = castToObject<MemObj>(resource);
        if (pMemObj == nullptr) {
            return GTPIN_DI_ERROR_INVALID_ARGUMENT;
        }
    }
    return GTPIN_DI_SUCCESS;
}
|
||||
} // namespace NEO
|
||||
|
@ -17,6 +17,7 @@ class GTPinHwHelper {
|
||||
virtual uint32_t getGenVersion() = 0;
|
||||
virtual bool addSurfaceState(Kernel *pKernel) = 0;
|
||||
virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0;
|
||||
virtual bool canUseSharedAllocation(const HardwareInfo &hwInfo) const = 0;
|
||||
|
||||
protected:
|
||||
GTPinHwHelper(){};
|
||||
@ -32,8 +33,9 @@ class GTPinHwHelperHw : public GTPinHwHelper {
|
||||
uint32_t getGenVersion() override;
|
||||
bool addSurfaceState(Kernel *pKernel) override;
|
||||
void *getSurfaceState(Kernel *pKernel, size_t bti) override;
|
||||
bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
private:
|
||||
protected:
|
||||
GTPinHwHelperHw(){};
|
||||
};
|
||||
} // namespace NEO
|
||||
|
@ -56,4 +56,14 @@ void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti) {
|
||||
return pSurfaceState;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool GTPinHwHelperHw<GfxFamily>::canUseSharedAllocation(const HardwareInfo &hwInfo) const {
|
||||
bool canUseSharedAllocation = false;
|
||||
if (DebugManager.flags.GTPinAllocateBufferInSharedMemory.get() != -1) {
|
||||
canUseSharedAllocation = !!DebugManager.flags.GTPinAllocateBufferInSharedMemory.get();
|
||||
}
|
||||
canUseSharedAllocation &= hwInfo.capabilityTable.ftrSvm;
|
||||
return canUseSharedAllocation;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -11,7 +11,9 @@
|
||||
#include "shared/source/helpers/file_io.h"
|
||||
#include "shared/source/helpers/hash.h"
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/test_files.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
@ -52,7 +54,8 @@ using namespace gtpin;
|
||||
|
||||
namespace NEO {
|
||||
extern std::deque<gtpinkexec_t> kernelExecQueue;
|
||||
}
|
||||
extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE];
|
||||
} // namespace NEO
|
||||
|
||||
namespace ULT {
|
||||
|
||||
@ -149,6 +152,11 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
|
||||
|
||||
public:
|
||||
void SetUp() override {
|
||||
DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(false);
|
||||
SetUpImpl();
|
||||
}
|
||||
|
||||
void SetUpImpl() {
|
||||
platformsImpl->clear();
|
||||
MemoryManagementFixture::SetUp();
|
||||
constructPlatform();
|
||||
@ -195,6 +203,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
|
||||
gtpin::ocl::gtpin_events_t gtpinCallbacks;
|
||||
MockMemoryManagerWithFailures *memoryManager = nullptr;
|
||||
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
|
||||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
typedef Test<GTPinFixture> GTPinTests;
|
||||
@ -2495,4 +2504,178 @@ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed
|
||||
EXPECT_EQ(kernelExecQueue[0].taskCount, stamp.taskCount);
|
||||
}
|
||||
|
||||
class GTPinFixtureWithLocalMemory : public GTPinFixture {
|
||||
public:
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableLocalMemory.set(true);
|
||||
DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(true);
|
||||
GTPinFixture::SetUpImpl();
|
||||
}
|
||||
void TearDown() override {
|
||||
GTPinFixture::TearDown();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
using GTPinTestsWithLocalMemory = Test<GTPinFixtureWithLocalMemory>;
|
||||
|
||||
// With the debug flag reset to -1, a platform without SVM support must not
// report that GT-Pin buffers can be allocated in shared memory.
TEST_F(GTPinTestsWithLocalMemory, whenPlatformHasNoSvmSupportThenGtPinBufferCantBeAllocatedInSharedMemory) {
    DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(-1);
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto canUseSharedAllocation = gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo());
    if (!pDevice->getHardwareInfo().capabilityTable.ftrSvm) {
        EXPECT_FALSE(canUseSharedAllocation);
    }
}
|
||||
|
||||
// Exercises create / map / unmap / free of a GT-Pin buffer when shared
// allocation is available; skipped otherwise.
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedThenAllocateBufferInSharedMemory) {
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) {
        GTEST_SKIP();
    }

    resource_handle_t resource = nullptr;
    cl_context ctxt = (cl_context)((Context *)pContext);
    GTPIN_DI_STATUS status = GTPIN_DI_SUCCESS;

    status = gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
    // The handle is dereferenced below, so a null handle must stop the test.
    ASSERT_NE(nullptr, resource);

    auto allocData = reinterpret_cast<SvmAllocationData *>(resource);

    auto cpuAllocation = allocData->cpuAllocation;
    ASSERT_NE(nullptr, cpuAllocation);
    EXPECT_NE(GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, cpuAllocation->getAllocationType());

    auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_NE(GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, gpuAllocation->getAllocationType());

    // Mapping must hand back the CPU-side underlying buffer.
    uint8_t *address = nullptr;
    status = gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
    EXPECT_EQ(allocData->cpuAllocation->getUnderlyingBuffer(), address);

    status = gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);

    status = gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);
}
|
||||
|
||||
// Submits a kernel with GT-Pin initialized and shared allocation available,
// then checks that the GT-Pin surface state matches the one programmed from
// the GPU allocation and that gtpinNotifyMakeResident makes it resident.
// Skipped when shared allocation cannot be used.
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsAllocatedInSharedMemoryThenSetSurfaceStateForTheBufferAndMakeItResident) {
    GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) {
        GTEST_SKIP();
    }

    gtpinCallbacks.onContextCreate = OnContextCreate;
    gtpinCallbacks.onContextDestroy = OnContextDestroy;
    gtpinCallbacks.onKernelCreate = OnKernelCreate;
    gtpinCallbacks.onKernelSubmit = OnKernelSubmit;
    gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate;
    gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete;

    GTPIN_DI_STATUS status = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr);
    EXPECT_EQ(GTPIN_DI_SUCCESS, status);

    cl_kernel kernel = nullptr;
    cl_program pProgram = nullptr;
    cl_device_id device = (cl_device_id)pDevice;
    size_t sourceSize = 0;
    std::string testFile;
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = 0;
    cl_context context = nullptr;

    KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
    testFile.append(clFiles);
    testFile.append("CopyBuffer_simd16.cl");
    auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
    EXPECT_NE(0u, sourceSize);
    EXPECT_NE(nullptr, pSource);

    context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, context);

    cmdQ = clCreateCommandQueue(context, device, properties, &retVal);
    ASSERT_NE(nullptr, cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const char *sources[1] = {pSource.get()};
    pProgram = clCreateProgramWithSource(
        context,
        1,
        sources,
        &sourceSize,
        &retVal);
    ASSERT_NE(nullptr, pProgram);

    retVal = clBuildProgram(
        pProgram,
        1,
        &device,
        nullptr,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // The kernel handle is dereferenced right below.
    ASSERT_NE(nullptr, kernel);

    auto pMultiDeviceKernel = static_cast<MultiDeviceKernel *>(kernel);
    auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
    auto pCmdQueue = castToObject<CommandQueue>(cmdQ);
    auto &csr = pCmdQueue->getGpgpuCommandStreamReceiver();

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    constexpr size_t renderSurfaceSize = sizeof(RENDER_SURFACE_STATE);

    // The test reads the GT-Pin surface state from the last binding table entry.
    size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
    void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
    // memset below writes through this pointer.
    ASSERT_NE(nullptr, pSurfaceState);

    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(pSurfaceState);
    memset(pSurfaceState, 0, renderSurfaceSize);

    gtpinNotifyKernelSubmit(kernel, pCmdQueue);

    auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[0].gtpinResource);
    ASSERT_NE(nullptr, allocData);
    auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
    ASSERT_NE(nullptr, gpuAllocation);

    // Build the reference surface state from the same GPU allocation.
    RENDER_SURFACE_STATE expectedSurfaceState;
    memset(&expectedSurfaceState, 0, renderSurfaceSize);
    {
        void *addressToPatch = gpuAllocation->getUnderlyingBuffer();
        size_t sizeToPatch = gpuAllocation->getUnderlyingBufferSize();
        Buffer::setSurfaceState(&pDevice->getDevice(), &expectedSurfaceState, false, false,
                                sizeToPatch, addressToPatch, 0, gpuAllocation, 0, 0,
                                pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
    }
    EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfaceState, renderSurfaceSize));

    EXPECT_FALSE(gpuAllocation->isResident(csr.getOsContext().getContextId()));
    gtpinNotifyMakeResident(pKernel, &csr);
    EXPECT_TRUE(gpuAllocation->isResident(csr.getOsContext().getContextId()));

    // Mark the queued entry's task count valid before notifying completion.
    kernelExecQueue[0].isTaskCountValid = true;
    gtpinNotifyTaskCompletion(kernelExecQueue[0].taskCount);

    retVal = clReleaseKernel(kernel);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseContext(context);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
|
||||
} // namespace ULT
|
||||
|
@ -229,3 +229,4 @@ OverrideSlmSize = -1
|
||||
UseCyclesPerSecondTimer = 0
|
||||
WaitLoopCount = -1
|
||||
DebuggerLogBitmask = 0
|
||||
GTPinAllocateBufferInSharedMemory = -1
|
||||
|
@ -216,6 +216,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseBindlessMode, -1, "Use precompiled builtins i
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force different slm size than default in kB")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, GTPinAllocateBufferInSharedMemory, -1, "Force GTPin to allocate buffer in shared memory")
|
||||
|
||||
/*DRIVER TOGGLES*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
|
||||
|
Reference in New Issue
Block a user