Add checks for correct engine for concurrent kernels.

Related-To: NEO-5135
Change-Id: Ib1c37ec8d5e468de331521ae4be1cd92902a2330
Signed-off-by: Sebastian Luzynski <sebastian.jozef.luzynski@intel.com>
This commit is contained in:
Sebastian Luzynski
2020-10-06 16:21:46 +02:00
committed by sys_ocldev
parent f9a97cbb22
commit 225e7f01b4
9 changed files with 93 additions and 13 deletions

View File

@ -64,7 +64,7 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize,
&maxConcurrentWorkGroupCount);
EXPECT_EQ(CL_SUCCESS, retVal);
size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);
std::unique_ptr<MockKernel> pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));
@ -72,7 +72,7 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
globalWorkOffset, localWorkSize,
&maxConcurrentWorkGroupCount);
EXPECT_EQ(CL_SUCCESS, retVal);
expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize);
expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);
}

View File

@ -214,6 +214,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel
cl_int retVal = CL_SUCCESS;
CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);
HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(pCmdQ2->getGpgpuEngine().getEngineType(), pClDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
}
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
@ -253,6 +258,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled
cl_int retVal = CL_SUCCESS;
CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);
HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(pCmdQ2->getGpgpuEngine().getEngineType(), pClDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
}
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
@ -292,6 +302,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas
cl_int retVal = CL_SUCCESS;
CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);
HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(pCmdQ2->getGpgpuEngine().getEngineType(), pClDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
}
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
EXPECT_EQ(CL_SUCCESS, retVal);

View File

@ -25,25 +25,56 @@ class MockSyncBufferHandler : public SyncBufferHandler {
using SyncBufferHandler::usedBufferSize;
};
class SyncBufferHandlerTest : public EnqueueHandlerTest {
class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest {
public:
void SetUp() {
hardwareInfo = *defaultHwInfo;
uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily];
hardwareInfoSetup[productFamily](&hardwareInfo, true, hwInfoConfig);
SetUpImpl(&hardwareInfo);
}
void TearDown() {
context->decRefInternal();
delete pClDevice;
pClDevice = nullptr;
pDevice = nullptr;
}
void SetUpImpl(const NEO::HardwareInfo *hardwareInfo) {
pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(hardwareInfo);
ASSERT_NE(nullptr, pDevice);
pClDevice = new MockClDevice{pDevice};
ASSERT_NE(nullptr, pClDevice);
auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
pTagMemory = commandStreamReceiver.getTagAddress();
ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
context = new NEO::MockContext(pClDevice);
}
};
class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
public:
void SetUp() override {}
void TearDown() override {}
template <typename FamilyType>
void SetUpT() {
EnqueueHandlerTest::SetUp();
SyncBufferEnqueueHandlerTest::SetUp();
kernelInternals = std::make_unique<MockKernelWithInternals>(*pClDevice, context);
kernel = kernelInternals->mockKernel;
kernel->executionType = KernelExecutionType::Concurrent;
commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
hwHelper = &HwHelper::get(kernel->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
}
template <typename FamilyType>
void TearDownT() {
commandQueue->release();
kernelInternals.reset();
EnqueueHandlerTest::TearDown();
SyncBufferEnqueueHandlerTest::TearDown();
}
void patchAllocateSyncBuffer() {
@ -61,6 +92,10 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
return clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
}
/// Asks the cached HwHelper whether cooperative dispatch is supported for the
/// engine type of this fixture's command queue on the kernel device's product family.
bool isCooperativeDispatchSupported() {
    const auto queueEngineType = commandQueue->getGpgpuEngine().getEngineType();
    const auto deviceProductFamily = kernel->getDevice().getHardwareInfo().platform.eProductFamily;
    return hwHelper->isCooperativeDispatchSupported(queueEngineType, deviceProductFamily);
}
const cl_uint workDim = 1;
const size_t gwOffset[3] = {0, 0, 0};
const size_t lws[3] = {10, 1, 1};
@ -71,6 +106,7 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
MockKernel *kernel;
MockCommandQueue *commandQueue;
SPatchAllocateSyncBuffer sPatchAllocateSyncBuffer;
HwHelper *hwHelper;
};
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
@ -109,7 +145,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncB
}
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) {
auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws);
auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
workgroupCount[0] = maxWorkGroupCount;
globalWorkSize[0] = maxWorkGroupCount * lws[0];
@ -118,7 +154,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingCon
}
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) {
size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws);
size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
workgroupCount[0] = maxWorkGroupCount + 1;
globalWorkSize[0] = maxWorkGroupCount * lws[0] + 1;

View File

@ -24,6 +24,7 @@
#include "opencl/source/gtpin/gtpin_hw_helper.h"
#include "opencl/source/gtpin/gtpin_init.h"
#include "opencl/source/gtpin/gtpin_notify.h"
#include "opencl/source/helpers/validators.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/program/create.inl"
@ -913,7 +914,13 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT
cl_uint workDim = 1;
size_t localWorkSize[3] = {1, 1, 1};
size_t n = pKernel1->getMaxWorkGroupCount(workDim, localWorkSize);
CommandQueue *commandQueue = nullptr;
WithCastToInternal(cmdQ, &commandQueue);
HwHelper &hwHelper = HwHelper::get(pDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(commandQueue->getGpgpuEngine().getEngineType(), pDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
commandQueue->getGpgpuEngine().osContext = commandQueue->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
}
size_t n = pKernel1->getMaxWorkGroupCount(workDim, localWorkSize, commandQueue);
auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);