intel/compute-runtime (https://github.com/intel/compute-runtime.git)
commit 225e7f01b4, parent f9a97cbb22, committed by sys_ocldev

Add checks for correct engine for concurrent kernels.

Related-To: NEO-5135
Change-Id: Ib1c37ec8d5e468de331521ae4be1cd92902a2330
Signed-off-by: Sebastian Luzynski <sebastian.jozef.luzynski@intel.com>
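The changes below are test-only and follow one pattern: Kernel::getMaxWorkGroupCount now also receives the command queue, and tests that dispatch concurrent (cooperative) kernels first verify that the queue's engine supports cooperative dispatch, retargeting it to a CCS compute engine when it does not. A minimal sketch of that check, condensed from the hunks below; the free-function form and the name ensureCooperativeEngine are illustrative, not part of the commit:

    // Sketch only: the same HwHelper query and engine retargeting that the test
    // hunks below perform inline before enqueuing a concurrent kernel.
    void ensureCooperativeEngine(ClDevice *clDevice, CommandQueue *queue) {
        const auto &hwInfo = clDevice->getDevice().getHardwareInfo();
        HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
        if (!hwHelper.isCooperativeDispatchSupported(queue->getGpgpuEngine().getEngineType(), hwInfo.platform.eProductFamily)) {
            // Rebind the queue's GPGPU engine to a compute (CCS) engine; the two boolean
            // arguments are passed exactly as in the tests below.
            queue->getGpgpuEngine().osContext = queue->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
        }
    }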
@@ -64,7 +64,7 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
     retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize,
                                                          &maxConcurrentWorkGroupCount);
     EXPECT_EQ(CL_SUCCESS, retVal);
-    size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
+    size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
     EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);

     std::unique_ptr<MockKernel> pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));
@@ -72,7 +72,7 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
                                                          globalWorkOffset, localWorkSize,
                                                          &maxConcurrentWorkGroupCount);
     EXPECT_EQ(CL_SUCCESS, retVal);
-    expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize);
+    expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
     EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);
 }

@@ -214,6 +214,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel
     cl_int retVal = CL_SUCCESS;
     CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);

+    HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
+    if (!hwHelper.isCooperativeDispatchSupported(pCmdQ2->getGpgpuEngine().getEngineType(), pClDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
+        pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
+    }
+
     std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
     EXPECT_EQ(CL_SUCCESS, retVal);

@@ -253,6 +258,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled
     cl_int retVal = CL_SUCCESS;
     CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);

+    HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
+    if (!hwHelper.isCooperativeDispatchSupported(pCmdQ2->getGpgpuEngine().getEngineType(), pClDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
+        pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
+    }
+
     std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
     EXPECT_EQ(CL_SUCCESS, retVal);

@@ -292,6 +302,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas
     cl_int retVal = CL_SUCCESS;
     CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);

+    HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
+    if (!hwHelper.isCooperativeDispatchSupported(pCmdQ2->getGpgpuEngine().getEngineType(), pClDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
+        pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
+    }
+
     std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
     EXPECT_EQ(CL_SUCCESS, retVal);

@@ -25,25 +25,56 @@ class MockSyncBufferHandler : public SyncBufferHandler {
     using SyncBufferHandler::usedBufferSize;
 };

-class SyncBufferHandlerTest : public EnqueueHandlerTest {
+class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest {
   public:
+    void SetUp() {
+        hardwareInfo = *defaultHwInfo;
+        uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily];
+        hardwareInfoSetup[productFamily](&hardwareInfo, true, hwInfoConfig);
+        SetUpImpl(&hardwareInfo);
+    }
+
+    void TearDown() {
+        context->decRefInternal();
+        delete pClDevice;
+        pClDevice = nullptr;
+        pDevice = nullptr;
+    }
+
+    void SetUpImpl(const NEO::HardwareInfo *hardwareInfo) {
+        pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(hardwareInfo);
+        ASSERT_NE(nullptr, pDevice);
+        pClDevice = new MockClDevice{pDevice};
+        ASSERT_NE(nullptr, pClDevice);
+
+        auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
+        pTagMemory = commandStreamReceiver.getTagAddress();
+        ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
+
+        context = new NEO::MockContext(pClDevice);
+    }
+};
+
+class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
+  public:
     void SetUp() override {}
     void TearDown() override {}

     template <typename FamilyType>
     void SetUpT() {
-        EnqueueHandlerTest::SetUp();
+        SyncBufferEnqueueHandlerTest::SetUp();
         kernelInternals = std::make_unique<MockKernelWithInternals>(*pClDevice, context);
         kernel = kernelInternals->mockKernel;
         kernel->executionType = KernelExecutionType::Concurrent;
         commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
+        hwHelper = &HwHelper::get(kernel->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
     }

     template <typename FamilyType>
     void TearDownT() {
         commandQueue->release();
         kernelInternals.reset();
-        EnqueueHandlerTest::TearDown();
+        SyncBufferEnqueueHandlerTest::TearDown();
     }

     void patchAllocateSyncBuffer() {
@@ -61,6 +92,10 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
         return clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
     }

+    bool isCooperativeDispatchSupported() {
+        return hwHelper->isCooperativeDispatchSupported(commandQueue->getGpgpuEngine().getEngineType(), kernel->getDevice().getHardwareInfo().platform.eProductFamily);
+    }
+
     const cl_uint workDim = 1;
     const size_t gwOffset[3] = {0, 0, 0};
     const size_t lws[3] = {10, 1, 1};
@@ -71,6 +106,7 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
     MockKernel *kernel;
     MockCommandQueue *commandQueue;
     SPatchAllocateSyncBuffer sPatchAllocateSyncBuffer;
+    HwHelper *hwHelper;
 };

 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
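The isCooperativeDispatchSupported() helper added above lets individual tests react to engines that cannot run cooperative dispatch instead of repeating the HwHelper query inline. One plausible use inside a test body, sketched under the assumption that googletest's GTEST_SKIP is available; the guarded test bodies themselves are outside this excerpt and the skip message is illustrative:

    // Sketch only: skip on engines without cooperative dispatch support rather than
    // retargeting the queue as the EnqueueKernelTest changes above do.
    if (!isCooperativeDispatchSupported()) {
        GTEST_SKIP() << "cooperative dispatch is not supported on this engine";
    }
    cl_int retVal = clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset,
                                                workgroupCount, lws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);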
@@ -109,7 +145,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncB
 }

 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) {
-    auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws);
+    auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
     workgroupCount[0] = maxWorkGroupCount;
     globalWorkSize[0] = maxWorkGroupCount * lws[0];

@@ -118,7 +154,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingCon
 }

 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) {
-    size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws);
+    size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
     workgroupCount[0] = maxWorkGroupCount + 1;
     globalWorkSize[0] = maxWorkGroupCount * lws[0] + 1;

@@ -24,6 +24,7 @@
 #include "opencl/source/gtpin/gtpin_hw_helper.h"
 #include "opencl/source/gtpin/gtpin_init.h"
 #include "opencl/source/gtpin/gtpin_notify.h"
+#include "opencl/source/helpers/validators.h"
 #include "opencl/source/kernel/kernel.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/program/create.inl"
@@ -913,7 +914,13 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT

     cl_uint workDim = 1;
     size_t localWorkSize[3] = {1, 1, 1};
-    size_t n = pKernel1->getMaxWorkGroupCount(workDim, localWorkSize);
+    CommandQueue *commandQueue = nullptr;
+    WithCastToInternal(cmdQ, &commandQueue);
+    HwHelper &hwHelper = HwHelper::get(pDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
+    if (!hwHelper.isCooperativeDispatchSupported(commandQueue->getGpgpuEngine().getEngineType(), pDevice->getDevice().getHardwareInfo().platform.eProductFamily)) {
+        commandQueue->getGpgpuEngine().osContext = commandQueue->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext;
+    }
+    size_t n = pKernel1->getMaxWorkGroupCount(workDim, localWorkSize, commandQueue);
     auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);
     auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr);