mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Respect KernelExecutionType in enqueue kernel calls
Change-Id: I9de07f9e3b77c4a44f6a0127e0ae3bd7e1ab97f8 Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
d6f4520599
commit
07c4682668
@ -3143,7 +3143,8 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (pKernel->getKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr) {
|
||||
if ((pKernel->getExecutionType() != KernelExecutionType::Default) ||
|
||||
pKernel->isUsingSyncBuffer()) {
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
|
||||
return retVal;
|
||||
@ -5330,16 +5331,28 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
|
||||
}
|
||||
|
||||
size_t globalWorkSize[3];
|
||||
size_t requestedNumberOfWorkgroups = 1;
|
||||
for (size_t i = 0; i < workDim; i++) {
|
||||
globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
|
||||
requestedNumberOfWorkgroups *= workgroupCount[i];
|
||||
}
|
||||
|
||||
size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
|
||||
if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
|
||||
retVal = CL_INVALID_VALUE;
|
||||
return retVal;
|
||||
if (pKernel->getExecutionType() == KernelExecutionType::Concurrent) {
|
||||
size_t requestedNumberOfWorkgroups = 1;
|
||||
for (size_t i = 0; i < workDim; i++) {
|
||||
requestedNumberOfWorkgroups *= workgroupCount[i];
|
||||
}
|
||||
size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
|
||||
if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
|
||||
retVal = CL_INVALID_VALUE;
|
||||
return retVal;
|
||||
}
|
||||
}
|
||||
|
||||
if (pKernel->isUsingSyncBuffer()) {
|
||||
if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
return retVal;
|
||||
}
|
||||
platform()->clDeviceMap[&pCommandQueue->getDevice()]->allocateSyncBufferHandler();
|
||||
}
|
||||
|
||||
TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
|
||||
@ -5347,8 +5360,6 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
|
||||
gtpinNotifyKernelSubmit(kernel, pCommandQueue);
|
||||
}
|
||||
|
||||
platform()->clDeviceMap[&pCommandQueue->getDevice()]->allocateSyncBufferHandler();
|
||||
|
||||
retVal = pCommandQueue->enqueueKernel(
|
||||
kernel,
|
||||
workDim,
|
||||
|
@ -345,6 +345,12 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
uint32_t getThreadArbitrationPolicy() const {
|
||||
return threadArbitrationPolicy;
|
||||
}
|
||||
KernelExecutionType getExecutionType() const {
|
||||
return executionType;
|
||||
}
|
||||
bool isUsingSyncBuffer() const {
|
||||
return (kernelInfo.patchInfo.pAllocateSyncBuffer != nullptr);
|
||||
}
|
||||
|
||||
bool checkIfIsParentKernelAndBlocksUsesPrintf();
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -78,4 +78,54 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenNonZeroEventsAndEmptyEventWaitListWhenE
|
||||
|
||||
EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal);
|
||||
}
|
||||
|
||||
TEST_F(clEnqueueNDRangeKernelTests, GivenConcurrentKernelWhenExecutingKernelThenInvalidKernelErrorIsReturned) {
|
||||
cl_uint workDim = 1;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
size_t localWorkSize[3] = {1, 1, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
pKernel->executionType = KernelExecutionType::Concurrent;
|
||||
|
||||
retVal = clEnqueueNDRangeKernel(
|
||||
pCommandQueue,
|
||||
pKernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
}
|
||||
TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenExecutingKernelThenInvalidKernelErrorIsReturned) {
|
||||
cl_uint workDim = 1;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
size_t localWorkSize[3] = {1, 1, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
SPatchAllocateSyncBuffer patchAllocateSyncBuffer;
|
||||
pProgram->mockKernelInfo.patchInfo.pAllocateSyncBuffer = &patchAllocateSyncBuffer;
|
||||
|
||||
EXPECT_TRUE(pKernel->isUsingSyncBuffer());
|
||||
|
||||
retVal = clEnqueueNDRangeKernel(
|
||||
pCommandQueue,
|
||||
pKernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
}
|
||||
} // namespace ULT
|
||||
|
@ -32,6 +32,7 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
|
||||
EnqueueHandlerTest::SetUp();
|
||||
kernelInternals = std::make_unique<MockKernelWithInternals>(*pClDevice, context);
|
||||
kernel = kernelInternals->mockKernel;
|
||||
kernel->executionType = KernelExecutionType::Concurrent;
|
||||
commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
|
||||
}
|
||||
|
||||
@ -53,10 +54,15 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
|
||||
return reinterpret_cast<MockSyncBufferHandler *>(pClDevice->syncBufferHandler.get());
|
||||
}
|
||||
|
||||
cl_int enqueueNDCount() {
|
||||
return clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
}
|
||||
|
||||
const cl_uint workDim = 1;
|
||||
const size_t gwOffset[3] = {0, 0, 0};
|
||||
const size_t lws[3] = {10, 1, 1};
|
||||
size_t workgroupCount[3] = {10, 1, 1};
|
||||
size_t globalWorkSize[3] = {100, 1, 1};
|
||||
size_t workItemsCount = 10;
|
||||
std::unique_ptr<MockKernelWithInternals> kernelInternals;
|
||||
MockKernel *kernel;
|
||||
@ -64,10 +70,10 @@ class SyncBufferHandlerTest : public EnqueueHandlerTest {
|
||||
SPatchAllocateSyncBuffer sPatchAllocateSyncBuffer;
|
||||
};
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchWhenEnqueuingKernelThenSyncBufferIsUsed) {
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
|
||||
patchAllocateSyncBuffer();
|
||||
clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
|
||||
enqueueNDCount();
|
||||
auto syncBufferHandler = getSyncBufferHandler();
|
||||
EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);
|
||||
|
||||
@ -77,34 +83,53 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchWhenEnqueu
|
||||
pDevice->getUltCommandStreamReceiver<FamilyType>().latestSentTaskCount);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenNoAllocateSyncBufferPatchWhenEnqueuingKernelThenSyncBufferIsNotUsedAndUsedBufferSizeIsNotUpdated) {
|
||||
clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
|
||||
auto syncBufferHandler = getSyncBufferHandler();
|
||||
EXPECT_EQ(0u, syncBufferHandler->usedBufferSize);
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(nullptr, getSyncBufferHandler());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingKernelThenSuccessIsReturned) {
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenEnqueuingKernelThenErrorIsReturnedAndSyncBufferIsNotCreated) {
|
||||
patchAllocateSyncBuffer();
|
||||
kernel->executionType = KernelExecutionType::Default;
|
||||
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
EXPECT_EQ(nullptr, getSyncBufferHandler());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) {
|
||||
patchAllocateSyncBuffer();
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(nullptr, getSyncBufferHandler());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) {
|
||||
auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws);
|
||||
workgroupCount[0] = maxWorkGroupCount;
|
||||
auto retVal = clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
globalWorkSize[0] = maxWorkGroupCount * lws[0];
|
||||
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingKernelThenErrorIsReturned) {
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) {
|
||||
size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws);
|
||||
workgroupCount[0] = maxWorkGroupCount + 1;
|
||||
auto retVal = clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
globalWorkSize[0] = maxWorkGroupCount * lws[0] + 1;
|
||||
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) {
|
||||
patchAllocateSyncBuffer();
|
||||
clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
|
||||
enqueueNDCount();
|
||||
auto syncBufferHandler = getSyncBufferHandler();
|
||||
|
||||
syncBufferHandler->usedBufferSize = syncBufferHandler->bufferSize;
|
||||
clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
|
||||
enqueueNDCount();
|
||||
EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);
|
||||
}
|
||||
|
||||
@ -128,14 +153,6 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBuffer
|
||||
EXPECT_EQ(bufferAddress, surfaceAddress);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenKernelUsingSyncBufferWhenUsingStandardEnqueueThenErrorIsReturned) {
|
||||
patchAllocateSyncBuffer();
|
||||
|
||||
size_t globalWorkSize[3] = {workgroupCount[0] * lws[0], workgroupCount[1] * lws[1], workgroupCount[2] * lws[2]};
|
||||
auto retVal = clEnqueueNDRangeKernel(commandQueue, kernel, workDim, gwOffset, globalWorkSize, lws, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
}
|
||||
|
||||
TEST(SyncBufferHandlerDeviceTest, GivenRootDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) {
|
||||
const size_t testUsedBufferSize = 100;
|
||||
MockClDevice rootDevice{new MockDevice};
|
||||
|
Reference in New Issue
Block a user