mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Add check for local work group size in clEnqueueNDRangeKernel call.
- Incoming local work group size cannot exceed device capabilities. Change-Id: I89a7503155c71443e3ebc630debb5d5b466c6cb5
This commit is contained in:
@ -71,6 +71,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||
}
|
||||
|
||||
size_t remainder = 0;
|
||||
size_t totalWorkItems = 1u;
|
||||
const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr;
|
||||
|
||||
for (auto i = 0u; i < workDim; i++) {
|
||||
@ -86,6 +87,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||
}
|
||||
}
|
||||
workGroupSize[i] = localWorkSizeIn[i];
|
||||
totalWorkItems *= localWorkSizeIn[i];
|
||||
}
|
||||
|
||||
remainder += region[i] % workGroupSize[i];
|
||||
@ -126,6 +128,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||
",", globalWorkSizeIn[2],
|
||||
",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize());
|
||||
|
||||
if (totalWorkItems > this->getDevice().getDeviceInfo().maxWorkGroupSize) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
}
|
||||
|
||||
enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
|
||||
surfaces,
|
||||
false,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -78,12 +78,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
|
||||
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
|
||||
}
|
||||
|
||||
@ -91,7 +91,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {
|
||||
TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 32, 32};
|
||||
size_t localWorkSize[3] = {16, 8, 4};
|
||||
size_t localWorkSize[3] = {8, 4, 4};
|
||||
|
||||
auto retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
@ -105,12 +105,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
|
||||
|
||||
EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
|
||||
}
|
||||
|
||||
@ -118,7 +118,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
|
||||
TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) {
|
||||
size_t globalWorkOffset[1] = {0};
|
||||
size_t globalWorkSize[1] = {32};
|
||||
size_t localWorkSize[1] = {16};
|
||||
size_t localWorkSize[1] = {8};
|
||||
|
||||
auto retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
|
@ -1547,3 +1547,13 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu
|
||||
|
||||
clReleaseCommandQueue(pCmdQ2);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) {
|
||||
auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
|
||||
size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
|
||||
size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
|
||||
MockKernelWithInternals mockKernel(*pDevice);
|
||||
|
||||
auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -764,10 +764,10 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn
|
||||
}
|
||||
|
||||
badSizeDimension = GetParam();
|
||||
if (badSizeDimension == 0) {
|
||||
if (localWorkGroupSize[badSizeDimension] > 1) {
|
||||
localWorkGroupSize[badSizeDimension] /= 2;
|
||||
} else {
|
||||
localWorkGroupSize[badSizeDimension] *= 2;
|
||||
localWorkGroupSize[0] /= 2;
|
||||
}
|
||||
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -240,7 +240,7 @@ TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform21) {
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
size_t globalWorkSize[3] = {12, 12, 12};
|
||||
size_t localWorkSize[3] = {11, 12, 12};
|
||||
size_t localWorkSize[3] = {11, 12, 1};
|
||||
|
||||
retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -20,7 +20,7 @@
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(16, 8, 4)))
|
||||
__kernel __attribute__((reqd_work_group_size(8, 4, 4)))
|
||||
void CopyBuffer(
|
||||
__global unsigned int *src,
|
||||
__global unsigned int *dst)
|
||||
|
Reference in New Issue
Block a user