Use Kernel's maxKernelWorkGroupSize to compute WG size

Change-Id: I947b5612f36cd2437e1ead98712c914ffe4b5b15
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-09-16 20:07:04 +02:00
committed by sys_ocldev
parent 6d21c2a422
commit 9d6dcd6fff
6 changed files with 15 additions and 10 deletions

View File

@ -408,7 +408,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim());
} else {
auto maxWorkGroupSize = static_cast<uint32_t>(dispatchInfo.getKernel()->getDevice().getDeviceInfo().maxWorkGroupSize);
auto maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize;
auto simd = dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize();
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
if (dispatchInfo.getDim() == 1) {

View File

@ -562,7 +562,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
switch (paramName) {
case CL_KERNEL_WORK_GROUP_SIZE:
maxWorkgroupSize = this->device.getDeviceInfo().maxWorkGroupSize;
maxWorkgroupSize = this->maxKernelWorkGroupSize;
if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) {
auto divisionSize = 32 / patchInfo.executionEnvironment->LargestCompiledSIMDSize;
maxWorkgroupSize /= divisionSize;
@ -614,7 +614,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
size_t numDimensions = 0;
size_t WGS = 1;
auto maxSimdSize = static_cast<size_t>(getKernelInfo().getMaxSimdSize());
auto maxRequiredWorkGroupSize = static_cast<size_t>(getKernelInfo().getMaxRequiredWorkGroupSize(device.getDeviceInfo().maxWorkGroupSize));
auto maxRequiredWorkGroupSize = static_cast<size_t>(getKernelInfo().getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize));
auto largestCompiledSIMDSize = static_cast<size_t>(getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);

View File

@ -155,7 +155,7 @@ WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t
setMinWorkGroupSize();
}
WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
this->maxWorkGroupSize = (uint32_t)dispatchInfo.getKernel()->getDevice().getDeviceInfo().maxWorkGroupSize;
this->maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize;
this->hasBarriers = !!dispatchInfo.getKernel()->getKernelInfo().patchInfo.executionEnvironment->HasBarriers;
this->simdSize = (uint32_t)dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize();
this->slmTotalSize = (uint32_t)dispatchInfo.getKernel()->slmTotalSize;

View File

@ -14,11 +14,12 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
void SetUp() override {
ParentClass::SetUp();
pKernel->maxKernelWorkGroupSize = static_cast<uint32_t>(pDevice->getDeviceInfo().maxWorkGroupSize / 2);
maxSimdSize = static_cast<size_t>(pKernel->getKernelInfo().getMaxSimdSize());
ASSERT_LE(8u, maxSimdSize);
maxWorkDim = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkItemDimensions);
ASSERT_EQ(3u, maxWorkDim);
maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
maxWorkGroupSize = static_cast<size_t>(pKernel->maxKernelWorkGroupSize);
ASSERT_GE(1024u, maxWorkGroupSize);
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
ASSERT_EQ(32u, largestCompiledSIMDSize);
@ -28,8 +29,8 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
auto requiredWorkGroupSizeZ = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeZ);
calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize))) {
calculatedMaxWorkgroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast<size_t>(pKernel->maxKernelWorkGroupSize))) {
calculatedMaxWorkgroupSize = static_cast<size_t>(pKernel->maxKernelWorkGroupSize);
}
}

View File

@ -1047,10 +1047,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu
}
TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) {
auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
MockKernelWithInternals mockKernel(*pDevice);
auto maxWorkgroupSize = mockKernel.mockKernel->maxKernelWorkGroupSize;
size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
MockKernelWithInternals mockKernel(*pDevice);
auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status);

View File

@ -262,6 +262,9 @@ TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) {
size_t paramValueSize = sizeof(paramValue);
size_t paramValueSizeRet = 0;
auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1;
pKernel->maxKernelWorkGroupSize = static_cast<uint32_t>(kernelMaxWorkGroupSize);
retVal = pKernel->getWorkGroupInfo(
pDevice,
paramName,
@ -271,7 +274,7 @@ TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) {
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSize, paramValueSizeRet);
EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, paramValue);
EXPECT_EQ(kernelMaxWorkGroupSize, paramValue);
}
TEST_P(KernelTest, GetWorkGroupInfo_CompileWorkgroupSize) {