mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Use Kernels maxKernelWorkGroupSize to compute WG size
Change-Id: I947b5612f36cd2437e1ead98712c914ffe4b5b15
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
6d21c2a422
commit
9d6dcd6fff
@@ -408,7 +408,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim());
|
||||
} else {
|
||||
auto maxWorkGroupSize = static_cast<uint32_t>(dispatchInfo.getKernel()->getDevice().getDeviceInfo().maxWorkGroupSize);
|
||||
auto maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize;
|
||||
auto simd = dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize();
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
if (dispatchInfo.getDim() == 1) {
|
||||
|
@@ -562,7 +562,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
|
||||
|
||||
switch (paramName) {
|
||||
case CL_KERNEL_WORK_GROUP_SIZE:
|
||||
maxWorkgroupSize = this->device.getDeviceInfo().maxWorkGroupSize;
|
||||
maxWorkgroupSize = this->maxKernelWorkGroupSize;
|
||||
if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) {
|
||||
auto divisionSize = 32 / patchInfo.executionEnvironment->LargestCompiledSIMDSize;
|
||||
maxWorkgroupSize /= divisionSize;
|
||||
@@ -614,7 +614,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
|
||||
size_t numDimensions = 0;
|
||||
size_t WGS = 1;
|
||||
auto maxSimdSize = static_cast<size_t>(getKernelInfo().getMaxSimdSize());
|
||||
auto maxRequiredWorkGroupSize = static_cast<size_t>(getKernelInfo().getMaxRequiredWorkGroupSize(device.getDeviceInfo().maxWorkGroupSize));
|
||||
auto maxRequiredWorkGroupSize = static_cast<size_t>(getKernelInfo().getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize));
|
||||
auto largestCompiledSIMDSize = static_cast<size_t>(getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
||||
|
||||
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
|
||||
|
@@ -155,7 +155,7 @@ WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t
|
||||
setMinWorkGroupSize();
|
||||
}
|
||||
WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
|
||||
this->maxWorkGroupSize = (uint32_t)dispatchInfo.getKernel()->getDevice().getDeviceInfo().maxWorkGroupSize;
|
||||
this->maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize;
|
||||
this->hasBarriers = !!dispatchInfo.getKernel()->getKernelInfo().patchInfo.executionEnvironment->HasBarriers;
|
||||
this->simdSize = (uint32_t)dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize();
|
||||
this->slmTotalSize = (uint32_t)dispatchInfo.getKernel()->slmTotalSize;
|
||||
|
@@ -14,11 +14,12 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
|
||||
|
||||
void SetUp() override {
|
||||
ParentClass::SetUp();
|
||||
pKernel->maxKernelWorkGroupSize = static_cast<uint32_t>(pDevice->getDeviceInfo().maxWorkGroupSize / 2);
|
||||
maxSimdSize = static_cast<size_t>(pKernel->getKernelInfo().getMaxSimdSize());
|
||||
ASSERT_LE(8u, maxSimdSize);
|
||||
maxWorkDim = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkItemDimensions);
|
||||
ASSERT_EQ(3u, maxWorkDim);
|
||||
maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
maxWorkGroupSize = static_cast<size_t>(pKernel->maxKernelWorkGroupSize);
|
||||
ASSERT_GE(1024u, maxWorkGroupSize);
|
||||
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
||||
ASSERT_EQ(32u, largestCompiledSIMDSize);
|
||||
@@ -28,8 +29,8 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
|
||||
auto requiredWorkGroupSizeZ = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeZ);
|
||||
|
||||
calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
|
||||
if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize))) {
|
||||
calculatedMaxWorkgroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast<size_t>(pKernel->maxKernelWorkGroupSize))) {
|
||||
calculatedMaxWorkgroupSize = static_cast<size_t>(pKernel->maxKernelWorkGroupSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1047,10 +1047,11 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu
|
||||
}
|
||||
|
||||
TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) {
|
||||
auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
|
||||
MockKernelWithInternals mockKernel(*pDevice);
|
||||
|
||||
auto maxWorkgroupSize = mockKernel.mockKernel->maxKernelWorkGroupSize;
|
||||
size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
|
||||
size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
|
||||
MockKernelWithInternals mockKernel(*pDevice);
|
||||
|
||||
auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status);
|
||||
|
@@ -262,6 +262,9 @@ TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) {
|
||||
size_t paramValueSize = sizeof(paramValue);
|
||||
size_t paramValueSizeRet = 0;
|
||||
|
||||
auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1;
|
||||
pKernel->maxKernelWorkGroupSize = static_cast<uint32_t>(kernelMaxWorkGroupSize);
|
||||
|
||||
retVal = pKernel->getWorkGroupInfo(
|
||||
pDevice,
|
||||
paramName,
|
||||
@@ -271,7 +274,7 @@ TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) {
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||
EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, paramValue);
|
||||
EXPECT_EQ(kernelMaxWorkGroupSize, paramValue);
|
||||
}
|
||||
|
||||
TEST_P(KernelTest, GetWorkGroupInfo_CompileWorkgroupSize) {
|
||||
|
Reference in New Issue
Block a user