Refactor ocl kernel

Removed dispatch traits pointers and added getters. Removed unneeded
tests.

Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
Krystian Chmielewski
2021-06-11 11:24:27 +00:00
committed by Compute-Runtime-Automation
parent 67216047db
commit 6fad8b3100
9 changed files with 204 additions and 316 deletions

View File

@ -248,7 +248,7 @@ HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDi
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(dimension, *kernel.workDim);
EXPECT_EQ(dimension, *kernel.getWorkDim());
}
}
@ -279,7 +279,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(dimension, *kernel.workDim);
EXPECT_EQ(dimension, *kernel.getWorkDim());
}
}
@ -309,7 +309,7 @@ HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensi
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(dimension, *kernel.workDim);
EXPECT_EQ(dimension, *kernel.getWorkDim());
}
}
@ -340,7 +340,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(dimension, *kernel.workDim);
EXPECT_EQ(dimension, *kernel.getWorkDim());
}
}
@ -372,9 +372,10 @@ HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkG
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(2u, *kernel.numWorkGroupsX);
EXPECT_EQ(5u, *kernel.numWorkGroupsY);
EXPECT_EQ(10u, *kernel.numWorkGroupsZ);
auto numWorkGroups = kernel.getNumWorkGroupsValues();
EXPECT_EQ(2u, *numWorkGroups[0]);
EXPECT_EQ(5u, *numWorkGroups[1]);
EXPECT_EQ(10u, *numWorkGroups[2]);
}
HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGlobalWorkOffsetIsCorrectlySet) {
@ -405,9 +406,10 @@ HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGloba
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(1u, *kernel.globalWorkOffsetX);
EXPECT_EQ(2u, *kernel.globalWorkOffsetY);
EXPECT_EQ(3u, *kernel.globalWorkOffsetZ);
auto gwo = kernel.getGlobalWorkOffsetValues();
EXPECT_EQ(1u, *gwo[0]);
EXPECT_EQ(2u, *gwo[1]);
EXPECT_EQ(3u, *gwo[2]);
}
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) {
@ -437,9 +439,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
EXPECT_EQ(5u, *kernel.localWorkSizeY);
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
auto localWorkSize = kernel.getLocalWorkSizeValues();
EXPECT_EQ(2u, *localWorkSize[0]);
EXPECT_EQ(5u, *localWorkSize[1]);
EXPECT_EQ(1u, *localWorkSize[2]);
}
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) {
@ -469,9 +473,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThe
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
EXPECT_EQ(3u, *kernel.localWorkSizeY);
EXPECT_EQ(5u, *kernel.localWorkSizeZ);
auto localWorkSize = kernel.getLocalWorkSizeValues();
EXPECT_EQ(2u, *localWorkSize[0]);
EXPECT_EQ(3u, *localWorkSize[1]);
EXPECT_EQ(5u, *localWorkSize[2]);
}
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) {
@ -502,9 +508,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
EXPECT_EQ(5u, *kernel.localWorkSizeY);
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
auto localWorkSize = kernel.getLocalWorkSizeValues();
EXPECT_EQ(2u, *localWorkSize[0]);
EXPECT_EQ(5u, *localWorkSize[1]);
EXPECT_EQ(1u, *localWorkSize[2]);
}
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffWhenDispatchingWalkerThenLwsIsCorrect) {
@ -535,9 +543,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
EXPECT_EQ(5u, *kernel.localWorkSizeY);
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
auto localWorkSize = kernel.getLocalWorkSizeValues();
EXPECT_EQ(2u, *localWorkSize[0]);
EXPECT_EQ(5u, *localWorkSize[1]);
EXPECT_EQ(1u, *localWorkSize[2]);
}
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) {
@ -566,9 +576,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(1u, *kernel.localWorkSizeX);
EXPECT_EQ(2u, *kernel.localWorkSizeY);
EXPECT_EQ(3u, *kernel.localWorkSizeZ);
auto localWorkSize = kernel.getLocalWorkSizeValues();
EXPECT_EQ(1u, *localWorkSize[0]);
EXPECT_EQ(2u, *localWorkSize[1]);
EXPECT_EQ(3u, *localWorkSize[2]);
}
HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) {
@ -600,12 +612,15 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
EXPECT_EQ(1u, *kernel.localWorkSizeX);
EXPECT_EQ(2u, *kernel.localWorkSizeY);
EXPECT_EQ(3u, *kernel.localWorkSizeZ);
EXPECT_EQ(1u, *kernel.localWorkSizeX2);
EXPECT_EQ(2u, *kernel.localWorkSizeY2);
EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
auto localWorkSize = kernel.getLocalWorkSizeValues();
EXPECT_EQ(1u, *localWorkSize[0]);
EXPECT_EQ(2u, *localWorkSize[1]);
EXPECT_EQ(3u, *localWorkSize[2]);
auto localWorkSize2 = kernel.getLocalWorkSize2Values();
EXPECT_EQ(1u, *localWorkSize2[0]);
EXPECT_EQ(2u, *localWorkSize2[1]);
EXPECT_EQ(3u, *localWorkSize2[2]);
}
HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) {
@ -644,15 +659,16 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre
auto dispatchId = 0;
for (auto &dispatchInfo : multiDispatchInfo) {
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
auto localWorkSize = kernel.getLocalWorkSizeValues();
if (dispatchId == 0) {
EXPECT_EQ(1u, *kernel.localWorkSizeX);
EXPECT_EQ(2u, *kernel.localWorkSizeY);
EXPECT_EQ(3u, *kernel.localWorkSizeZ);
EXPECT_EQ(1u, *localWorkSize[0]);
EXPECT_EQ(2u, *localWorkSize[1]);
EXPECT_EQ(3u, *localWorkSize[2]);
}
if (dispatchId == 1) {
EXPECT_EQ(4u, *kernel.localWorkSizeX);
EXPECT_EQ(5u, *kernel.localWorkSizeY);
EXPECT_EQ(6u, *kernel.localWorkSizeZ);
EXPECT_EQ(4u, *localWorkSize[0]);
EXPECT_EQ(5u, *localWorkSize[1]);
EXPECT_EQ(6u, *localWorkSize[2]);
}
dispatchId++;
}
@ -661,15 +677,16 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre
HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) {
MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
MockKernel mainKernel(program.get(), kernelInfo, *pClDevice);
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0] = 12;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 16;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2] = 20;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 24;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 28;
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 32;
auto &dispatchTraits = kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits;
dispatchTraits.localWorkSize[0] = 0;
dispatchTraits.localWorkSize[1] = 4;
dispatchTraits.localWorkSize[2] = 8;
dispatchTraits.localWorkSize2[0] = 12;
dispatchTraits.localWorkSize2[1] = 16;
dispatchTraits.localWorkSize2[2] = 20;
dispatchTraits.numWorkGroups[0] = 24;
dispatchTraits.numWorkGroups[1] = 28;
dispatchTraits.numWorkGroups[2] = 32;
ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
ASSERT_EQ(CL_SUCCESS, mainKernel.initialize());
@ -697,26 +714,29 @@ HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorre
for (auto &dispatchInfo : multiDispatchInfo) {
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
auto localWorkSize = kernel.getLocalWorkSizeValues();
auto localWorkSize2 = kernel.getLocalWorkSize2Values();
auto numWorkGroups = kernel.getNumWorkGroupsValues();
if (&kernel == &mainKernel) {
EXPECT_EQ(4u, *kernel.localWorkSizeX);
EXPECT_EQ(5u, *kernel.localWorkSizeY);
EXPECT_EQ(6u, *kernel.localWorkSizeZ);
EXPECT_EQ(4u, *kernel.localWorkSizeX2);
EXPECT_EQ(5u, *kernel.localWorkSizeY2);
EXPECT_EQ(6u, *kernel.localWorkSizeZ2);
EXPECT_EQ(3u, *kernel.numWorkGroupsX);
EXPECT_EQ(2u, *kernel.numWorkGroupsY);
EXPECT_EQ(2u, *kernel.numWorkGroupsZ);
EXPECT_EQ(4u, *localWorkSize[0]);
EXPECT_EQ(5u, *localWorkSize[1]);
EXPECT_EQ(6u, *localWorkSize[2]);
EXPECT_EQ(4u, *localWorkSize2[0]);
EXPECT_EQ(5u, *localWorkSize2[1]);
EXPECT_EQ(6u, *localWorkSize2[2]);
EXPECT_EQ(3u, *numWorkGroups[0]);
EXPECT_EQ(2u, *numWorkGroups[1]);
EXPECT_EQ(2u, *numWorkGroups[2]);
} else {
EXPECT_EQ(0u, *kernel.localWorkSizeX);
EXPECT_EQ(0u, *kernel.localWorkSizeY);
EXPECT_EQ(0u, *kernel.localWorkSizeZ);
EXPECT_EQ(1u, *kernel.localWorkSizeX2);
EXPECT_EQ(2u, *kernel.localWorkSizeY2);
EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
EXPECT_EQ(0u, *kernel.numWorkGroupsX);
EXPECT_EQ(0u, *kernel.numWorkGroupsY);
EXPECT_EQ(0u, *kernel.numWorkGroupsZ);
EXPECT_EQ(0u, *localWorkSize[0]);
EXPECT_EQ(0u, *localWorkSize[1]);
EXPECT_EQ(0u, *localWorkSize[2]);
EXPECT_EQ(1u, *localWorkSize2[0]);
EXPECT_EQ(2u, *localWorkSize2[1]);
EXPECT_EQ(3u, *localWorkSize2[2]);
EXPECT_EQ(0u, *numWorkGroups[0]);
EXPECT_EQ(0u, *numWorkGroups[1]);
EXPECT_EQ(0u, *numWorkGroups[2]);
}
}
}
@ -894,6 +914,8 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI
}
HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDimensionsAreCorrect) {
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0;
MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
MockKernel kernel2(program.get(), kernelInfo, *pClDevice);
@ -914,7 +936,7 @@ HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDi
for (auto &dispatchInfo : multiDispatchInfo) {
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
EXPECT_EQ(*kernel.workDim, dispatchInfo.getDim());
EXPECT_EQ(dispatchInfo.getDim(), *kernel.getWorkDim());
}
}

View File

@ -64,13 +64,15 @@ TEST_F(EnqueueKernelRequiredWorkSize, GivenUnspecifiedWorkGroupSizeWhenEnqeueing
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
EXPECT_EQ(*pKernel->localWorkSizeY, 2u);
EXPECT_EQ(*pKernel->localWorkSizeZ, 2u);
auto localWorkSizeVal = pKernel->getLocalWorkSizeValues();
EXPECT_EQ(8u, *localWorkSizeVal[0]);
EXPECT_EQ(2u, *localWorkSizeVal[1]);
EXPECT_EQ(2u, *localWorkSizeVal[2]);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 2u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 2u);
auto enqueuedLocalWorkSize = pKernel->getEnqueuedLocalWorkSizeValues();
EXPECT_EQ(8u, *enqueuedLocalWorkSize[0]);
EXPECT_EQ(2u, *enqueuedLocalWorkSize[1]);
EXPECT_EQ(2u, *enqueuedLocalWorkSize[2]);
}
// Fully specified
@ -91,13 +93,15 @@ TEST_F(EnqueueKernelRequiredWorkSize, GivenRequiredWorkGroupSizeWhenEnqeueingKer
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 2u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 2u);
auto localWorkSizeVal = pKernel->getLocalWorkSizeValues();
EXPECT_EQ(8u, *localWorkSizeVal[0]);
EXPECT_EQ(2u, *localWorkSizeVal[1]);
EXPECT_EQ(2u, *localWorkSizeVal[2]);
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
EXPECT_EQ(*pKernel->localWorkSizeY, 2u);
EXPECT_EQ(*pKernel->localWorkSizeZ, 2u);
auto enqueuedLocalWorkSize = pKernel->getEnqueuedLocalWorkSizeValues();
EXPECT_EQ(8u, *enqueuedLocalWorkSize[0]);
EXPECT_EQ(2u, *enqueuedLocalWorkSize[1]);
EXPECT_EQ(2u, *enqueuedLocalWorkSize[2]);
}
// Underspecified. Won't permit.

View File

@ -680,11 +680,10 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto localWorkSize = kernel->getLocalWorkSizeValues();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
*kernel->localWorkSizeX,
*kernel->localWorkSizeY,
*kernel->localWorkSizeZ);
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
EXPECT_TRUE(containsHint(expectedHint, userData));
}
@ -695,11 +694,10 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto localWorkSize = kernel->getLocalWorkSizeValues();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
*kernel->localWorkSizeX,
*kernel->localWorkSizeY,
*kernel->localWorkSizeZ);
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
EXPECT_TRUE(containsHint(expectedHint, userData));
DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled);
}
@ -711,11 +709,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto localWorkSize = kernel->getLocalWorkSizeValues();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
*kernel->localWorkSizeX,
*kernel->localWorkSizeY,
*kernel->localWorkSizeZ);
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
EXPECT_TRUE(containsHint(expectedHint, userData));
DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled);
}
@ -725,11 +723,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto localWorkSize = kernel->getLocalWorkSizeValues();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
*kernel->localWorkSizeX,
*kernel->localWorkSizeY,
*kernel->localWorkSizeZ);
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
EXPECT_TRUE(containsHint(expectedHint, userData));
}
@ -741,11 +739,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto localWorkSize = kernel->getLocalWorkSizeValues();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
*kernel->localWorkSizeX,
*kernel->localWorkSizeY,
*kernel->localWorkSizeZ);
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
EXPECT_TRUE(containsHint(expectedHint, userData));
}
@ -757,11 +755,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
auto localWorkSize = kernel->getLocalWorkSizeValues();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
*kernel->localWorkSizeX,
*kernel->localWorkSizeY,
*kernel->localWorkSizeZ);
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
EXPECT_TRUE(containsHint(expectedHint, userData));
}

View File

@ -70,19 +70,22 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX);
EXPECT_EQ(1u, *scheduler.localWorkSizeY);
EXPECT_EQ(1u, *scheduler.localWorkSizeZ);
auto localWorkSize = scheduler.getLocalWorkSizeValues();
EXPECT_EQ((uint32_t)scheduler.getLws(), *localWorkSize[0]);
EXPECT_EQ(1u, *localWorkSize[1]);
EXPECT_EQ(1u, *localWorkSize[2]);
if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) {
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX);
auto enqueuedLocalWorkSize = scheduler.getEnqueuedLocalWorkSizeValues();
if (enqueuedLocalWorkSize[0] != &Kernel::dummyPatchLocation) {
EXPECT_EQ((uint32_t)scheduler.getLws(), *enqueuedLocalWorkSize[0]);
EXPECT_EQ(1u, *enqueuedLocalWorkSize[1]);
EXPECT_EQ(1u, *enqueuedLocalWorkSize[2]);
}
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY);
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ);
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX);
EXPECT_EQ(0u, *scheduler.numWorkGroupsY);
EXPECT_EQ(0u, *scheduler.numWorkGroupsZ);
auto numWorkGroups = scheduler.getNumWorkGroupsValues();
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *numWorkGroups[0]);
EXPECT_EQ(0u, *numWorkGroups[1]);
EXPECT_EQ(0u, *numWorkGroups[2]);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);

View File

@ -2239,97 +2239,33 @@ struct KernelCrossThreadTests : Test<ClDeviceFixture> {
SPatchExecutionEnvironment executionEnvironment = {};
};
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[1] = 4;
TEST_F(KernelCrossThreadTests, WhenLocalWorkSize2OffsetsAreValidThenIsLocalWorkSize2PatchableReturnsTrue) {
auto &localWorkSize2 = pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2;
localWorkSize2[0] = 0;
localWorkSize2[1] = 4;
localWorkSize2[2] = 8;
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX);
EXPECT_NE(nullptr, kernel.globalWorkOffsetY);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetY);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetZ);
EXPECT_TRUE(kernel.isLocalWorkSize2Patchable());
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0xc;
TEST_F(KernelCrossThreadTests, WhenNotAllLocalWorkSize2OffsetsAreValidThenIsLocalWorkSize2PatchableReturnsTrue) {
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_NE(nullptr, kernel.localWorkSizeX);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeX);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeY);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ);
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 0xd;
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2);
EXPECT_NE(nullptr, kernel.localWorkSizeY2);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeY2);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ2);
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = 8;
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeY);
EXPECT_NE(nullptr, kernel.globalWorkSizeZ);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkSizeZ);
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.workDim = 12;
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_NE(nullptr, kernel.workDim);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.workDim);
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 0 * sizeof(uint32_t);
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 1 * sizeof(uint32_t);
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 2 * sizeof(uint32_t);
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_NE(nullptr, kernel.numWorkGroupsX);
EXPECT_NE(nullptr, kernel.numWorkGroupsY);
EXPECT_NE(nullptr, kernel.numWorkGroupsZ);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsX);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsY);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsZ);
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeIsCorrect) {
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[0] = 0;
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX);
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeX);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeY);
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeZ);
auto &localWorkSize2 = pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2;
for (auto ele0 : {true, false}) {
for (auto ele1 : {true, false}) {
for (auto ele2 : {true, false}) {
if (ele0 && ele1 && ele2) {
continue;
} else {
localWorkSize2[0] = ele0 ? 0 : undefined<CrossThreadDataOffset>;
localWorkSize2[1] = ele1 ? 4 : undefined<CrossThreadDataOffset>;
localWorkSize2[2] = ele2 ? 8 : undefined<CrossThreadDataOffset>;
EXPECT_FALSE(kernel.isLocalWorkSize2Patchable());
}
}
}
}
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) {

View File

@ -103,18 +103,9 @@ class MockKernel : public Kernel {
using Kernel::auxTranslationRequired;
using Kernel::containsStatelessWrites;
using Kernel::dataParameterSimdSize;
using Kernel::enqueuedLocalWorkSizeX;
using Kernel::enqueuedLocalWorkSizeY;
using Kernel::enqueuedLocalWorkSizeZ;
using Kernel::executionType;
using Kernel::getDevice;
using Kernel::getHardwareInfo;
using Kernel::globalWorkOffsetX;
using Kernel::globalWorkOffsetY;
using Kernel::globalWorkOffsetZ;
using Kernel::globalWorkSizeX;
using Kernel::globalWorkSizeY;
using Kernel::globalWorkSizeZ;
using Kernel::hasDirectStatelessAccessToHostMemory;
using Kernel::hasIndirectStatelessAccessToHostMemory;
using Kernel::isSchedulerKernel;
@ -126,18 +117,9 @@ class MockKernel : public Kernel {
using Kernel::kernelSubmissionMap;
using Kernel::kernelSvmGfxAllocations;
using Kernel::kernelUnifiedMemoryGfxAllocations;
using Kernel::localWorkSizeX;
using Kernel::localWorkSizeX2;
using Kernel::localWorkSizeY;
using Kernel::localWorkSizeY2;
using Kernel::localWorkSizeZ;
using Kernel::localWorkSizeZ2;
using Kernel::maxKernelWorkGroupSize;
using Kernel::maxWorkGroupSizeForCrossThreadData;
using Kernel::numberOfBindingTableStates;
using Kernel::numWorkGroupsX;
using Kernel::numWorkGroupsY;
using Kernel::numWorkGroupsZ;
using Kernel::parentEventOffset;
using Kernel::patchBufferOffset;
using Kernel::patchWithImplicitSurface;
@ -147,7 +129,6 @@ class MockKernel : public Kernel {
using Kernel::svmAllocationsRequireCacheFlush;
using Kernel::threadArbitrationPolicy;
using Kernel::unifiedMemoryControls;
using Kernel::workDim;
using Kernel::slmSizes;
using Kernel::slmTotalSize;
@ -558,24 +539,6 @@ class MockParentKernel : public Kernel {
class MockSchedulerKernel : public SchedulerKernel {
public:
using Kernel::enqueuedLocalWorkSizeX;
using Kernel::enqueuedLocalWorkSizeY;
using Kernel::enqueuedLocalWorkSizeZ;
using Kernel::globalWorkOffsetX;
using Kernel::globalWorkOffsetY;
using Kernel::globalWorkOffsetZ;
using Kernel::globalWorkSizeX;
using Kernel::globalWorkSizeY;
using Kernel::globalWorkSizeZ;
using Kernel::localWorkSizeX;
using Kernel::localWorkSizeX2;
using Kernel::localWorkSizeY;
using Kernel::localWorkSizeY2;
using Kernel::localWorkSizeZ;
using Kernel::localWorkSizeZ2;
using Kernel::numWorkGroupsX;
using Kernel::numWorkGroupsY;
using Kernel::numWorkGroupsZ;
MockSchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){};
};