mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Refactor ocl kernel
Removed dispatch traits pointers and added getters. Removed unneeded tests. Related-To: NEO-4729 Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
67216047db
commit
6fad8b3100
@ -248,7 +248,7 @@ HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDi
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
EXPECT_EQ(dimension, *kernel.workDim);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
|
||||
@ -279,7 +279,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(dimension, *kernel.workDim);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
|
||||
@ -309,7 +309,7 @@ HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensi
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(dimension, *kernel.workDim);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
|
||||
@ -340,7 +340,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(dimension, *kernel.workDim);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
|
||||
@ -372,9 +372,10 @@ HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkG
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
EXPECT_EQ(2u, *kernel.numWorkGroupsX);
|
||||
EXPECT_EQ(5u, *kernel.numWorkGroupsY);
|
||||
EXPECT_EQ(10u, *kernel.numWorkGroupsZ);
|
||||
auto numWorkGroups = kernel.getNumWorkGroupsValues();
|
||||
EXPECT_EQ(2u, *numWorkGroups[0]);
|
||||
EXPECT_EQ(5u, *numWorkGroups[1]);
|
||||
EXPECT_EQ(10u, *numWorkGroups[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGlobalWorkOffsetIsCorrectlySet) {
|
||||
@ -405,9 +406,10 @@ HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGloba
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
EXPECT_EQ(1u, *kernel.globalWorkOffsetX);
|
||||
EXPECT_EQ(2u, *kernel.globalWorkOffsetY);
|
||||
EXPECT_EQ(3u, *kernel.globalWorkOffsetZ);
|
||||
auto gwo = kernel.getGlobalWorkOffsetValues();
|
||||
EXPECT_EQ(1u, *gwo[0]);
|
||||
EXPECT_EQ(2u, *gwo[1]);
|
||||
EXPECT_EQ(3u, *gwo[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -437,9 +439,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
EXPECT_EQ(5u, *localWorkSize[1]);
|
||||
EXPECT_EQ(1u, *localWorkSize[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -469,9 +473,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThe
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(3u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeZ);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
EXPECT_EQ(3u, *localWorkSize[1]);
|
||||
EXPECT_EQ(5u, *localWorkSize[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -502,9 +508,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
EXPECT_EQ(5u, *localWorkSize[1]);
|
||||
EXPECT_EQ(1u, *localWorkSize[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -535,9 +543,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
EXPECT_EQ(5u, *localWorkSize[1]);
|
||||
EXPECT_EQ(1u, *localWorkSize[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -566,9 +576,11 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(3u, *kernel.localWorkSizeZ);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(1u, *localWorkSize[0]);
|
||||
EXPECT_EQ(2u, *localWorkSize[1]);
|
||||
EXPECT_EQ(3u, *localWorkSize[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -600,12 +612,15 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(3u, *kernel.localWorkSizeZ);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeX2);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeY2);
|
||||
EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(1u, *localWorkSize[0]);
|
||||
EXPECT_EQ(2u, *localWorkSize[1]);
|
||||
EXPECT_EQ(3u, *localWorkSize[2]);
|
||||
auto localWorkSize2 = kernel.getLocalWorkSize2Values();
|
||||
EXPECT_EQ(1u, *localWorkSize2[0]);
|
||||
EXPECT_EQ(2u, *localWorkSize2[1]);
|
||||
EXPECT_EQ(3u, *localWorkSize2[2]);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
@ -644,15 +659,16 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre
|
||||
auto dispatchId = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
if (dispatchId == 0) {
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(3u, *kernel.localWorkSizeZ);
|
||||
EXPECT_EQ(1u, *localWorkSize[0]);
|
||||
EXPECT_EQ(2u, *localWorkSize[1]);
|
||||
EXPECT_EQ(3u, *localWorkSize[2]);
|
||||
}
|
||||
if (dispatchId == 1) {
|
||||
EXPECT_EQ(4u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(6u, *kernel.localWorkSizeZ);
|
||||
EXPECT_EQ(4u, *localWorkSize[0]);
|
||||
EXPECT_EQ(5u, *localWorkSize[1]);
|
||||
EXPECT_EQ(6u, *localWorkSize[2]);
|
||||
}
|
||||
dispatchId++;
|
||||
}
|
||||
@ -661,15 +677,16 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre
|
||||
HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) {
|
||||
MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
|
||||
MockKernel mainKernel(program.get(), kernelInfo, *pClDevice);
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0] = 12;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 16;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2] = 20;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 24;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 28;
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 32;
|
||||
auto &dispatchTraits = kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits;
|
||||
dispatchTraits.localWorkSize[0] = 0;
|
||||
dispatchTraits.localWorkSize[1] = 4;
|
||||
dispatchTraits.localWorkSize[2] = 8;
|
||||
dispatchTraits.localWorkSize2[0] = 12;
|
||||
dispatchTraits.localWorkSize2[1] = 16;
|
||||
dispatchTraits.localWorkSize2[2] = 20;
|
||||
dispatchTraits.numWorkGroups[0] = 24;
|
||||
dispatchTraits.numWorkGroups[1] = 28;
|
||||
dispatchTraits.numWorkGroups[2] = 32;
|
||||
ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
|
||||
ASSERT_EQ(CL_SUCCESS, mainKernel.initialize());
|
||||
|
||||
@ -697,26 +714,29 @@ HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorre
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
auto localWorkSize2 = kernel.getLocalWorkSize2Values();
|
||||
auto numWorkGroups = kernel.getNumWorkGroupsValues();
|
||||
if (&kernel == &mainKernel) {
|
||||
EXPECT_EQ(4u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(6u, *kernel.localWorkSizeZ);
|
||||
EXPECT_EQ(4u, *kernel.localWorkSizeX2);
|
||||
EXPECT_EQ(5u, *kernel.localWorkSizeY2);
|
||||
EXPECT_EQ(6u, *kernel.localWorkSizeZ2);
|
||||
EXPECT_EQ(3u, *kernel.numWorkGroupsX);
|
||||
EXPECT_EQ(2u, *kernel.numWorkGroupsY);
|
||||
EXPECT_EQ(2u, *kernel.numWorkGroupsZ);
|
||||
EXPECT_EQ(4u, *localWorkSize[0]);
|
||||
EXPECT_EQ(5u, *localWorkSize[1]);
|
||||
EXPECT_EQ(6u, *localWorkSize[2]);
|
||||
EXPECT_EQ(4u, *localWorkSize2[0]);
|
||||
EXPECT_EQ(5u, *localWorkSize2[1]);
|
||||
EXPECT_EQ(6u, *localWorkSize2[2]);
|
||||
EXPECT_EQ(3u, *numWorkGroups[0]);
|
||||
EXPECT_EQ(2u, *numWorkGroups[1]);
|
||||
EXPECT_EQ(2u, *numWorkGroups[2]);
|
||||
} else {
|
||||
EXPECT_EQ(0u, *kernel.localWorkSizeX);
|
||||
EXPECT_EQ(0u, *kernel.localWorkSizeY);
|
||||
EXPECT_EQ(0u, *kernel.localWorkSizeZ);
|
||||
EXPECT_EQ(1u, *kernel.localWorkSizeX2);
|
||||
EXPECT_EQ(2u, *kernel.localWorkSizeY2);
|
||||
EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
|
||||
EXPECT_EQ(0u, *kernel.numWorkGroupsX);
|
||||
EXPECT_EQ(0u, *kernel.numWorkGroupsY);
|
||||
EXPECT_EQ(0u, *kernel.numWorkGroupsZ);
|
||||
EXPECT_EQ(0u, *localWorkSize[0]);
|
||||
EXPECT_EQ(0u, *localWorkSize[1]);
|
||||
EXPECT_EQ(0u, *localWorkSize[2]);
|
||||
EXPECT_EQ(1u, *localWorkSize2[0]);
|
||||
EXPECT_EQ(2u, *localWorkSize2[1]);
|
||||
EXPECT_EQ(3u, *localWorkSize2[2]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[0]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[1]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -894,6 +914,8 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDimensionsAreCorrect) {
|
||||
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0;
|
||||
|
||||
MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
|
||||
MockKernel kernel2(program.get(), kernelInfo, *pClDevice);
|
||||
@ -914,7 +936,7 @@ HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDi
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
|
||||
EXPECT_EQ(*kernel.workDim, dispatchInfo.getDim());
|
||||
EXPECT_EQ(dispatchInfo.getDim(), *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,13 +64,15 @@ TEST_F(EnqueueKernelRequiredWorkSize, GivenUnspecifiedWorkGroupSizeWhenEnqeueing
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeY, 2u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeZ, 2u);
|
||||
auto localWorkSizeVal = pKernel->getLocalWorkSizeValues();
|
||||
EXPECT_EQ(8u, *localWorkSizeVal[0]);
|
||||
EXPECT_EQ(2u, *localWorkSizeVal[1]);
|
||||
EXPECT_EQ(2u, *localWorkSizeVal[2]);
|
||||
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 2u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 2u);
|
||||
auto enqueuedLocalWorkSize = pKernel->getEnqueuedLocalWorkSizeValues();
|
||||
EXPECT_EQ(8u, *enqueuedLocalWorkSize[0]);
|
||||
EXPECT_EQ(2u, *enqueuedLocalWorkSize[1]);
|
||||
EXPECT_EQ(2u, *enqueuedLocalWorkSize[2]);
|
||||
}
|
||||
|
||||
// Fully specified
|
||||
@ -91,13 +93,15 @@ TEST_F(EnqueueKernelRequiredWorkSize, GivenRequiredWorkGroupSizeWhenEnqeueingKer
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 2u);
|
||||
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 2u);
|
||||
auto localWorkSizeVal = pKernel->getLocalWorkSizeValues();
|
||||
EXPECT_EQ(8u, *localWorkSizeVal[0]);
|
||||
EXPECT_EQ(2u, *localWorkSizeVal[1]);
|
||||
EXPECT_EQ(2u, *localWorkSizeVal[2]);
|
||||
|
||||
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeY, 2u);
|
||||
EXPECT_EQ(*pKernel->localWorkSizeZ, 2u);
|
||||
auto enqueuedLocalWorkSize = pKernel->getEnqueuedLocalWorkSizeValues();
|
||||
EXPECT_EQ(8u, *enqueuedLocalWorkSize[0]);
|
||||
EXPECT_EQ(2u, *enqueuedLocalWorkSize[1]);
|
||||
EXPECT_EQ(2u, *enqueuedLocalWorkSize[2]);
|
||||
}
|
||||
|
||||
// Underspecified. Won't permit.
|
||||
|
@ -680,11 +680,10 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto localWorkSize = kernel->getLocalWorkSizeValues();
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
*kernel->localWorkSizeX,
|
||||
*kernel->localWorkSizeY,
|
||||
*kernel->localWorkSizeZ);
|
||||
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
@ -695,11 +694,10 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto localWorkSize = kernel->getLocalWorkSizeValues();
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
*kernel->localWorkSizeX,
|
||||
*kernel->localWorkSizeY,
|
||||
*kernel->localWorkSizeZ);
|
||||
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled);
|
||||
}
|
||||
@ -711,11 +709,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto localWorkSize = kernel->getLocalWorkSizeValues();
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
*kernel->localWorkSizeX,
|
||||
*kernel->localWorkSizeY,
|
||||
*kernel->localWorkSizeZ);
|
||||
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
|
||||
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled);
|
||||
}
|
||||
@ -725,11 +723,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto localWorkSize = kernel->getLocalWorkSizeValues();
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
*kernel->localWorkSizeX,
|
||||
*kernel->localWorkSizeY,
|
||||
*kernel->localWorkSizeZ);
|
||||
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
|
||||
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
@ -741,11 +739,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto localWorkSize = kernel->getLocalWorkSizeValues();
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
*kernel->localWorkSizeX,
|
||||
*kernel->localWorkSizeY,
|
||||
*kernel->localWorkSizeZ);
|
||||
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
|
||||
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
@ -757,11 +755,11 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
||||
retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto localWorkSize = kernel->getLocalWorkSizeValues();
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
|
||||
*kernel->localWorkSizeX,
|
||||
*kernel->localWorkSizeY,
|
||||
*kernel->localWorkSizeZ);
|
||||
*localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
|
||||
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
|
@ -70,19 +70,22 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
|
||||
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
false);
|
||||
|
||||
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX);
|
||||
EXPECT_EQ(1u, *scheduler.localWorkSizeY);
|
||||
EXPECT_EQ(1u, *scheduler.localWorkSizeZ);
|
||||
auto localWorkSize = scheduler.getLocalWorkSizeValues();
|
||||
EXPECT_EQ((uint32_t)scheduler.getLws(), *localWorkSize[0]);
|
||||
EXPECT_EQ(1u, *localWorkSize[1]);
|
||||
EXPECT_EQ(1u, *localWorkSize[2]);
|
||||
|
||||
if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) {
|
||||
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX);
|
||||
auto enqueuedLocalWorkSize = scheduler.getEnqueuedLocalWorkSizeValues();
|
||||
if (enqueuedLocalWorkSize[0] != &Kernel::dummyPatchLocation) {
|
||||
EXPECT_EQ((uint32_t)scheduler.getLws(), *enqueuedLocalWorkSize[0]);
|
||||
EXPECT_EQ(1u, *enqueuedLocalWorkSize[1]);
|
||||
EXPECT_EQ(1u, *enqueuedLocalWorkSize[2]);
|
||||
}
|
||||
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY);
|
||||
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ);
|
||||
|
||||
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX);
|
||||
EXPECT_EQ(0u, *scheduler.numWorkGroupsY);
|
||||
EXPECT_EQ(0u, *scheduler.numWorkGroupsZ);
|
||||
auto numWorkGroups = scheduler.getNumWorkGroupsValues();
|
||||
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *numWorkGroups[0]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[1]);
|
||||
EXPECT_EQ(0u, *numWorkGroups[2]);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream, 0);
|
||||
|
@ -2239,97 +2239,33 @@ struct KernelCrossThreadTests : Test<ClDeviceFixture> {
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
};
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[1] = 4;
|
||||
TEST_F(KernelCrossThreadTests, WhenLocalWorkSize2OffsetsAreValidThenIsLocalWorkSize2PatchableReturnsTrue) {
|
||||
auto &localWorkSize2 = pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2;
|
||||
localWorkSize2[0] = 0;
|
||||
localWorkSize2[1] = 4;
|
||||
localWorkSize2[2] = 8;
|
||||
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX);
|
||||
EXPECT_NE(nullptr, kernel.globalWorkOffsetY);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetY);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetZ);
|
||||
EXPECT_TRUE(kernel.isLocalWorkSize2Patchable());
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0xc;
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenNotAllLocalWorkSize2OffsetsAreValidThenIsLocalWorkSize2PatchableReturnsTrue) {
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_NE(nullptr, kernel.localWorkSizeX);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeX);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeY);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ);
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 0xd;
|
||||
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2);
|
||||
EXPECT_NE(nullptr, kernel.localWorkSizeY2);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeY2);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ2);
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = 8;
|
||||
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeY);
|
||||
EXPECT_NE(nullptr, kernel.globalWorkSizeZ);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkSizeZ);
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.workDim = 12;
|
||||
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_NE(nullptr, kernel.workDim);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.workDim);
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 0 * sizeof(uint32_t);
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 1 * sizeof(uint32_t);
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 2 * sizeof(uint32_t);
|
||||
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_NE(nullptr, kernel.numWorkGroupsX);
|
||||
EXPECT_NE(nullptr, kernel.numWorkGroupsY);
|
||||
EXPECT_NE(nullptr, kernel.numWorkGroupsZ);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsX);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsY);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsZ);
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeIsCorrect) {
|
||||
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[0] = 0;
|
||||
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX);
|
||||
EXPECT_NE(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeX);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeY);
|
||||
EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeZ);
|
||||
auto &localWorkSize2 = pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2;
|
||||
for (auto ele0 : {true, false}) {
|
||||
for (auto ele1 : {true, false}) {
|
||||
for (auto ele2 : {true, false}) {
|
||||
if (ele0 && ele1 && ele2) {
|
||||
continue;
|
||||
} else {
|
||||
localWorkSize2[0] = ele0 ? 0 : undefined<CrossThreadDataOffset>;
|
||||
localWorkSize2[1] = ele1 ? 4 : undefined<CrossThreadDataOffset>;
|
||||
localWorkSize2[2] = ele2 ? 8 : undefined<CrossThreadDataOffset>;
|
||||
EXPECT_FALSE(kernel.isLocalWorkSize2Patchable());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) {
|
||||
|
@ -103,18 +103,9 @@ class MockKernel : public Kernel {
|
||||
using Kernel::auxTranslationRequired;
|
||||
using Kernel::containsStatelessWrites;
|
||||
using Kernel::dataParameterSimdSize;
|
||||
using Kernel::enqueuedLocalWorkSizeX;
|
||||
using Kernel::enqueuedLocalWorkSizeY;
|
||||
using Kernel::enqueuedLocalWorkSizeZ;
|
||||
using Kernel::executionType;
|
||||
using Kernel::getDevice;
|
||||
using Kernel::getHardwareInfo;
|
||||
using Kernel::globalWorkOffsetX;
|
||||
using Kernel::globalWorkOffsetY;
|
||||
using Kernel::globalWorkOffsetZ;
|
||||
using Kernel::globalWorkSizeX;
|
||||
using Kernel::globalWorkSizeY;
|
||||
using Kernel::globalWorkSizeZ;
|
||||
using Kernel::hasDirectStatelessAccessToHostMemory;
|
||||
using Kernel::hasIndirectStatelessAccessToHostMemory;
|
||||
using Kernel::isSchedulerKernel;
|
||||
@ -126,18 +117,9 @@ class MockKernel : public Kernel {
|
||||
using Kernel::kernelSubmissionMap;
|
||||
using Kernel::kernelSvmGfxAllocations;
|
||||
using Kernel::kernelUnifiedMemoryGfxAllocations;
|
||||
using Kernel::localWorkSizeX;
|
||||
using Kernel::localWorkSizeX2;
|
||||
using Kernel::localWorkSizeY;
|
||||
using Kernel::localWorkSizeY2;
|
||||
using Kernel::localWorkSizeZ;
|
||||
using Kernel::localWorkSizeZ2;
|
||||
using Kernel::maxKernelWorkGroupSize;
|
||||
using Kernel::maxWorkGroupSizeForCrossThreadData;
|
||||
using Kernel::numberOfBindingTableStates;
|
||||
using Kernel::numWorkGroupsX;
|
||||
using Kernel::numWorkGroupsY;
|
||||
using Kernel::numWorkGroupsZ;
|
||||
using Kernel::parentEventOffset;
|
||||
using Kernel::patchBufferOffset;
|
||||
using Kernel::patchWithImplicitSurface;
|
||||
@ -147,7 +129,6 @@ class MockKernel : public Kernel {
|
||||
using Kernel::svmAllocationsRequireCacheFlush;
|
||||
using Kernel::threadArbitrationPolicy;
|
||||
using Kernel::unifiedMemoryControls;
|
||||
using Kernel::workDim;
|
||||
|
||||
using Kernel::slmSizes;
|
||||
using Kernel::slmTotalSize;
|
||||
@ -558,24 +539,6 @@ class MockParentKernel : public Kernel {
|
||||
|
||||
class MockSchedulerKernel : public SchedulerKernel {
|
||||
public:
|
||||
using Kernel::enqueuedLocalWorkSizeX;
|
||||
using Kernel::enqueuedLocalWorkSizeY;
|
||||
using Kernel::enqueuedLocalWorkSizeZ;
|
||||
using Kernel::globalWorkOffsetX;
|
||||
using Kernel::globalWorkOffsetY;
|
||||
using Kernel::globalWorkOffsetZ;
|
||||
using Kernel::globalWorkSizeX;
|
||||
using Kernel::globalWorkSizeY;
|
||||
using Kernel::globalWorkSizeZ;
|
||||
using Kernel::localWorkSizeX;
|
||||
using Kernel::localWorkSizeX2;
|
||||
using Kernel::localWorkSizeY;
|
||||
using Kernel::localWorkSizeY2;
|
||||
using Kernel::localWorkSizeZ;
|
||||
using Kernel::localWorkSizeZ2;
|
||||
using Kernel::numWorkGroupsX;
|
||||
using Kernel::numWorkGroupsY;
|
||||
using Kernel::numWorkGroupsZ;
|
||||
MockSchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){};
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user