Pick the main kernel (instead of the first dispatch) when patching local work size (LWS) and number of work groups (numWG) in dispatchWalker()

Change-Id: I4fd0746ec77890ceacbf333966bb00a4ea99b186
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2018-08-20 11:00:40 +02:00
committed by sys_ocldev
parent 1ad70dfebe
commit 044255e9bd
2 changed files with 31 additions and 15 deletions

View File

@@ -560,7 +560,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
*kernel.globalWorkSizeY = static_cast<uint32_t>(gws.y); *kernel.globalWorkSizeY = static_cast<uint32_t>(gws.y);
*kernel.globalWorkSizeZ = static_cast<uint32_t>(gws.z); *kernel.globalWorkSizeZ = static_cast<uint32_t>(gws.z);
if ((&dispatchInfo == &*multiDispatchInfo.begin()) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) { if ((&kernel == multiDispatchInfo.peekMainKernel()) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) {
*kernel.localWorkSizeX = static_cast<uint32_t>(lws.x); *kernel.localWorkSizeX = static_cast<uint32_t>(lws.x);
*kernel.localWorkSizeY = static_cast<uint32_t>(lws.y); *kernel.localWorkSizeY = static_cast<uint32_t>(lws.y);
*kernel.localWorkSizeZ = static_cast<uint32_t>(lws.z); *kernel.localWorkSizeZ = static_cast<uint32_t>(lws.z);
@@ -574,7 +574,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
*kernel.enqueuedLocalWorkSizeY = static_cast<uint32_t>(elws.y); *kernel.enqueuedLocalWorkSizeY = static_cast<uint32_t>(elws.y);
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z); *kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
if (&dispatchInfo == &*multiDispatchInfo.begin()) { if (&kernel == multiDispatchInfo.peekMainKernel()) {
*kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x); *kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x);
*kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y); *kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y);
*kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z); *kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z);

View File

@@ -584,19 +584,26 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) {
} }
HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) { HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) {
MockKernel kernel(program.get(), kernelInfo, *pDevice); MockKernel kernel1(program.get(), kernelInfo, *pDevice);
MockKernel mainKernel(program.get(), kernelInfo, *pDevice);
kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
kernelInfo.workloadInfo.localWorkSizeOffsets2[0] = 12; kernelInfo.workloadInfo.localWorkSizeOffsets2[0] = 12;
kernelInfo.workloadInfo.localWorkSizeOffsets2[1] = 16; kernelInfo.workloadInfo.localWorkSizeOffsets2[1] = 16;
kernelInfo.workloadInfo.localWorkSizeOffsets2[2] = 20; kernelInfo.workloadInfo.localWorkSizeOffsets2[2] = 20;
ASSERT_EQ(CL_SUCCESS, kernel.initialize()); kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 24;
kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 28;
kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 32;
ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
ASSERT_EQ(CL_SUCCESS, mainKernel.initialize());
DispatchInfo di1(&kernel, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0}); DispatchInfo di1(&kernel1, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0});
DispatchInfo di2(&kernel, 3, {10, 10, 10}, {4, 5, 6}, {0, 0, 0}); DispatchInfo di2(&mainKernel, 3, {10, 10, 10}, {4, 5, 6}, {0, 0, 0});
MockMultiDispatchInfo multiDispatchInfo(std::vector<DispatchInfo *>({&di1, &di2})); MultiDispatchInfo multiDispatchInfo(&mainKernel);
multiDispatchInfo.push(di1);
multiDispatchInfo.push(di2);
GpgpuWalkerHelper<FamilyType>::dispatchWalker( GpgpuWalkerHelper<FamilyType>::dispatchWalker(
*pCmdQ, *pCmdQ,
@@ -609,20 +616,29 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) {
pDevice->getPreemptionMode(), pDevice->getPreemptionMode(),
false); false);
auto dispatchId = 0;
for (auto &dispatchInfo : multiDispatchInfo) { for (auto &dispatchInfo : multiDispatchInfo) {
auto &kernel = *dispatchInfo.getKernel(); auto &kernel = *dispatchInfo.getKernel();
if (dispatchId == 0) { if (&kernel == &mainKernel) {
EXPECT_EQ(1u, *kernel.localWorkSizeX); EXPECT_EQ(4u, *kernel.localWorkSizeX);
EXPECT_EQ(2u, *kernel.localWorkSizeY); EXPECT_EQ(5u, *kernel.localWorkSizeY);
EXPECT_EQ(3u, *kernel.localWorkSizeZ); EXPECT_EQ(6u, *kernel.localWorkSizeZ);
}
if (dispatchId == 1) {
EXPECT_EQ(4u, *kernel.localWorkSizeX2); EXPECT_EQ(4u, *kernel.localWorkSizeX2);
EXPECT_EQ(5u, *kernel.localWorkSizeY2); EXPECT_EQ(5u, *kernel.localWorkSizeY2);
EXPECT_EQ(6u, *kernel.localWorkSizeZ2); EXPECT_EQ(6u, *kernel.localWorkSizeZ2);
EXPECT_EQ(3u, *kernel.numWorkGroupsX);
EXPECT_EQ(2u, *kernel.numWorkGroupsY);
EXPECT_EQ(2u, *kernel.numWorkGroupsZ);
} else {
EXPECT_EQ(0u, *kernel.localWorkSizeX);
EXPECT_EQ(0u, *kernel.localWorkSizeY);
EXPECT_EQ(0u, *kernel.localWorkSizeZ);
EXPECT_EQ(1u, *kernel.localWorkSizeX2);
EXPECT_EQ(2u, *kernel.localWorkSizeY2);
EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
EXPECT_EQ(0u, *kernel.numWorkGroupsX);
EXPECT_EQ(0u, *kernel.numWorkGroupsY);
EXPECT_EQ(0u, *kernel.numWorkGroupsZ);
} }
dispatchId++;
} }
} }