fix(ocl): use correct lws and groupCount in the check that disables EU fusion
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
90cd433766
commit
75a3f99685
|
@ -855,9 +855,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
||||
|
||||
uint32_t lws[3] = {static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().x), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().y), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().z)};
|
||||
uint32_t groupCount[3] = {static_cast<uint32_t>(multiDispatchInfo.begin()->getNumberOfWorkgroups().x), static_cast<uint32_t>(multiDispatchInfo.begin()->getNumberOfWorkgroups().y), static_cast<uint32_t>(multiDispatchInfo.begin()->getNumberOfWorkgroups().z)};
|
||||
dispatchFlags.disableEUFusion = kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion ||
|
||||
device->getProductHelper().isFusedEuDisabledForDpas(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, *kernel->getLocalWorkSizeValues().data(), *kernel->getNumWorkGroupsValues().data());
|
||||
device->getProductHelper().isFusedEuDisabledForDpas(systolicPipelineSelectMode, lws, groupCount);
|
||||
|
||||
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||
|
||||
|
|
|
@ -73,18 +73,12 @@ DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndOddWorkGroupWhenenqueueNonBl
|
|||
EventsRequest eventsRequest(0, nullptr, nullptr);
|
||||
EventBuilder eventBuilder;
|
||||
LinearStream commandStream;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[0] = 0;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[1] = 4;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[2] = 8;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[0] = 12;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[1] = 16;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[2] = 20;
|
||||
|
||||
pKernel->setLocalWorkSizeValues(3, 7, 1);
|
||||
pKernel->setNumWorkGroupsValues(5, 1, 1);
|
||||
DispatchInfo &dispatchInfo = *multiDispatchInfo.begin();
|
||||
dispatchInfo.setLWS({3, 7, 1});
|
||||
dispatchInfo.setNumberOfWorkgroups({5, 1, 1});
|
||||
|
||||
bool blocking = false;
|
||||
const_cast<NEO::KernelDescriptor &>(pKernel->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
|
||||
pKernel->setSystolicPipelineSelectMode(true);
|
||||
cmdQ.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, commandStream, commandStream.getUsed(), blocking, true, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, nullptr);
|
||||
EXPECT_TRUE(csr->disableEuFusionPassed);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue