From 75a3f99685fce4763d5050eefd92d3ac220e08fc Mon Sep 17 00:00:00 2001 From: Maciej Plewka Date: Tue, 14 Feb 2023 14:32:36 +0000 Subject: [PATCH] fix(ocl) use correct lws and groupCount to disable eu fusion check Signed-off-by: Maciej Plewka --- opencl/source/command_queue/enqueue_common.h | 5 +++-- .../xe_hpg_core/dg2/command_queue_tests_dg2.cpp | 14 ++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 7e043a3b50..8569066a98 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -855,9 +855,10 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode; - + uint32_t lws[3] = {static_cast(multiDispatchInfo.begin()->getLocalWorkgroupSize().x), static_cast(multiDispatchInfo.begin()->getLocalWorkgroupSize().y), static_cast(multiDispatchInfo.begin()->getLocalWorkgroupSize().z)}; + uint32_t groupCount[3] = {static_cast(multiDispatchInfo.begin()->getNumberOfWorkgroups().x), static_cast(multiDispatchInfo.begin()->getNumberOfWorkgroups().y), static_cast(multiDispatchInfo.begin()->getNumberOfWorkgroups().z)}; dispatchFlags.disableEUFusion = kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion || - device->getProductHelper().isFusedEuDisabledForDpas(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, *kernel->getLocalWorkSizeValues().data(), *kernel->getNumWorkGroupsValues().data()); + device->getProductHelper().isFusedEuDisabledForDpas(systolicPipelineSelectMode, lws, groupCount); const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); diff --git a/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp b/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp index c8e4bbe3e2..b9589d6a32 100644 --- a/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp +++ b/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp @@ -73,18 +73,12 @@ DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndOddWorkGroupWhenenqueueNonBl EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; LinearStream commandStream; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[0] = 0; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[1] = 4; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[2] = 8; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[0] = 12; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[1] = 16; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[2] = 20; - - pKernel->setLocalWorkSizeValues(3, 7, 1); - pKernel->setNumWorkGroupsValues(5, 1, 1); + DispatchInfo &dispatchInfo = *multiDispatchInfo.begin(); + dispatchInfo.setLWS({3, 7, 1}); + dispatchInfo.setNumberOfWorkgroups({5, 1, 1}); bool blocking = false; - const_cast(pKernel->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true; + pKernel->setSystolicPipelineSelectMode(true); cmdQ.template enqueueNonBlocked(nullptr, 0, commandStream, commandStream.getUsed(), blocking, true, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, nullptr); EXPECT_TRUE(csr->disableEuFusionPassed); }