From 1dad22a12a0c1d4547ba23b5d19dd6c6d145f295 Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Thu, 7 May 2020 13:51:31 +0200 Subject: [PATCH] Add isSpecialWorkgroupSizeRequired helper Change-Id: Ic8d4471f48ed5f25eefa802444d0ea62ac0112da Signed-off-by: Kamil Kopryk Related-To: NEO-4648 --- Jenkinsfile | 2 +- opencl/source/command_queue/gpgpu_walker.h | 2 ++ .../source/command_queue/local_work_size.cpp | 23 +++++++++++++++---- .../command_queue/work_group_size_tests.cpp | 9 ++++++++ shared/source/helpers/hw_helper.h | 4 ++++ shared/source/helpers/hw_helper_base.inl | 5 ++++ 6 files changed, 40 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 606bbfd2ed..06a9ec677e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,5 @@ #!groovy dependenciesRevision='c8de469c107af1ced325c9857239bb187d139d1d-1411' strategy='EQUAL' -allowedCD=247 +allowedCD=249 allowedF=20 diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index a9e0c78235..e4b297a6ab 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -87,6 +87,8 @@ Vec3 canonizeWorkgroup( void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo); +void setSpecialWorkgroupSize(size_t workgroupSize[3]); + inline cl_uint computeDimensions(const size_t workItems[3]) { return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1; } diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp index f75508d6a5..58d8235ba3 100644 --- a/opencl/source/command_queue/local_work_size.cpp +++ b/opencl/source/command_queue/local_work_size.cpp @@ -9,7 +9,9 @@ #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" +#include "shared/source/helpers/hw_helper.h" +#include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" @@ -207,6 +209,12 @@ void choosePreferredWorkGroupSizeWithOutRatio(uint32_t xyzFactors[3][1024], uint } } +void setSpecialWorkgroupSize(size_t workgroupSize[3]) { + workgroupSize[0] = 1; + workgroupSize[1] = 1; + workgroupSize[2] = 1; +} + void computeWorkgroupSize1D(uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], @@ -403,14 +411,21 @@ void computeWorkgroupSizeND(WorkSizeInfo wsInfo, size_t workGroupSize[3], const Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { size_t workGroupSize[3] = {}; - if (dispatchInfo.getKernel() != nullptr) { - if (DebugManager.flags.EnableComputeWorkSizeND.get()) { + auto kernel = dispatchInfo.getKernel(); + + if (kernel != nullptr) { + const auto &hwInfo = kernel->getDevice().getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + if (kernel->isBuiltIn && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo)) { + setSpecialWorkgroupSize(workGroupSize); + } else if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(dispatchInfo); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim()); } else { - auto maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize; - auto simd = dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize(); + auto maxWorkGroupSize = kernel->maxKernelWorkGroupSize; + auto simd = kernel->getKernelInfo().getMaxSimdSize(); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; if (dispatchInfo.getDim() == 1) { computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd); diff --git a/opencl/test/unit_test/command_queue/work_group_size_tests.cpp b/opencl/test/unit_test/command_queue/work_group_size_tests.cpp index 1d69325cac..107f884167 100644 --- a/opencl/test/unit_test/command_queue/work_group_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/work_group_size_tests.cpp @@ -348,3 +348,12 @@ INSTANTIATE_TEST_CASE_P(wgs, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(regionCases))); + +TEST(WorkgroupSizeTest, WhenSetSpecialWorkgroupSizeIsCalledThenWorkgroupSizeIsSetTo1x1x1) { + size_t workgroupSize[3] = {}; + setSpecialWorkgroupSize(workgroupSize); + + EXPECT_EQ(1u, workgroupSize[0]); + EXPECT_EQ(1u, workgroupSize[1]); + EXPECT_EQ(1u, workgroupSize[2]); +} diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 59d00540dd..a453177c03 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -89,6 +89,8 @@ class HwHelper { virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0; virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0; + virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo) const = 0; + static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo); static uint32_t getEnginesCount(const HardwareInfo &hwInfo); static uint32_t getCopyEnginesCount(const HardwareInfo &hwInfo); @@ -233,6 +235,8 @@ class HwHelperHw : public HwHelper { uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override; + bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo) const override; + protected: static const AuxTranslationMode defaultAuxTranslationMode; HwHelperHw() = default; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 001b2a0c14..61312b5d79 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -340,6 +340,11 @@ inline bool HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo & return false; } +template +inline bool HwHelperHw::isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo) const { + return false; +} + template size_t MemorySynchronizationCommands::getSizeForFullCacheFlush() { return sizeof(typename GfxFamily::PIPE_CONTROL);