From 40390f7775b6c8e52899fdbc1a87b8e433b820fb Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Mon, 11 Jan 2021 15:11:14 +0000 Subject: [PATCH] Add simple kernel tunning variant Related-To: NEO-5327 Signed-off-by: Lukasz Jobczyk --- opencl/source/kernel/kernel.cpp | 9 ++++-- opencl/source/kernel/kernel.h | 6 ++++ opencl/test/unit_test/kernel/kernel_tests.cpp | 30 +++++++++++++++++-- .../debug_settings/debug_variables_base.inl | 4 +-- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index a2b5e5aa47..47f074111a 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1164,13 +1164,16 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive } void Kernel::performKernelTunning(CommandStreamReceiver &commandStreamReceiver, const Vec3 &lws, const Vec3 &gws, const Vec3 &offsets, TimestampPacketContainer *timestampContainer) { - bool performTunning = false; + auto performTunning = TunningType::DISABLED; if (DebugManager.flags.EnableKernelTunning.get() != -1) { - performTunning = DebugManager.flags.EnableKernelTunning.get(); + performTunning = static_cast<TunningType>(DebugManager.flags.EnableKernelTunning.get()); } - if (performTunning) { + if (performTunning == TunningType::SIMPLE) { + this->singleSubdevicePreferedInCurrentEnqueue = !this->getKernelInfo(commandStreamReceiver.getRootDeviceIndex()).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; + + } else if (performTunning == TunningType::FULL) { KernelConfig config{gws, lws, offsets}; auto submissionDataIt = this->kernelSubmissionMap.find(config); diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 62d5ee77b7..a23e205e5d 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -82,6 +82,12 @@ class Kernel : public BaseObject<_cl_kernel> { TUNNING_DONE }; + enum class TunningType { + DISABLED, + SIMPLE, + FULL + }; + typedef int32_t 
(Kernel::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 90f2582a01..f27041a99b 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1946,9 +1946,9 @@ TEST(KernelConfigTests, givenTwoKernelConfigsWhenCompareThenResultsAreCorrect) { EXPECT_FALSE(config == config2); } -HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerneConfigDataIsTracked) { +HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenKernelConfigDataIsTracked) { DebugManagerStateRestore restorer; - DebugManager.flags.EnableKernelTunning.set(1u); + DebugManager.flags.EnableKernelTunning.set(2u); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); MockKernelWithInternals mockKernel(*this->pClDevice); @@ -2023,6 +2023,30 @@ HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerne EXPECT_EQ(result->second.singleSubdevicePrefered, mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue); } +HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTunningThenSingleSubdeviceIsPreferred) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableKernelTunning.set(1u); + + auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); + MockKernelWithInternals mockKernel(*this->pClDevice); + + Vec3 lws{1, 1, 1}; + Vec3 gws{1, 1, 1}; + Vec3 offsets{1, 1, 1}; + MockKernel::KernelConfig config{gws, lws, offsets}; + + MockTimestampPacketContainer container(*commandStreamReceiver.getTimestampPacketAllocator(), 1); + + auto result = mockKernel.mockKernel->kernelSubmissionMap.find(config); + EXPECT_EQ(result, 
mockKernel.mockKernel->kernelSubmissionMap.end()); + + mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container); + + result = mockKernel.mockKernel->kernelSubmissionMap.find(config); + EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end()); + EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo(0u).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); +} + TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 892d6dbc2c..0882eb840e 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -170,7 +170,7 @@ DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension") DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying") DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memory manager") DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode") -DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable") +DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable simple kernel tunning, 2:enable full kernel tunning") DECLARE_DEBUG_VARIABLE(int32_t, EnableBOMmapCreate, -1, "Create BOs using mmap, -1:default, 0:disable(GEM_USERPTR), 1:enable") DECLARE_DEBUG_VARIABLE(int32_t, 
EnableGemCloseWorker, -1, "Use asynchronous gem object closing, -1:default, 0:disable, 1:enable") DECLARE_DEBUG_VARIABLE(int32_t, EnableIntelVme, -1, "-1: default, 0: disabled, 1: Enables cl_intel_motion_estimation extension")