Add simple kernel tuning variant

Related-To: NEO-5327

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2021-01-11 15:11:14 +00:00 committed by Compute-Runtime-Automation
parent a2eeeff880
commit 40390f7775
4 changed files with 41 additions and 8 deletions

View File

@ -1164,13 +1164,16 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
}
void Kernel::performKernelTunning(CommandStreamReceiver &commandStreamReceiver, const Vec3<size_t> &lws, const Vec3<size_t> &gws, const Vec3<size_t> &offsets, TimestampPacketContainer *timestampContainer) {
bool performTunning = false;
auto performTunning = TunningType::DISABLED;
if (DebugManager.flags.EnableKernelTunning.get() != -1) {
performTunning = DebugManager.flags.EnableKernelTunning.get();
performTunning = static_cast<TunningType>(DebugManager.flags.EnableKernelTunning.get());
}
if (performTunning) {
if (performTunning == TunningType::SIMPLE) {
this->singleSubdevicePreferedInCurrentEnqueue = !this->getKernelInfo(commandStreamReceiver.getRootDeviceIndex()).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics;
} else if (performTunning == TunningType::FULL) {
KernelConfig config{gws, lws, offsets};
auto submissionDataIt = this->kernelSubmissionMap.find(config);

View File

@ -82,6 +82,12 @@ class Kernel : public BaseObject<_cl_kernel> {
TUNNING_DONE
};
// Kernel tuning mode used by performKernelTunning. The value is obtained by
// casting the integer EnableKernelTunning debug flag, so the numeric value of
// each enumerator is spelled out explicitly (they match the implicit values).
enum class TunningType {
    DISABLED = 0, // flag value 0: neither the SIMPLE nor the FULL path is taken
    SIMPLE = 1,   // flag value 1: single-subdevice preference is derived from the useGlobalAtomics kernel attribute
    FULL = 2      // flag value 2: submissions are looked up per KernelConfig in kernelSubmissionMap
};
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
size_t argSize,
const void *argVal);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -1946,9 +1946,9 @@ TEST(KernelConfigTests, givenTwoKernelConfigsWhenCompareThenResultsAreCorrect) {
EXPECT_FALSE(config == config2);
}
HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerneConfigDataIsTracked) {
HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenKernelConfigDataIsTracked) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableKernelTunning.set(1u);
DebugManager.flags.EnableKernelTunning.set(2u);
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
MockKernelWithInternals mockKernel(*this->pClDevice);
@ -2023,6 +2023,30 @@ HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerne
EXPECT_EQ(result->second.singleSubdevicePrefered, mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
}
// With EnableKernelTunning set to 1 (simple tuning), performKernelTunning must
// not add an entry to kernelSubmissionMap, and the single-subdevice preference
// must be the opposite of the kernel's useGlobalAtomics attribute.
HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTunningThenSingleSubdeviceIsPreferred) {
    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableKernelTunning.set(1u);

    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &kernel = *mockKernel.mockKernel;

    Vec3<size_t> localWorkSize{1, 1, 1};
    Vec3<size_t> globalWorkSize{1, 1, 1};
    Vec3<size_t> globalOffsets{1, 1, 1};
    MockKernel::KernelConfig kernelConfig{globalWorkSize, localWorkSize, globalOffsets};
    MockTimestampPacketContainer timestampContainer(*commandStreamReceiver.getTimestampPacketAllocator(), 1);

    // Nothing is tracked for this configuration before tuning runs...
    EXPECT_EQ(kernel.kernelSubmissionMap.find(kernelConfig), kernel.kernelSubmissionMap.end());

    kernel.performKernelTunning(commandStreamReceiver, localWorkSize, globalWorkSize, globalOffsets, &timestampContainer);

    // ...and nothing is tracked afterwards either.
    EXPECT_EQ(kernel.kernelSubmissionMap.find(kernelConfig), kernel.kernelSubmissionMap.end());

    EXPECT_NE(kernel.isSingleSubdevicePreferred(), kernel.getKernelInfo(0u).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics);
}
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -170,7 +170,7 @@ DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memory manager")
DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode")
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable simple kernel tunning, 2:enable full kernel tunning")
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOMmapCreate, -1, "Create BOs using mmap, -1:default, 0:disable(GEM_USERPTR), 1:enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableGemCloseWorker, -1, "Use asynchronous gem object closing, -1:default, 0:disable, 1:enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableIntelVme, -1, "-1: default, 0: disabled, 1: Enables cl_intel_motion_estimation extension")