Add simple kernel tuning variant
Related-To: NEO-5327
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
a2eeeff880
commit
40390f7775
|
@ -1164,13 +1164,16 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
|
|||
}
|
||||
|
||||
void Kernel::performKernelTunning(CommandStreamReceiver &commandStreamReceiver, const Vec3<size_t> &lws, const Vec3<size_t> &gws, const Vec3<size_t> &offsets, TimestampPacketContainer *timestampContainer) {
|
||||
bool performTunning = false;
|
||||
auto performTunning = TunningType::DISABLED;
|
||||
|
||||
if (DebugManager.flags.EnableKernelTunning.get() != -1) {
|
||||
performTunning = DebugManager.flags.EnableKernelTunning.get();
|
||||
performTunning = static_cast<TunningType>(DebugManager.flags.EnableKernelTunning.get());
|
||||
}
|
||||
|
||||
if (performTunning) {
|
||||
if (performTunning == TunningType::SIMPLE) {
|
||||
this->singleSubdevicePreferedInCurrentEnqueue = !this->getKernelInfo(commandStreamReceiver.getRootDeviceIndex()).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics;
|
||||
|
||||
} else if (performTunning == TunningType::FULL) {
|
||||
KernelConfig config{gws, lws, offsets};
|
||||
|
||||
auto submissionDataIt = this->kernelSubmissionMap.find(config);
|
||||
|
|
|
@ -82,6 +82,12 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
TUNNING_DONE
|
||||
};
|
||||
|
||||
// Kernel tuning mode used by Kernel::performKernelTunning.
//
// The integer value of the EnableKernelTunning debug flag is converted to this
// type with a static_cast, so each enumerator is pinned explicitly to the number
// documented for that flag. Reordering or inserting enumerators must not change
// these values.
enum class TunningType {
    // Flag value 0 (also the state kept when the flag is left at -1):
    // neither tuning branch is taken.
    DISABLED = 0,
    // Flag value 1: a single sub-device is preferred for the current enqueue
    // unless the kernel uses global atomics.
    SIMPLE = 1,
    // Flag value 2: submissions are looked up per KernelConfig (gws, lws, offsets)
    // in kernelSubmissionMap.
    FULL = 2
};
|
||||
|
||||
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
||||
size_t argSize,
|
||||
const void *argVal);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -1946,9 +1946,9 @@ TEST(KernelConfigTests, givenTwoKernelConfigsWhenCompareThenResultsAreCorrect) {
|
|||
EXPECT_FALSE(config == config2);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerneConfigDataIsTracked) {
|
||||
HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenKernelConfigDataIsTracked) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableKernelTunning.set(1u);
|
||||
DebugManager.flags.EnableKernelTunning.set(2u);
|
||||
|
||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||
|
@ -2023,6 +2023,30 @@ HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerne
|
|||
EXPECT_EQ(result->second.singleSubdevicePrefered, mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
|
||||
}
|
||||
|
||||
// Simple tuning (EnableKernelTunning == 1) must not record anything in
// kernelSubmissionMap and must report a single-sub-device preference that is the
// opposite of the kernel's useGlobalAtomics flag.
HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTunningThenSingleSubdeviceIsPreferred) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableKernelTunning.set(1u);

    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
    MockKernelWithInternals mockKernel(*this->pClDevice);

    Vec3<size_t> lws{1, 1, 1};
    Vec3<size_t> gws{1, 1, 1};
    Vec3<size_t> offsets{1, 1, 1};
    MockKernel::KernelConfig config{gws, lws, offsets};

    MockTimestampPacketContainer container(*commandStreamReceiver.getTimestampPacketAllocator(), 1);

    // Nothing is tracked for this configuration before tuning runs.
    auto result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
    EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end());

    mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);

    // The simple variant leaves the submission map untouched.
    result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
    EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end());

    // Query the kernel info with the same root device index that performKernelTunning
    // uses, instead of assuming index 0.
    const auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
    EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics);
}
|
||||
|
||||
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -170,7 +170,7 @@ DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
|
|||
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memory manager")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable simple kernel tunning, 2:enable full kernel tunning")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOMmapCreate, -1, "Create BOs using mmap, -1:default, 0:disable(GEM_USERPTR), 1:enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableGemCloseWorker, -1, "Use asynchronous gem object closing, -1:default, 0:disable, 1:enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableIntelVme, -1, "-1: default, 0: disabled, 1: Enables cl_intel_motion_estimation extension")
|
||||
|
|
Loading…
Reference in New Issue