Add simple kernel tuning variant
Related-To: NEO-5327
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
a2eeeff880
commit
40390f7775
|
@ -1164,13 +1164,16 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
|
||||||
}
|
}
|
||||||
|
|
||||||
void Kernel::performKernelTunning(CommandStreamReceiver &commandStreamReceiver, const Vec3<size_t> &lws, const Vec3<size_t> &gws, const Vec3<size_t> &offsets, TimestampPacketContainer *timestampContainer) {
|
void Kernel::performKernelTunning(CommandStreamReceiver &commandStreamReceiver, const Vec3<size_t> &lws, const Vec3<size_t> &gws, const Vec3<size_t> &offsets, TimestampPacketContainer *timestampContainer) {
|
||||||
bool performTunning = false;
|
auto performTunning = TunningType::DISABLED;
|
||||||
|
|
||||||
if (DebugManager.flags.EnableKernelTunning.get() != -1) {
|
if (DebugManager.flags.EnableKernelTunning.get() != -1) {
|
||||||
performTunning = DebugManager.flags.EnableKernelTunning.get();
|
performTunning = static_cast<TunningType>(DebugManager.flags.EnableKernelTunning.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (performTunning) {
|
if (performTunning == TunningType::SIMPLE) {
|
||||||
|
this->singleSubdevicePreferedInCurrentEnqueue = !this->getKernelInfo(commandStreamReceiver.getRootDeviceIndex()).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics;
|
||||||
|
|
||||||
|
} else if (performTunning == TunningType::FULL) {
|
||||||
KernelConfig config{gws, lws, offsets};
|
KernelConfig config{gws, lws, offsets};
|
||||||
|
|
||||||
auto submissionDataIt = this->kernelSubmissionMap.find(config);
|
auto submissionDataIt = this->kernelSubmissionMap.find(config);
|
||||||
|
|
|
@ -82,6 +82,12 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||||
TUNNING_DONE
|
TUNNING_DONE
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class TunningType { // Tuning mode chosen in Kernel::performKernelTunning; produced by static_cast from the EnableKernelTunning debug flag, so enumerator order must match the flag values.
|
||||||
|
DISABLED, // 0: no tuning; also the initial value when the flag is left at -1.
|
||||||
|
SIMPLE, // 1: sets singleSubdevicePreferedInCurrentEnqueue to the negation of the kernel's useGlobalAtomics flag.
|
||||||
|
FULL // 2: per-configuration tuning that looks up a KernelConfig{gws, lws, offsets} in kernelSubmissionMap.
|
||||||
|
};
|
||||||
|
|
||||||
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
||||||
size_t argSize,
|
size_t argSize,
|
||||||
const void *argVal);
|
const void *argVal);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2017-2020 Intel Corporation
|
* Copyright (C) 2017-2021 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -1946,9 +1946,9 @@ TEST(KernelConfigTests, givenTwoKernelConfigsWhenCompareThenResultsAreCorrect) {
|
||||||
EXPECT_FALSE(config == config2);
|
EXPECT_FALSE(config == config2);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerneConfigDataIsTracked) {
|
HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenKernelConfigDataIsTracked) {
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
DebugManager.flags.EnableKernelTunning.set(1u);
|
DebugManager.flags.EnableKernelTunning.set(2u);
|
||||||
|
|
||||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||||
|
@ -2023,6 +2023,30 @@ HWTEST_F(KernelResidencyTest, givenEnableKernelTuningWhenPerformTunningThenKerne
|
||||||
EXPECT_EQ(result->second.singleSubdevicePrefered, mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
|
EXPECT_EQ(result->second.singleSubdevicePrefered, mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTunningThenSingleSubdeviceIsPreferred) { // Simple tuning must not touch kernelSubmissionMap and must make the single-subdevice preference differ from useGlobalAtomics.
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.EnableKernelTunning.set(1u); // 1 selects the simple tuning variant (TunningType::SIMPLE).
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||||
|
|
||||||
|
Vec3<size_t> lws{1, 1, 1};
|
||||||
|
Vec3<size_t> gws{1, 1, 1};
|
||||||
|
Vec3<size_t> offsets{1, 1, 1};
|
||||||
|
MockKernel::KernelConfig config{gws, lws, offsets}; // Key that the full tuning path would use in kernelSubmissionMap.
|
||||||
|
|
||||||
|
MockTimestampPacketContainer container(*commandStreamReceiver.getTimestampPacketAllocator(), 1);
|
||||||
|
|
||||||
|
auto result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
|
||||||
|
EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end()); // Precondition: nothing is tracked for this config yet.
|
||||||
|
|
||||||
|
mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container);
|
||||||
|
|
||||||
|
result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
|
||||||
|
EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end()); // Simple tuning must not add a submission-map entry.
|
||||||
|
EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo(0u).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); // NOTE(review): presumably isSingleSubdevicePreferred() reflects singleSubdevicePreferedInCurrentEnqueue, and root device index 0 matches the CSR's - confirm.
|
||||||
|
}
|
||||||
|
|
||||||
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
|
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
|
||||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2017-2020 Intel Corporation
|
* Copyright (C) 2017-2021 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -170,7 +170,7 @@ DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
|
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memory manager")
|
DECLARE_DEBUG_VARIABLE(bool, EnableFreeMemory, false, "Enable freeMemory in memory manager")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode")
|
DECLARE_DEBUG_VARIABLE(bool, ForceSamplerLowFilteringPrecision, false, "Force Low Filtering Precision Sampler mode")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableKernelTunning, -1, "Perform a tunning of enqueue kernel, -1:default(disabled), 0:disable, 1:enable simple kernel tunning, 2:enable full kernel tunning")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOMmapCreate, -1, "Create BOs using mmap, -1:default, 0:disable(GEM_USERPTR), 1:enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOMmapCreate, -1, "Create BOs using mmap, -1:default, 0:disable(GEM_USERPTR), 1:enable")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableGemCloseWorker, -1, "Use asynchronous gem object closing, -1:default, 0:disable, 1:enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableGemCloseWorker, -1, "Use asynchronous gem object closing, -1:default, 0:disable, 1:enable")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableIntelVme, -1, "-1: default, 0: disabled, 1: Enables cl_intel_motion_estimation extension")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableIntelVme, -1, "-1: default, 0: disabled, 1: Enables cl_intel_motion_estimation extension")
|
||||||
|
|
Loading…
Reference in New Issue