Kmd notify improvements [2/n]: Use QuickKmdSleep for sporadic waits

- Measure time between wait calls. If the delay is exceeded, use QuickKmdSleep
- Kmd Notify helper functions
- Refactor overriding from debug variables
- Refactor Kmd Notify tests


Change-Id: I123c31f492d98fd304184f99ee0bf7d733d06f04
This commit is contained in:
Dunajski, Bartosz
2018-03-22 09:41:17 +01:00
committed by sys_ocldev
parent c0a8522eb1
commit 9486dba6dd
27 changed files with 377 additions and 242 deletions

View File

@ -92,6 +92,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
const HardwareInfo &hwInfo;
CsrSizeRequestFlags csrSizeRequestFlags = {};
std::chrono::high_resolution_clock::time_point lastWaitForCompletionTimestamp;
};
template <typename GfxFamily>

View File

@ -40,6 +40,9 @@ namespace OCLRT {
template <typename GfxFamily>
CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(const HardwareInfo &hwInfoIn) : hwInfo(hwInfoIn) {
    using WaitClock = std::chrono::high_resolution_clock;
    const auto &kmdNotify = hwInfo.capabilityTable.kmdNotifyProperties;

    requiredThreadArbitrationPolicy = PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();

    // Seed the timestamp only when sporadic-wait detection is enabled, so the first wait
    // measures its gap from construction time.
    if (kmdNotify.enableQuickKmdSleepForSporadicWaits) {
        lastWaitForCompletionTimestamp = WaitClock::now();
    }
}
template <typename GfxFamily>
@ -561,18 +564,22 @@ template <typename GfxFamily>
// Waits for taskCountToWait using the KMD notify settings found in hwInfo.capabilityTable.
// NOTE(review): this listing shows removed and added lines of the change side by side
// (kmdNotifyDelay is declared twice below, and the wait after waitForFlushStamp appears twice);
// only one line of each pair can exist in the compiled file - confirm against the real source.
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
const auto &kmdNotifyProperties = this->hwInfo.capabilityTable.kmdNotifyProperties;
const auto &kmdNotifyDelay = useQuickKmdSleep && kmdNotifyProperties.enableQuickKmdSleep ? kmdNotifyProperties.delayQuickKmdSleepMicroseconds
: kmdNotifyProperties.delayKmdNotifyMicroseconds;
// Switch to quick sleep as well when the helper reports that the gap since the last recorded wait was long enough.
useQuickKmdSleep |= kmdNotifyProperties.applyQuickKmdSleepForSporadicWait(lastWaitForCompletionTimestamp);
const auto &kmdNotifyDelay = kmdNotifyProperties.selectDelay(useQuickKmdSleep);
// presumably the first argument enables the timeout: it is true only with KMD notify on and a non-zero flush stamp - TODO confirm
auto status = waitForCompletionWithTimeout(kmdNotifyProperties.enableKmdNotify && flushStampToWait != 0,
kmdNotifyDelay, taskCountToWait);
if (!status) {
waitForFlushStamp(flushStampToWait);
//now call blocking wait, this is to ensure that task count is reached
waitForCompletionWithTimeout(false, kmdNotifyDelay, taskCountToWait);
waitForCompletionWithTimeout(false, 0, taskCountToWait);
}
// NOTE(review): looks like a hard failure if the tag value is still below the requested task count - confirm macro semantics
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);
// Record when this wait ended; the value is handed to applyQuickKmdSleepForSporadicWait on the next call.
if (kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits) {
lastWaitForCompletionTimestamp = std::chrono::high_resolution_clock::now();
}
}
template <typename GfxFamily>

View File

@ -69,11 +69,11 @@ const RuntimeCapabilityTable BDW::capabilityTable{
{false, false},
&isSimulationBDW,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
};
const HardwareInfo BDW_1x2x6::hwInfo = {

View File

@ -65,11 +65,11 @@ const RuntimeCapabilityTable BXT::capabilityTable{
{true, false},
&isSimulationBXT,
true,
false, // forceStatelessCompilationFor32Bit
{false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
false, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
};
const HardwareInfo BXT_1x2x6::hwInfo = {

View File

@ -60,11 +60,11 @@ const RuntimeCapabilityTable CFL::capabilityTable{
{true, false},
&isSimulationCFL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
};
const HardwareInfo CFL_1x2x6::hwInfo = {

View File

@ -60,11 +60,11 @@ const RuntimeCapabilityTable GLK::capabilityTable{
{true, false},
&isSimulationGLK,
true,
false, // forceStatelessCompilationFor32Bit
{true, 30000, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
false, // forceStatelessCompilationFor32Bit
{true, 30000, false, 0, false, 0}, // KmdNotifyProperties
false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
};
const HardwareInfo GLK_1x3x6::hwInfo = {

View File

@ -60,11 +60,11 @@ const RuntimeCapabilityTable KBL::capabilityTable{
{true, false},
&isSimulationKBL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
};
const HardwareInfo KBL_1x2x6::hwInfo = {

View File

@ -68,11 +68,11 @@ const RuntimeCapabilityTable SKL::capabilityTable{
{true, false},
&isSimulationSKL,
true,
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
true, // forceStatelessCompilationFor32Bit
{false, 0, false, 0, false, 0}, // KmdNotifyProperties
true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize
};
const HardwareInfo SKL_1x2x6::hwInfo = {

View File

@ -54,6 +54,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.inl
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mipmap.h
${CMAKE_CURRENT_SOURCE_DIR}/options.cpp
${CMAKE_CURRENT_SOURCE_DIR}/options.h

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include "runtime/helpers/kmd_notify_properties.h"
using namespace OCLRT;
// Reports whether the current wait counts as "sporadic": the feature must be enabled and the
// whole microseconds elapsed since lastWaitTimestamp must exceed the configured threshold.
// The timestamp is only read here; the caller is responsible for updating it.
bool KmdNotifyProperties::applyQuickKmdSleepForSporadicWait(std::chrono::high_resolution_clock::time_point &lastWaitTimestamp) const {
    if (!enableQuickKmdSleepForSporadicWaits) {
        return false;
    }

    using Clock = std::chrono::high_resolution_clock;
    const auto sinceLastWait = Clock::now() - lastWaitTimestamp;
    // Truncate to whole microseconds before comparing, so the threshold is strictly "more than N us".
    const auto elapsedMicroseconds = std::chrono::duration_cast<std::chrono::microseconds>(sinceLastWait).count();
    return elapsedMicroseconds > delayQuickKmdSleepForSporadicWaitsMicroseconds;
}
// Picks the delay to wait with: the quick-sleep delay when the caller asks for it and the
// feature is enabled, the regular KMD notify delay otherwise. Returns a reference to the member.
const int64_t &KmdNotifyProperties::selectDelay(bool useQuickKmdSleep) const {
    if (enableQuickKmdSleep && useQuickKmdSleep) {
        return delayQuickKmdSleepMicroseconds;
    }
    return delayKmdNotifyMicroseconds;
}
// Applies an integer debug override: negative values mean "do not override" and leave
// destination untouched; any other value is widened and stored.
void KmdNotifyProperties::overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination) {
    if (debugVariableValue < 0) {
        return;
    }
    destination = int64_t{debugVariableValue};
}
// Applies a boolean debug override: negative values mean "do not override" and leave
// destination untouched; zero stores false and any positive value stores true.
void KmdNotifyProperties::overrideFromDebugVariable(int32_t debugVariableValue, bool &destination) {
    if (debugVariableValue < 0) {
        return;
    }
    destination = (debugVariableValue != 0);
}

View File

@ -22,12 +22,25 @@
#pragma once
#include <cstdint>
#include <chrono>
namespace OCLRT {
// Tuning knobs for KMD-notify based waits. This is a plain aggregate: the per-platform
// capability tables brace-initialize it positionally, so the member order below matters.
struct KmdNotifyProperties {
// Main switch for KMD Notify optimization - if it's disabled, all below are disabled too
bool enableKmdNotify;
// Delay returned by selectDelay() when the quick-sleep delay is not selected (microseconds)
int64_t delayKmdNotifyMicroseconds;
// Use smaller delay in specific situations (i.e. from AsyncEventsHandler)
bool enableQuickKmdSleep;
// Delay returned by selectDelay() when quick sleep is both requested and enabled (microseconds)
int64_t delayQuickKmdSleepMicroseconds;
// If waits are called sporadically use QuickKmdSleep mode, otherwise use standard delay
bool enableQuickKmdSleepForSporadicWaits;
// Gap between waits (microseconds) that must be exceeded for a wait to count as sporadic
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
// Returns true when enableQuickKmdSleepForSporadicWaits is set and more than
// delayQuickKmdSleepForSporadicWaitsMicroseconds have elapsed since lastWaitTimestamp.
// The timestamp is not modified by this call.
bool applyQuickKmdSleepForSporadicWait(std::chrono::high_resolution_clock::time_point &lastWaitTimestamp) const;
// Returns a reference to delayQuickKmdSleepMicroseconds when both useQuickKmdSleep and
// enableQuickKmdSleep are true, otherwise to delayKmdNotifyMicroseconds.
const int64_t &selectDelay(bool useQuickKmdSleep) const;
// Overwrites destination with debugVariableValue only when the value is non-negative;
// negative values leave destination unchanged.
static void overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination);
// Same rule for flags: a non-negative value is stored as (value != 0); negative values are ignored.
static void overrideFromDebugVariable(int32_t debugVariableValue, bool &destination);
};
} // namespace OCLRT

View File

@ -79,6 +79,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override,
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleep, -1, "-1: dont override, 0: disable, 1: enable. It works only when Kmd Notify is enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideQuickKmdSleepDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleepForSporadicWaits, -1, "-1: dont override, 0: disable, 1: enable. It works only when QuickKmdSleep is enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds, -1, "-1: dont override, >0: timeout in microseconds")
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr")
/*DRIVER TOGGLES*/

View File

@ -184,21 +184,13 @@ int HwInfoConfig::configureHwInfo(const HardwareInfo *inHwInfo, HardwareInfo *ou
static_cast<bool>(outHwInfo->pSkuTable->ftrGpGpuMidBatchPreempt) && preemption);
outHwInfo->capabilityTable.requiredPreemptionSurfaceSize = outHwInfo->pSysInfo->CsrSizeInMb * MemoryConstants::megaByte;
outHwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0
? !!DebugManager.flags.OverrideEnableKmdNotify.get()
: outHwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify;
outHwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get())
: outHwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
outHwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = DebugManager.flags.OverrideEnableQuickKmdSleep.get() >= 0
? !!DebugManager.flags.OverrideEnableQuickKmdSleep.get()
: outHwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep;
outHwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get())
: outHwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
auto &kmdNotifyProperties = outHwInfo->capabilityTable.kmdNotifyProperties;
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideEnableKmdNotify.get(), kmdNotifyProperties.enableKmdNotify);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get(), kmdNotifyProperties.delayKmdNotifyMicroseconds);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideEnableQuickKmdSleep.get(), kmdNotifyProperties.enableQuickKmdSleep);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get(), kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideEnableQuickKmdSleepForSporadicWaits.get(), kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds.get(), kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds);
pPlatform.release();
pSkuTable.release();

View File

@ -66,21 +66,13 @@ bool DeviceFactory::getDevices(HardwareInfo **pHWInfos, size_t &numDevices) {
// Instrumentation
tempHwInfos[devNum].capabilityTable.instrumentationEnabled &= haveInstrumentation;
tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0
? !!DebugManager.flags.OverrideEnableKmdNotify.get()
: tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableKmdNotify;
tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get())
: tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = DebugManager.flags.OverrideEnableQuickKmdSleep.get() >= 0
? !!DebugManager.flags.OverrideEnableQuickKmdSleep.get()
: tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableQuickKmdSleep;
tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get())
: tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
auto &kmdNotifyProperties = tempHwInfos[devNum].capabilityTable.kmdNotifyProperties;
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideEnableKmdNotify.get(), kmdNotifyProperties.enableKmdNotify);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get(), kmdNotifyProperties.delayKmdNotifyMicroseconds);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideEnableQuickKmdSleep.get(), kmdNotifyProperties.enableQuickKmdSleep);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get(), kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideEnableQuickKmdSleepForSporadicWaits.get(), kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits);
KmdNotifyProperties::overrideFromDebugVariable(DebugManager.flags.OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds.get(), kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds);
numDevices = 1;
*pHWInfos = tempHwInfos;