Kmd notify improvements [1/n]: Quick KMD sleep optimization

- KmdNotifyProperties struct for CapabilityTable that can be extended by
  incoming KmdNotify related optimizations
- Quick KMD sleep optimization that is called from async events handler
- The optimization checks the taskCount in a busy loop with a much smaller
  delay than the basic version of the KMD Notify optimization

Change-Id: I60c851c59895f0cf9de1e1f21e755a8b4c2fe900
This commit is contained in:
Dunajski, Bartosz
2018-03-21 10:00:49 +01:00
committed by sys_ocldev
parent 029094437a
commit 516082e7c5
41 changed files with 309 additions and 102 deletions

View File

@ -151,13 +151,13 @@ bool CommandQueue::isCompleted(uint32_t taskCount) const {
return tag >= taskCount; return tag >= taskCount;
} }
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait) { void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
WAIT_ENTER() WAIT_ENTER()
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait); DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag()); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
device->getCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait); device->getCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep);
DEBUG_BREAK_IF(getHwTag() < taskCountToWait); DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
latestTaskCountWaited = taskCountToWait; latestTaskCountWaited = taskCountToWait;

View File

@ -318,7 +318,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL bool isQueueBlocked(); MOCKABLE_VIRTUAL bool isQueueBlocked();
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait); MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
void flushWaitList(cl_uint numEventsInWaitList, void flushWaitList(cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,

View File

@ -325,7 +325,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
commandStreamReceiver.overrideMediaVFEStateDirty(true); commandStreamReceiver.overrideMediaVFEStateDirty(true);
if (devQueueHw->getSchedulerReturnInstance() > 0) { if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp); waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation; BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(), simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
@ -404,9 +404,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (blockQueue) { if (blockQueue) {
while (isQueueBlocked()) while (isQueueBlocked())
; ;
waitUntilComplete(taskCount, flushStamp->peekStamp()); waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
} else { } else {
waitUntilComplete(taskCount, flushStamp->peekStamp()); waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
for (auto sIt = surfacesForResidency, sE = surfacesForResidency + numSurfaceForResidency; for (auto sIt = surfacesForResidency, sE = surfacesForResidency + numSurfaceForResidency;
sIt != sE; ++sIt) { sIt != sE; ++sIt) {
(*sIt)->setCompletionStamp(completionStamp, nullptr, nullptr); (*sIt)->setCompletionStamp(completionStamp, nullptr, nullptr);

View File

@ -41,7 +41,7 @@ cl_int CommandQueueHw<GfxFamily>::finish(bool dcFlush) {
auto flushStampToWaitFor = this->flushStamp->peekStamp(); auto flushStampToWaitFor = this->flushStamp->peekStamp();
// Stall until HW reaches CQ taskCount // Stall until HW reaches CQ taskCount
waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor); waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false);
commandStreamReceiver.waitForTaskCountAndCleanAllocationList(taskCountToWaitFor, TEMPORARY_ALLOCATION); commandStreamReceiver.waitForTaskCountAndCleanAllocationList(taskCountToWaitFor, TEMPORARY_ALLOCATION);

View File

@ -111,7 +111,7 @@ class CommandStreamReceiver {
void requestThreadArbitrationPolicy(uint32_t requiredPolicy) { this->requiredThreadArbitrationPolicy = requiredPolicy; } void requestThreadArbitrationPolicy(uint32_t requiredPolicy) { this->requiredThreadArbitrationPolicy = requiredPolicy; }
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) = 0; virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) = 0;
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait); MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
// returns size of block that needs to be reserved at the beginning of each instruction heap for CommandStreamReceiver // returns size of block that needs to be reserved at the beginning of each instruction heap for CommandStreamReceiver

View File

@ -65,7 +65,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const; size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
void programCoherency(LinearStream &csr, DispatchFlags &dispatchFlags); void programCoherency(LinearStream &csr, DispatchFlags &dispatchFlags);
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) override; void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override;
const HardwareInfo &peekHwInfo() const { return hwInfo; } const HardwareInfo &peekHwInfo() const { return hwInfo; }
protected: protected:

View File

@ -558,14 +558,18 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
} }
template <typename GfxFamily> template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) { inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
auto status = waitForCompletionWithTimeout(this->hwInfo.capabilityTable.enableKmdNotify && flushStampToWait != 0, const auto &kmdNotifyProperties = this->hwInfo.capabilityTable.kmdNotifyProperties;
this->hwInfo.capabilityTable.delayKmdNotifyMicroseconds,
taskCountToWait); const auto &kmdNotifyDelay = useQuickKmdSleep && kmdNotifyProperties.enableQuickKmdSleep ? kmdNotifyProperties.delayQuickKmdSleepMicroseconds
: kmdNotifyProperties.delayKmdNotifyMicroseconds;
auto status = waitForCompletionWithTimeout(kmdNotifyProperties.enableKmdNotify && flushStampToWait != 0,
kmdNotifyDelay, taskCountToWait);
if (!status) { if (!status) {
waitForFlushStamp(flushStampToWait); waitForFlushStamp(flushStampToWait);
//now call blocking wait, this is to ensure that task count is reached //now call blocking wait, this is to ensure that task count is reached
waitForCompletionWithTimeout(false, this->hwInfo.capabilityTable.delayKmdNotifyMicroseconds, taskCountToWait); waitForCompletionWithTimeout(false, kmdNotifyDelay, taskCountToWait);
} }
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait); UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017 - 2018, Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -87,7 +87,7 @@ void AsyncEventsHandler::asyncProcess() {
sleepCandidate = processList(); sleepCandidate = processList();
if (sleepCandidate) { if (sleepCandidate) {
sleepCandidate->wait(true); sleepCandidate->wait(true, true);
} }
std::this_thread::yield(); std::this_thread::yield();
} }

View File

@ -288,14 +288,14 @@ bool Event::calcProfilingData() {
return dataCalculated; return dataCalculated;
} }
inline bool Event::wait(bool blocking) { inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
while (this->taskCount == Event::eventNotReady) { while (this->taskCount == Event::eventNotReady) {
if (blocking == false) { if (blocking == false) {
return false; return false;
} }
} }
cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp()); cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp(), useQuickKmdSleep);
updateExecutionStatus(); updateExecutionStatus();
DEBUG_BREAK_IF(this->taskLevel == Event::eventNotReady && this->executionStatus >= 0); DEBUG_BREAK_IF(this->taskLevel == Event::eventNotReady && this->executionStatus >= 0);
@ -495,7 +495,7 @@ cl_int Event::waitForEvents(cl_uint numEvents,
return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
} }
if (event->wait(false) == false) { if (event->wait(false, false) == false) {
pendingEventsLeft->push_back(event); pendingEventsLeft->push_back(event);
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017 - 2018, Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -225,7 +225,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
//returns true on success //returns true on success
//if(blocking==false), will return with false instead of blocking while waiting for completion //if(blocking==false), will return with false instead of blocking while waiting for completion
virtual bool wait(bool blocking); virtual bool wait(bool blocking, bool useQuickKmdSleep);
bool isUserEvent() const { bool isUserEvent() const {
return (CL_COMMAND_USER == cmdType); return (CL_COMMAND_USER == cmdType);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017 - 2018, Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -37,7 +37,7 @@ void UserEvent::updateExecutionStatus() {
return; return;
} }
bool UserEvent::wait(bool blocking) { bool UserEvent::wait(bool blocking, bool useQuickKmdSleep) {
while (updateStatusAndCheckCompletion() == false) { while (updateStatusAndCheckCompletion() == false) {
if (blocking == false) { if (blocking == false) {
return false; return false;
@ -72,7 +72,7 @@ void VirtualEvent::updateExecutionStatus() {
; ;
} }
bool VirtualEvent::wait(bool blocking) { bool VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) {
while (updateStatusAndCheckCompletion() == false) { while (updateStatusAndCheckCompletion() == false) {
if (blocking == false) { if (blocking == false) {
return false; return false;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017 - 2018, Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -33,7 +33,7 @@ class UserEvent : public Event {
~UserEvent() override = default; ~UserEvent() override = default;
bool wait(bool blocking) override; bool wait(bool blocking, bool useQuickKmdSleep) override;
void updateExecutionStatus() override; void updateExecutionStatus() override;
@ -48,7 +48,7 @@ class VirtualEvent : public Event {
~VirtualEvent() override = default; ~VirtualEvent() override = default;
bool wait(bool blocking) override; bool wait(bool blocking, bool useQuickKmdSleep) override;
bool setStatus(cl_int status) override; bool setStatus(cl_int status) override;

View File

@ -70,8 +70,7 @@ const RuntimeCapabilityTable BDW::capabilityTable{
&isSimulationBDW, &isSimulationBDW,
true, true,
true, // forceStatelessCompilationFor32Bit true, // forceStatelessCompilationFor32Bit
false, // EnableKmdNotify {false, 0, false, 0}, // KmdNotifyProperties
30000, // delayKmdNotifyMicroseconds
false, // ftr64KBpages false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize MemoryConstants::pageSize //requiredPreemptionSurfaceSize

View File

@ -66,8 +66,7 @@ const RuntimeCapabilityTable BXT::capabilityTable{
&isSimulationBXT, &isSimulationBXT,
true, true,
false, // forceStatelessCompilationFor32Bit false, // forceStatelessCompilationFor32Bit
false, // EnableKmdNotify {false, 0, false, 0}, // KmdNotifyProperties
30000, // delayKmdNotifyMicroseconds
false, // ftr64KBpages false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize MemoryConstants::pageSize //requiredPreemptionSurfaceSize

View File

@ -61,8 +61,7 @@ const RuntimeCapabilityTable CFL::capabilityTable{
&isSimulationCFL, &isSimulationCFL,
true, true,
true, // forceStatelessCompilationFor32Bit true, // forceStatelessCompilationFor32Bit
false, // EnableKmdNotify {false, 0, false, 0}, // KmdNotifyProperties
30000, // delayKmdNotifyMicroseconds
true, // ftr64KBpages true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize MemoryConstants::pageSize //requiredPreemptionSurfaceSize

View File

@ -61,8 +61,7 @@ const RuntimeCapabilityTable GLK::capabilityTable{
&isSimulationGLK, &isSimulationGLK,
true, true,
false, // forceStatelessCompilationFor32Bit false, // forceStatelessCompilationFor32Bit
true, // EnableKmdNotify {true, 30000, false, 0}, // KmdNotifyProperties
30000, // delayKmdNotifyMicroseconds
false, // ftr64KBpages false, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize MemoryConstants::pageSize //requiredPreemptionSurfaceSize

View File

@ -61,8 +61,7 @@ const RuntimeCapabilityTable KBL::capabilityTable{
&isSimulationKBL, &isSimulationKBL,
true, true,
true, // forceStatelessCompilationFor32Bit true, // forceStatelessCompilationFor32Bit
false, // EnableKmdNotify {false, 0, false, 0}, // KmdNotifyProperties
30000, // delayKmdNotifyMicroseconds
true, // ftr64KBpages true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize MemoryConstants::pageSize //requiredPreemptionSurfaceSize

View File

@ -69,8 +69,7 @@ const RuntimeCapabilityTable SKL::capabilityTable{
&isSimulationSKL, &isSimulationSKL,
true, true,
true, // forceStatelessCompilationFor32Bit true, // forceStatelessCompilationFor32Bit
false, // EnableKmdNotify {false, 0, false, 0}, // KmdNotifyProperties
30000, // delayKmdNotifyMicroseconds
true, // ftr64KBpages true, // ftr64KBpages
EngineType::ENGINE_RCS, // defaultEngineType EngineType::ENGINE_RCS, // defaultEngineType
MemoryConstants::pageSize //requiredPreemptionSurfaceSize MemoryConstants::pageSize //requiredPreemptionSurfaceSize

View File

@ -53,6 +53,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.inl
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/mipmap.h ${CMAKE_CURRENT_SOURCE_DIR}/mipmap.h
${CMAKE_CURRENT_SOURCE_DIR}/options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/options.cpp
${CMAKE_CURRENT_SOURCE_DIR}/options.h ${CMAKE_CURRENT_SOURCE_DIR}/options.h

View File

@ -26,6 +26,7 @@
#include "sku_info.h" #include "sku_info.h"
#include "runtime/helpers/engine_node.h" #include "runtime/helpers/engine_node.h"
#include "runtime/helpers/kmd_notify_properties.h"
#include <cstddef> #include <cstddef>
namespace OCLRT { namespace OCLRT {
@ -64,8 +65,8 @@ struct RuntimeCapabilityTable {
bool forceStatelessCompilationFor32Bit; bool forceStatelessCompilationFor32Bit;
bool enableKmdNotify; KmdNotifyProperties kmdNotifyProperties;
int64_t delayKmdNotifyMicroseconds;
bool ftr64KBpages; bool ftr64KBpages;
EngineType defaultEngineType; EngineType defaultEngineType;

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstdint>
namespace OCLRT {
// Settings for the KMD-notify wait path, stored in RuntimeCapabilityTable as
// kmdNotifyProperties. The per-platform capability tables fill this struct with
// positional aggregate initializers (e.g. {true, 30000, false, 0}), so the
// order of the members is part of the contract.
struct KmdNotifyProperties {
// When true and the flush stamp being waited on is non-zero,
// waitForTaskCountWithKmdNotifyFallback enables the timeout on its first
// task-count poll; if that poll fails it waits on the flush stamp and then
// polls again without a timeout.
bool enableKmdNotify;
// Timeout, in microseconds, passed to the task-count poll when quick sleep is not selected.
// NOTE(review): the override debug flag documents 0 as "infinite timeout" - confirm
// the same meaning applies to values set directly in the capability table.
int64_t delayKmdNotifyMicroseconds;
// When true, a caller that requests quick KMD sleep (useQuickKmdSleep == true)
// gets delayQuickKmdSleepMicroseconds as the poll timeout instead of
// delayKmdNotifyMicroseconds. It does not enable the timeout itself; that is
// still controlled by enableKmdNotify.
bool enableQuickKmdSleep;
// Timeout, in microseconds, used for the task-count poll when quick sleep is both
// enabled here and requested by the caller.
int64_t delayQuickKmdSleepMicroseconds;
};
} // namespace OCLRT

View File

@ -89,7 +89,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
taskLevel, taskLevel,
dispatchFlags); dispatchFlags);
cmdQ.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp); cmdQ.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
if (!memObj.isMemObjZeroCopy()) { if (!memObj.isMemObjZeroCopy()) {
if (op == MAP) { if (op == MAP) {
@ -277,7 +277,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
for (auto &surface : surfaces) { for (auto &surface : surfaces) {
surface->setCompletionStamp(completionStamp, nullptr, nullptr); surface->setCompletionStamp(completionStamp, nullptr, nullptr);
} }
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp); commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
if (printfHandler) { if (printfHandler) {
printfHandler.get()->printEnqueueOutput(); printfHandler.get()->printEnqueueOutput();
@ -317,7 +317,7 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
taskLevel, taskLevel,
dispatchFlags); dispatchFlags);
cmdQ.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp); cmdQ.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
return completionStamp; return completionStamp;
} }

View File

@ -77,6 +77,8 @@ DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent al
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible") DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds") DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleep, -1, "-1: dont override, 0: disable, 1: enable. It works only when Kmd Notify is enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideQuickKmdSleepDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls") DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr") DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr")
/*DRIVER TOGGLES*/ /*DRIVER TOGGLES*/

View File

@ -184,13 +184,21 @@ int HwInfoConfig::configureHwInfo(const HardwareInfo *inHwInfo, HardwareInfo *ou
static_cast<bool>(outHwInfo->pSkuTable->ftrGpGpuMidBatchPreempt) && preemption); static_cast<bool>(outHwInfo->pSkuTable->ftrGpGpuMidBatchPreempt) && preemption);
outHwInfo->capabilityTable.requiredPreemptionSurfaceSize = outHwInfo->pSysInfo->CsrSizeInMb * MemoryConstants::megaByte; outHwInfo->capabilityTable.requiredPreemptionSurfaceSize = outHwInfo->pSysInfo->CsrSizeInMb * MemoryConstants::megaByte;
outHwInfo->capabilityTable.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0 outHwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0
? !!DebugManager.flags.OverrideEnableKmdNotify.get() ? !!DebugManager.flags.OverrideEnableKmdNotify.get()
: outHwInfo->capabilityTable.enableKmdNotify; : outHwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify;
outHwInfo->capabilityTable.delayKmdNotifyMicroseconds = DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get() >= 0 outHwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get()) ? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get())
: outHwInfo->capabilityTable.delayKmdNotifyMicroseconds; : outHwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
outHwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = DebugManager.flags.OverrideEnableQuickKmdSleep.get() >= 0
? !!DebugManager.flags.OverrideEnableQuickKmdSleep.get()
: outHwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep;
outHwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get())
: outHwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
pPlatform.release(); pPlatform.release();
pSkuTable.release(); pSkuTable.release();

View File

@ -66,13 +66,21 @@ bool DeviceFactory::getDevices(HardwareInfo **pHWInfos, size_t &numDevices) {
// Instrumentation // Instrumentation
tempHwInfos[devNum].capabilityTable.instrumentationEnabled &= haveInstrumentation; tempHwInfos[devNum].capabilityTable.instrumentationEnabled &= haveInstrumentation;
tempHwInfos[devNum].capabilityTable.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0 tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableKmdNotify = DebugManager.flags.OverrideEnableKmdNotify.get() >= 0
? !!DebugManager.flags.OverrideEnableKmdNotify.get() ? !!DebugManager.flags.OverrideEnableKmdNotify.get()
: tempHwInfos[devNum].capabilityTable.enableKmdNotify; : tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableKmdNotify;
tempHwInfos[devNum].capabilityTable.delayKmdNotifyMicroseconds = DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get() >= 0 tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get()) ? static_cast<int64_t>(DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.get())
: tempHwInfos[devNum].capabilityTable.delayKmdNotifyMicroseconds; : tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = DebugManager.flags.OverrideEnableQuickKmdSleep.get() >= 0
? !!DebugManager.flags.OverrideEnableQuickKmdSleep.get()
: tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.enableQuickKmdSleep;
tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get() >= 0
? static_cast<int64_t>(DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.get())
: tempHwInfos[devNum].capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
numDevices = 1; numDevices = 1;
*pHWInfos = tempHwInfos; *pHWInfos = tempHwInfos;

View File

@ -646,7 +646,7 @@ HWTEST_F(CommandQueueCSTest, getCSShouldReturnACSWithEnoughSizeCSRTraffic) {
struct KmdNotifyTests : public ::testing::Test { struct KmdNotifyTests : public ::testing::Test {
void SetUp() override { void SetUp() override {
resetObjects(1, 1); resetObjects(1, 1, 1, 2);
*device->getTagAddress() = taskCountToWait; *device->getTagAddress() = taskCountToWait;
} }
@ -656,7 +656,8 @@ struct KmdNotifyTests : public ::testing::Test {
DeviceFactory::releaseDevices(); DeviceFactory::releaseDevices();
} }
void resetObjects(int32_t overrideEnable, int32_t overrideTimeout) { void resetObjects(int32_t overrideKmdNotifyEnable, int32_t overrideKmdNotifyDelay,
int overrideQuickKmdSleepEnable, int32_t overrideQuickKmdSleepDelay) {
if (cmdQ) { if (cmdQ) {
delete cmdQ; delete cmdQ;
} }
@ -665,8 +666,10 @@ struct KmdNotifyTests : public ::testing::Test {
DeviceFactory::releaseDevices(); DeviceFactory::releaseDevices();
} }
DebugManagerStateRestore stateRestore; DebugManagerStateRestore stateRestore;
DebugManager.flags.OverrideEnableKmdNotify.set(overrideEnable); DebugManager.flags.OverrideEnableKmdNotify.set(overrideKmdNotifyEnable);
DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.set(overrideTimeout); DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.set(overrideKmdNotifyDelay);
DebugManager.flags.OverrideEnableQuickKmdSleep.set(overrideQuickKmdSleepEnable);
DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.set(overrideQuickKmdSleepDelay);
size_t numDevices; size_t numDevices;
HardwareInfo *hwInfo = nullptr; HardwareInfo *hwInfo = nullptr;
DeviceFactory::getDevices(&hwInfo, numDevices); DeviceFactory::getDevices(&hwInfo, numDevices);
@ -702,18 +705,18 @@ HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTr
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 1, taskCountToWait)).Times(0); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 1, taskCountToWait)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
} }
HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) { HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) {
resetObjects(0, 0); resetObjects(0, 0, 0, 0);
auto csr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo()); auto csr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo());
device->resetCommandStreamReceiver(csr); device->resetCommandStreamReceiver(csr);
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
} }
HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) { HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) {
@ -727,7 +730,7 @@ HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThen
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 1, taskCountToWait)).Times(1).WillOnce(::testing::Return(false)); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 1, taskCountToWait)).Times(1).WillOnce(::testing::Return(false));
//we have unrecoverable for this case, this will throw. //we have unrecoverable for this case, this will throw.
EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait), std::exception); EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false), std::exception);
} }
HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) { HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) {
@ -738,7 +741,38 @@ HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTry
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
}
// Checks that waitUntilComplete called with useQuickKmdSleep == false polls the
// task count using the base delay (delayKmdNotifyMicroseconds).
// NOTE(review): the test name mentions a "default argument", but the flag is
// passed explicitly as false below - confirm whether the name is still accurate.
HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) {
// Replace the device's command stream receiver with a mock so the wait call can be observed.
auto csr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo());
device->resetCommandStreamReceiver(csr);
// Expected timeout is read back from the device's hardware info rather than hard-coded.
auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
// Exactly one poll with the timeout enabled and the base delay; it reports success.
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
}
// Checks that waitUntilComplete called with useQuickKmdSleep == true polls the
// task count using the quick-sleep delay (delayQuickKmdSleepMicroseconds).
// Relies on the fixture having quick KMD sleep enabled (SetUp calls
// resetObjects(1, 1, 1, 2)).
HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) {
// Replace the device's command stream receiver with a mock so the wait call can be observed.
auto csr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo());
device->resetCommandStreamReceiver(csr);
// Expected timeout is the quick-sleep delay read back from the device's hardware info.
auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
// Exactly one poll with the timeout enabled and the quick-sleep delay; it reports success.
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true);
}
// A quick KMD sleep request must fall back to the regular KMD notify delay
// when the fixture has been reconfigured through resetObjects().
// NOTE(review): resetObjects(1, 1, 0, 0) presumably enables KMD notify and
// disables quick KMD sleep - confirm against the fixture definition.
HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) {
    resetObjects(1, 1, 0, 0);

    auto mockCsr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo());
    device->resetCommandStreamReceiver(mockCsr);

    const auto &kmdNotifyProperties = device->getHardwareInfo().capabilityTable.kmdNotifyProperties;
    const auto baseDelay = kmdNotifyProperties.delayKmdNotifyMicroseconds;

    EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(true, baseDelay, taskCountToWait))
        .Times(1)
        .WillOnce(::testing::Return(true));

    const bool useQuickKmdSleep = true;
    cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
}
HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) { HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) {
@ -766,11 +800,62 @@ HWTEST_F(KmdNotifyTests, givenZeroFlushStampWhenWaitIsCalledThenDisableTimeout)
auto csr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo()); auto csr = new ::testing::NiceMock<MyCsr<FamilyType>>(device->getHardwareInfo());
device->resetCommandStreamReceiver(csr); device->resetCommandStreamReceiver(csr);
EXPECT_TRUE(device->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_TRUE(device->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, ::testing::_, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, ::testing::_, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 0); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 0, false);
}
// Fixture that owns a mock device and context and supplies a command queue
// stub which records how waitUntilComplete() was invoked instead of waiting.
struct WaitForQueueCompletionTests : public ::testing::Test {
    // Command queue whose waitUntilComplete() only counts the calls and stores
    // the last useQuickKmdSleep argument; isQueueBlocked() always reports false.
    template <typename Family>
    struct MyCmdQueue : public CommandQueueHw<Family> {
        bool requestedUseQuickKmdSleep = false;
        uint32_t waitUntilCompleteCounter = 0;

        MyCmdQueue(Context *ctx, Device *dev) : CommandQueueHw<Family>(ctx, dev, nullptr) {}

        void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
            ++waitUntilCompleteCounter;
            requestedUseQuickKmdSleep = useQuickKmdSleep;
        }

        bool isQueueBlocked() override { return false; }
    };

    // Creates the device first, because the context is constructed from it.
    void SetUp() override {
        auto createdDevice = Device::create<MockDevice>(*platformDevices);
        device.reset(createdDevice);

        auto createdContext = new MockContext(device.get());
        context.reset(createdContext);
    }

    // Declared in this order so that the context is destroyed before the device.
    std::unique_ptr<MockDevice> device;
    std::unique_ptr<MockContext> context;
};
// A blocking read with an empty event wait list must call waitUntilComplete()
// exactly once, and must not request quick KMD sleep.
HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndUnblockedQueueWhenEnqueuedThenCallWaitWithoutQuickKmdSleepRequest) {
    using QueueType = MyCmdQueue<FamilyType>;

    std::unique_ptr<QueueType> queue(new QueueType(context.get(), device.get()));
    uint32_t hostMemory = 0;
    std::unique_ptr<Buffer> srcBuffer(BufferHelper<>::create(context.get()));

    // CL_TRUE makes the read blocking; no events are passed in or requested.
    queue->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, 1, &hostMemory,
                             0, nullptr, nullptr);

    EXPECT_EQ(1u, queue->waitUntilCompleteCounter);
    EXPECT_FALSE(queue->requestedUseQuickKmdSleep);
}
// A blocking read that carries one event in its wait list must still call
// waitUntilComplete() exactly once, and must not request quick KMD sleep.
HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndBlockedQueueWhenEnqueuedThenCallWaitWithoutQuickKmdSleepRequest) {
    using QueueType = MyCmdQueue<FamilyType>;

    std::unique_ptr<QueueType> queue(new QueueType(context.get(), device.get()));

    // Event handed to the enqueue call as its single wait-list entry.
    std::unique_ptr<Event> dependency(new Event(queue.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0));
    cl_event waitList = dependency.get();

    uint32_t hostMemory = 0;
    std::unique_ptr<Buffer> srcBuffer(BufferHelper<>::create(context.get()));

    // CL_TRUE makes the read blocking; no output event is requested.
    queue->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, 1, &hostMemory,
                             1, &waitList, nullptr);

    EXPECT_EQ(1u, queue->waitUntilCompleteCounter);
    EXPECT_FALSE(queue->requestedUseQuickKmdSleep);
}
// finish() must call waitUntilComplete() exactly once, and must not request
// quick KMD sleep.
HWTEST_F(WaitForQueueCompletionTests, whenFinishIsCalledThenCallWaitWithoutQuickKmdSleepRequest) {
    using QueueType = MyCmdQueue<FamilyType>;

    std::unique_ptr<QueueType> queue(new QueueType(context.get(), device.get()));

    queue->finish(false);

    EXPECT_EQ(1u, queue->waitUntilCompleteCounter);
    EXPECT_FALSE(queue->requestedUseQuickKmdSleep);
}
constexpr char sipPattern[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 39, 41}; constexpr char sipPattern[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 39, 41};

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017 - 2018, Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -47,7 +47,7 @@ class AsyncEventsHandlerTests : public ::testing::Test {
this->updateTaskCount(taskCount); this->updateTaskCount(taskCount);
} }
MOCK_METHOD1(wait, bool(bool blocking)); MOCK_METHOD2(wait, bool(bool blocking, bool quickKmdSleep));
}; };
static void CL_CALLBACK callbackFcn(cl_event e, cl_int status, void *data) { static void CL_CALLBACK callbackFcn(cl_event e, cl_int status, void *data) {
@ -349,19 +349,19 @@ TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontR
event2->setStatus(CL_COMPLETE); event2->setStatus(CL_COMPLETE);
} }
TEST_F(AsyncEventsHandlerTests, givenSleepCandidateWhenProcessedThenCallWait) { TEST_F(AsyncEventsHandlerTests, givenSleepCandidateWhenProcessedThenCallWaitWithQuickKmdSleepRequest) {
event1->setTaskStamp(0, 1); event1->setTaskStamp(0, 1);
event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter);
handler->registerEvent(event1); handler->registerEvent(event1);
handler->allowAsyncProcess.store(true); handler->allowAsyncProcess.store(true);
// break infinite loop after first iteration // break infinite loop after first iteration
auto unsetAsyncFlag = [&](bool arg) { auto unsetAsyncFlag = [&](bool blocking, bool quickKmdSleep) {
handler->allowAsyncProcess.store(false); handler->allowAsyncProcess.store(false);
return true; return true;
}; };
EXPECT_CALL(*event1, wait(true)).Times(1).WillOnce(Invoke(unsetAsyncFlag)); EXPECT_CALL(*event1, wait(true, true)).Times(1).WillOnce(Invoke(unsetAsyncFlag));
handler->asyncProcess(); handler->asyncProcess();

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017 - 2018, Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -88,8 +88,8 @@ struct InternalsEventTest
}; };
struct MyUserEvent : public VirtualEvent { struct MyUserEvent : public VirtualEvent {
bool wait(bool blocking) override { bool wait(bool blocking, bool quickKmdSleep) override {
return VirtualEvent::wait(blocking); return VirtualEvent::wait(blocking, quickKmdSleep);
}; };
uint32_t getTaskLevel() override { uint32_t getTaskLevel() override {
return VirtualEvent::getTaskLevel(); return VirtualEvent::getTaskLevel();

View File

@ -388,7 +388,7 @@ TEST_F(EventTest, GetEventInfo_InvalidParam) {
TEST_F(EventTest, Event_Wait_NonBlocking) { TEST_F(EventTest, Event_Wait_NonBlocking) {
Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, Event::eventNotReady); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, Event::eventNotReady);
auto result = event.wait(false); auto result = event.wait(false, false);
EXPECT_FALSE(result); EXPECT_FALSE(result);
} }
@ -1415,6 +1415,54 @@ TEST_F(EventTest, addChildForEventCompleted) {
} }
} }
// Event::wait(blocking = true, quickKmdSleep = true) is expected to reach the
// CSR with the quick-sleep delay (1) and not the regular KMD notify delay (2).
HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) {
    // CSR with a mocked waitForCompletionWithTimeout, so the timeout argument
    // can be inspected.
    struct MockCsr : public UltCommandStreamReceiver<FamilyType> {
        MockCsr(const HardwareInfo &hwInfo) : UltCommandStreamReceiver<FamilyType>(hwInfo) {}
        MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait));
    };

    // Work on a copy of the hardware info with both mechanisms enabled and
    // distinct delays, so the two code paths can be told apart.
    HardwareInfo localHwInfo = pDevice->getHardwareInfo();
    auto &kmdNotifyProperties = localHwInfo.capabilityTable.kmdNotifyProperties;
    kmdNotifyProperties.enableKmdNotify = true;
    kmdNotifyProperties.enableQuickKmdSleep = true;
    kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 1;
    kmdNotifyProperties.delayKmdNotifyMicroseconds = 2;

    auto mockCsr = new ::testing::NiceMock<MockCsr>(localHwInfo);
    pDevice->resetCommandStreamReceiver(mockCsr);

    Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, kmdNotifyProperties.delayQuickKmdSleepMicroseconds, ::testing::_))
        .Times(1)
        .WillOnce(::testing::Return(true));

    event.wait(true, true);
}
// Event::wait(blocking = true, quickKmdSleep = false) is expected to reach the
// CSR with the regular KMD notify delay (2) and not the quick-sleep delay (1).
HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) {
    // CSR with a mocked waitForCompletionWithTimeout, so the timeout argument
    // can be inspected.
    struct MockCsr : public UltCommandStreamReceiver<FamilyType> {
        MockCsr(const HardwareInfo &hwInfo) : UltCommandStreamReceiver<FamilyType>(hwInfo) {}
        MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait));
    };

    // Work on a copy of the hardware info with both mechanisms enabled and
    // distinct delays, so the two code paths can be told apart.
    HardwareInfo localHwInfo = pDevice->getHardwareInfo();
    auto &kmdNotifyProperties = localHwInfo.capabilityTable.kmdNotifyProperties;
    kmdNotifyProperties.enableKmdNotify = true;
    kmdNotifyProperties.enableQuickKmdSleep = true;
    kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 1;
    kmdNotifyProperties.delayKmdNotifyMicroseconds = 2;

    auto mockCsr = new ::testing::NiceMock<MockCsr>(localHwInfo);
    pDevice->resetCommandStreamReceiver(mockCsr);

    Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, kmdNotifyProperties.delayKmdNotifyMicroseconds, ::testing::_))
        .Times(1)
        .WillOnce(::testing::Return(true));

    event.wait(true, false);
}
HWTEST_F(InternalsEventTest, givenCommandWhenSubmitCalledThenUpdateFlushStamp) { HWTEST_F(InternalsEventTest, givenCommandWhenSubmitCalledThenUpdateFlushStamp) {
auto pCmdQ = std::unique_ptr<CommandQueue>(new CommandQueue(mockContext, pDevice, 0)); auto pCmdQ = std::unique_ptr<CommandQueue>(new CommandQueue(mockContext, pDevice, 0));
MockEvent<Event> *event = new MockEvent<Event>(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); MockEvent<Event> *event = new MockEvent<Event>(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0);

View File

@ -151,7 +151,7 @@ TEST(UserEvent, initialUserEventStateIsNotReadyForSubmission) {
TEST(UserEvent, GIVENUserEventWHENgetTaskLevelTHENSuccess) { TEST(UserEvent, GIVENUserEventWHENgetTaskLevelTHENSuccess) {
MyUserEvent uEvent; MyUserEvent uEvent;
EXPECT_EQ(0U, uEvent.getTaskLevel()); EXPECT_EQ(0U, uEvent.getTaskLevel());
EXPECT_FALSE(uEvent.wait(false)); EXPECT_FALSE(uEvent.wait(false, false));
} }
TEST(UserEvent, userEventAfterSetingStatusIsReadyForSubmission) { TEST(UserEvent, userEventAfterSetingStatusIsReadyForSubmission) {
@ -930,7 +930,7 @@ TEST_F(EventTests, waitForEventsDestroysTemporaryAllocations) {
TEST_F(EventTest, UserEvent_Wait_NonBlocking) { TEST_F(EventTest, UserEvent_Wait_NonBlocking) {
UserEvent event; UserEvent event;
auto result = event.wait(false); auto result = event.wait(false, false);
EXPECT_FALSE(result); EXPECT_FALSE(result);
} }
@ -1083,4 +1083,4 @@ TEST_F(EventTests, givenUserEventWhenSetStatusIsDoneThenDeviceMutextisAcquired)
mockedEvent mockEvent(this->context); mockedEvent mockEvent(this->context);
clSetUserEventStatus(&mockEvent, CL_COMPLETE); clSetUserEventStatus(&mockEvent, CL_COMPLETE);
EXPECT_TRUE(mockEvent.mutexProperlyAcquired); EXPECT_TRUE(mockEvent.mutexProperlyAcquired);
} }

View File

@ -56,8 +56,10 @@ GEN8TEST_F(Gen8DeviceCaps, whitelistedRegister) {
} }
GEN8TEST_F(Gen8DeviceCaps, kmdNotifyMechanism) { GEN8TEST_F(Gen8DeviceCaps, kmdNotifyMechanism) {
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
} }
GEN8TEST_F(Gen8DeviceCaps, compression) { GEN8TEST_F(Gen8DeviceCaps, compression) {

View File

@ -85,6 +85,8 @@ BXTTEST_F(BxtUsDeviceIdTest, isSimulationCap) {
} }
BXTTEST_F(BxtUsDeviceIdTest, kmdNotifyMechanism) { BXTTEST_F(BxtUsDeviceIdTest, kmdNotifyMechanism) {
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
} }

View File

@ -34,8 +34,10 @@ CFLTEST_F(CflDeviceCaps, reportsOcl21) {
} }
CFLTEST_F(CflDeviceCaps, kmdNotifyMechanism) { CFLTEST_F(CflDeviceCaps, kmdNotifyMechanism) {
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
} }
CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) { CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) {

View File

@ -74,8 +74,10 @@ GLKTEST_F(GlkUsDeviceIdTest, isSimulationCap) {
} }
GLKTEST_F(GlkUsDeviceIdTest, kmdNotifyMechanism) { GLKTEST_F(GlkUsDeviceIdTest, kmdNotifyMechanism) {
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
} }
GLKTEST_F(GlkUsDeviceIdTest, GivenGLKWhenCheckftr64KBpagesThenFalse) { GLKTEST_F(GlkUsDeviceIdTest, GivenGLKWhenCheckftr64KBpagesThenFalse) {

View File

@ -34,8 +34,10 @@ KBLTEST_F(KblDeviceCaps, reportsOcl21) {
} }
KBLTEST_F(KblDeviceCaps, kmdNotifyMechanism) { KBLTEST_F(KblDeviceCaps, kmdNotifyMechanism) {
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
} }
KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) { KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) {

View File

@ -82,8 +82,10 @@ SKLTEST_F(SklUsDeviceIdTest, isSimulationCap) {
} }
SKLTEST_F(SklUsDeviceIdTest, kmdNotifyMechanism) { SKLTEST_F(SklUsDeviceIdTest, kmdNotifyMechanism) {
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.enableKmdNotify); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
} }
SKLTEST_F(SklUsDeviceIdTest, GivenSKLWhenCheckftr64KBpagesThenTrue) { SKLTEST_F(SklUsDeviceIdTest, GivenSKLWhenCheckftr64KBpagesThenTrue) {

View File

@ -444,7 +444,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
void addPipeControl(LinearStream &commandStream, bool dcFlush) override { void addPipeControl(LinearStream &commandStream, bool dcFlush) override {
} }
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) override { void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep) override {
} }
CompletionStamp flushTask( CompletionStamp flushTask(

View File

@ -223,7 +223,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
} }
} }
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) override { void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep) override {
} }
void addPipeControl(LinearStream &commandStream, bool dcFlush) override { void addPipeControl(LinearStream &commandStream, bool dcFlush) override {

View File

@ -102,18 +102,26 @@ TEST_F(DeviceFactoryTest, overrideKmdNotifySettings) {
bool success = DeviceFactory::getDevices(&hwInfoReference, numDevices); bool success = DeviceFactory::getDevices(&hwInfoReference, numDevices);
ASSERT_TRUE(success); ASSERT_TRUE(success);
auto refEnableKmdNotify = hwInfoReference->capabilityTable.enableKmdNotify; auto refEnableKmdNotify = hwInfoReference->capabilityTable.kmdNotifyProperties.enableKmdNotify;
auto refDelayKmdNotifyMicroseconds = hwInfoReference->capabilityTable.delayKmdNotifyMicroseconds; auto refDelayKmdNotifyMicroseconds = hwInfoReference->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds;
auto refEnableQuickKmdSleep = hwInfoReference->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep;
auto refDelayQuickKmdSleepMicroseconds = hwInfoReference->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds;
DeviceFactory::releaseDevices(); DeviceFactory::releaseDevices();
DebugManager.flags.OverrideEnableKmdNotify.set(!refEnableKmdNotify); DebugManager.flags.OverrideEnableKmdNotify.set(!refEnableKmdNotify);
DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.set(static_cast<int32_t>(refDelayKmdNotifyMicroseconds) + 10); DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.set(static_cast<int32_t>(refDelayKmdNotifyMicroseconds) + 10);
DebugManager.flags.OverrideEnableQuickKmdSleep.set(!refEnableQuickKmdSleep);
DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.set(static_cast<int32_t>(refDelayQuickKmdSleepMicroseconds) + 11);
success = DeviceFactory::getDevices(&hwInfoOverriden, numDevices); success = DeviceFactory::getDevices(&hwInfoOverriden, numDevices);
ASSERT_TRUE(success); ASSERT_TRUE(success);
EXPECT_EQ(!refEnableKmdNotify, hwInfoOverriden->capabilityTable.enableKmdNotify); EXPECT_EQ(!refEnableKmdNotify, hwInfoOverriden->capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(refDelayKmdNotifyMicroseconds + 10, hwInfoOverriden->capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(refDelayKmdNotifyMicroseconds + 10, hwInfoOverriden->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_EQ(!refEnableQuickKmdSleep, hwInfoOverriden->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(refDelayQuickKmdSleepMicroseconds + 11, hwInfoOverriden->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
DeviceFactory::releaseDevices(); DeviceFactory::releaseDevices();
} }
@ -142,4 +150,4 @@ TEST_F(DeviceFactoryTest, givenPointerToHwInfoWhenGetDevicedCalledThenRequiedSur
EXPECT_EQ(hwInfo->pSysInfo->CsrSizeInMb * MemoryConstants::megaByte, hwInfo->capabilityTable.requiredPreemptionSurfaceSize); EXPECT_EQ(hwInfo->pSysInfo->CsrSizeInMb * MemoryConstants::megaByte, hwInfo->capabilityTable.requiredPreemptionSurfaceSize);
DeviceFactory::releaseDevices(); DeviceFactory::releaseDevices();
} }

View File

@ -110,9 +110,7 @@ TEST(WddmTestEnumAdapters, expectTrue) {
} }
TEST(WddmTestEnumAdapters, givenEmptyHardwareInfoWhenEnumAdapterIsCalledThenCapabilityTableIsSet) { TEST(WddmTestEnumAdapters, givenEmptyHardwareInfoWhenEnumAdapterIsCalledThenCapabilityTableIsSet) {
HardwareInfo outHwInfo; HardwareInfo outHwInfo = {};
memset(&outHwInfo, 0, sizeof(outHwInfo));
auto hwInfo = *platformDevices[0]; auto hwInfo = *platformDevices[0];
std::unique_ptr<OsLibrary> mockGdiDll(setAdapterInfo(hwInfo.pPlatform, hwInfo.pSysInfo)); std::unique_ptr<OsLibrary> mockGdiDll(setAdapterInfo(hwInfo.pPlatform, hwInfo.pSysInfo));
@ -127,7 +125,10 @@ TEST(WddmTestEnumAdapters, givenEmptyHardwareInfoWhenEnumAdapterIsCalledThenCapa
EXPECT_EQ(outHwInfo.capabilityTable.defaultProfilingTimerResolution, hwInfo.capabilityTable.defaultProfilingTimerResolution); EXPECT_EQ(outHwInfo.capabilityTable.defaultProfilingTimerResolution, hwInfo.capabilityTable.defaultProfilingTimerResolution);
EXPECT_EQ(outHwInfo.capabilityTable.clVersionSupport, hwInfo.capabilityTable.clVersionSupport); EXPECT_EQ(outHwInfo.capabilityTable.clVersionSupport, hwInfo.capabilityTable.clVersionSupport);
EXPECT_EQ(outHwInfo.capabilityTable.delayKmdNotifyMicroseconds, hwInfo.capabilityTable.delayKmdNotifyMicroseconds); EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify, hwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, hwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep, hwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, hwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
delete outHwInfo.pPlatform; delete outHwInfo.pPlatform;
delete outHwInfo.pSkuTable; delete outHwInfo.pSkuTable;

View File

@ -41,6 +41,8 @@ EnableForcePin = false
CsrDispatchMode = 0 CsrDispatchMode = 0
OverrideEnableKmdNotify = -1 OverrideEnableKmdNotify = -1
OverrideKmdNotifyDelayMs = -1 OverrideKmdNotifyDelayMs = -1
OverrideEnableQuickKmdSleep = -1
OverrideQuickKmdSleepDelayMicroseconds = -1
Enable64kbpages = -1 Enable64kbpages = -1
NodeOrdinal = -1 NodeOrdinal = -1
ProductFamilyOverride = unk ProductFamilyOverride = unk