Make KMD wait function non default and available under debug key

Related-To: NEO-5845

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-08-02 19:27:46 +00:00
committed by Compute-Runtime-Automation
parent e880cf2ad6
commit b454bcbfe7
5 changed files with 99 additions and 41 deletions

View File

@@ -24,11 +24,16 @@ namespace L0 {
CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {};
CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc)
: device(device), csr(csr), desc(*desc) {
: desc(*desc), device(device), csr(csr) {
int overrideCmdQueueSyncMode = NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.get();
if (overrideCmdQueueSyncMode != -1) {
this->desc.mode = static_cast<ze_command_queue_mode_t>(overrideCmdQueueSyncMode);
}
int overrideUseKmdWaitFunction = NEO::DebugManager.flags.OverrideUseKmdWaitFunction.get();
if (overrideUseKmdWaitFunction != -1) {
useKmdWaitFunction = !!(overrideUseKmdWaitFunction);
}
}
ze_result_t CommandQueueImp::destroy() {
@@ -79,9 +84,10 @@ void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &
}
ze_result_t CommandQueueImp::synchronize(uint64_t timeout) {
if (timeout == std::numeric_limits<uint64_t>::max()) {
if ((timeout == std::numeric_limits<uint64_t>::max()) && useKmdWaitFunction) {
auto &waitPair = buffers.getCurrentFlushStamp();
csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, false);
postSyncOperations();
return ZE_RESULT_SUCCESS;
} else {
return synchronizeByPollingForTaskCount(timeout);
@@ -101,11 +107,7 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
return ZE_RESULT_NOT_READY;
}
printFunctionsPrintfOutput();
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
}
postSyncOperations();
return ZE_RESULT_SUCCESS;
}
@@ -118,6 +120,14 @@ void CommandQueueImp::printFunctionsPrintfOutput() {
this->printfFunctionContainer.clear();
}
void CommandQueueImp::postSyncOperations() {
printFunctionsPrintfOutput();
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
}
}
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
const ze_command_queue_desc_t *desc, bool isCopyOnly, bool isInternal, ze_result_t &returnValue) {
CommandQueueAllocatorFn allocator = nullptr;

View File

@@ -89,15 +89,21 @@ struct CommandQueueImp : public CommandQueue {
void printFunctionsPrintfOutput();
Device *device = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;
ze_command_queue_desc_t desc;
NEO::LinearStream *commandStream = nullptr;
std::atomic<uint32_t> taskCount{0};
std::vector<Kernel *> printfFunctionContainer;
bool gpgpuEnabled = false;
void postSyncOperations();
CommandBufferManager buffers;
NEO::HeapContainer heapContainer;
ze_command_queue_desc_t desc;
std::vector<Kernel *> printfFunctionContainer;
Device *device = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;
NEO::LinearStream *commandStream = nullptr;
std::atomic<uint32_t> taskCount{0};
bool gpgpuEnabled = false;
bool useKmdWaitFunction = false;
};
} // namespace L0

View File

@@ -1397,37 +1397,77 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
using CommandQueueSynchronizeTest = Test<ContextFixture>;
template <typename GfxFamily>
struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
~SynchronizeCsr() override {
delete tagAddress;
}
SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: NEO::UltCommandStreamReceiver<GfxFamily>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
tagAddress = new uint32_t;
}
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
waitForComplitionCalledTimes++;
return true;
}
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
waitForTaskCountWithKmdNotifyFallbackCalled++;
NEO::UltCommandStreamReceiver<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode);
}
volatile uint32_t *getTagAddress() const override {
return tagAddress;
}
uint32_t *tagAddress;
uint32_t waitForComplitionCalledTimes = 0;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
};
HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) {
struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<FamilyType> {
~SynchronizeCsr() override {
delete tagAddress;
}
auto csr = std::unique_ptr<SynchronizeCsr<FamilyType>>(new SynchronizeCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
device->getNEODevice()->getDeviceBitfield()));
SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: NEO::UltCommandStreamReceiver<FamilyType>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
tagAddress = new uint32_t;
}
ze_command_queue_desc_t desc = {};
ze_command_queue_handle_t commandQueue = {};
ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_NE(nullptr, commandQueue);
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
waitForComplitionCalledTimes++;
return true;
}
CommandQueue *queue = reinterpret_cast<CommandQueue *>(L0::CommandQueue::fromHandle(commandQueue));
queue->csr = csr.get();
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
waitForTaskCountWithKmdNotifyFallbackCalled++;
NEO::UltCommandStreamReceiver<FamilyType>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode);
}
uint64_t timeout = 10;
bool enableTimeoutExpected = true;
int64_t timeoutMicrosecondsExpected = timeout;
volatile uint32_t *getTagAddress() const override {
return tagAddress;
}
uint32_t *tagAddress;
uint32_t waitForComplitionCalledTimes = 0;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
};
queue->synchronize(timeout);
EXPECT_EQ(1u, csr->waitForComplitionCalledTimes);
EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled);
timeout = std::numeric_limits<uint64_t>::max();
enableTimeoutExpected = false;
timeoutMicrosecondsExpected = NEO::TimeoutControls::maxTimeout;
queue->synchronize(timeout);
EXPECT_EQ(2u, csr->waitForComplitionCalledTimes);
EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled);
L0::CommandQueue::fromHandle(commandQueue)->destroy();
}
HWTEST_F(CommandQueueSynchronizeTest, givenDebugOverrideEnabledWhenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) {
DebugManagerStateRestore restore;
NEO::DebugManager.flags.OverrideUseKmdWaitFunction.set(1);
auto csr = std::unique_ptr<SynchronizeCsr<FamilyType>>(new SynchronizeCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
device->getNEODevice()->getDeviceBitfield()));
auto csr = std::unique_ptr<SynchronizeCsr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(),
device->getNEODevice()->getDeviceBitfield()));
ze_command_queue_desc_t desc = {};
ze_command_queue_handle_t commandQueue = {};
ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue);

View File

@@ -304,6 +304,7 @@ EnableUserFenceUseCtxId = -1
EnableResourceTags = 0
SetKmdWaitTimeout = -1
OverrideNotifyEnableForTagUpdatePostSync = -1
OverrideUseKmdWaitFunction = -1
EnableCacheFlushAfterWalkerForAllQueues = -1
Force32BitDriverSupport = -1
OverrideCmdQueueSynchronousMode = -1

View File

@@ -69,6 +69,7 @@ DECLARE_DEBUG_VARIABLE(bool, GlobalSequencerFlushOnCopyEngine, false, "false: di
DECLARE_DEBUG_VARIABLE(bool, UseImmDataWriteModeOnPostSyncOperation, false, "Use IMM data write mode as post sync operation in Compute Walker")
DECLARE_DEBUG_VARIABLE(bool, DisableTimestampEvents, false, "Timestamp info will not be reported and events will only perform regular synchronization functions")
DECLARE_DEBUG_VARIABLE(bool, EnableResourceTags, false, "Enable resource tagging in GMM")
DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path")
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
DECLARE_DEBUG_VARIABLE(std::string, LoadBinarySipFromFile, std::string("unk"), "Select binary file to load SIP kernel raw binary")
DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "-1: disable, 0+: tile mask, each bit corresponds to tile")
@@ -152,7 +153,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampPacketSize, -1, "-1: default, >
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkGroupCount, -1, "-1: default, >0: Max WG size")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideCmdQueueSynchronousMode, -1, "Overrides all command queues synchronous mode: -1: do not override, 0: implicit driver behavior, 1: synchronous, 2: asynchronous")
DECLARE_DEBUG_VARIABLE(int64_t, EnableStatelessCompression, -1, "-1: default, 0: disable, 1: Enable E2EC in SBA for all stateless accesses")
DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
@@ -212,6 +212,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForNewResource, -1, "-1: pla
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForIdleGpu, -1, "-1: platform specific, 0: force disable, 1: force enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalkerForAllQueues, -1, "Enable cache flush after walker even if queue doesn't require it")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKernelSizeLimitForSmallDispatch, -1, "-1: default, >=0: on XEHP+ changes the threshold for treating kernel as small during NULL LWS selection")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0: disabled), 0: disabled, 1: enabled. It uses only busy loop to wait or busy loop with KMD wait function, when KMD fallback is enabled")
/*DIRECT SUBMISSION FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
@@ -228,8 +229,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideRenderSupport, -1, "Over
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Overrides default compute support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableMonitorFence, -1, "Disable dispatching monitor fence commands")
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU")
/*FEATURE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")