mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
Make KMD wait function non default and available under debug key
Related-To: NEO-5845 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e880cf2ad6
commit
b454bcbfe7
@@ -24,11 +24,16 @@ namespace L0 {
|
||||
CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {};
|
||||
|
||||
// Constructs a command queue bound to `device` and `csr`, keeping its own copy of `*desc`.
// After member initialization, two debug flags may override parts of the queue configuration.
CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc)
|
||||
// NOTE(review): two initializer lists appear back to back in this diff view; presumably this
// first one is the removed row and the next one is its replacement - confirm against the repository.
: device(device), csr(csr), desc(*desc) {
|
||||
: desc(*desc), device(device), csr(csr) {
|
||||
// -1 means "do not override"; any other value replaces the mode copied from the descriptor.
int overrideCmdQueueSyncMode = NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.get();
|
||||
if (overrideCmdQueueSyncMode != -1) {
|
||||
this->desc.mode = static_cast<ze_command_queue_mode_t>(overrideCmdQueueSyncMode);
|
||||
}
|
||||
|
||||
// -1 leaves useKmdWaitFunction at its in-class default; otherwise 0 disables and non-zero enables it.
int overrideUseKmdWaitFunction = NEO::DebugManager.flags.OverrideUseKmdWaitFunction.get();
|
||||
if (overrideUseKmdWaitFunction != -1) {
|
||||
// `!!` collapses the integer flag value to a bool.
useKmdWaitFunction = !!(overrideUseKmdWaitFunction);
|
||||
}
|
||||
}
|
||||
|
||||
ze_result_t CommandQueueImp::destroy() {
|
||||
@@ -79,9 +84,10 @@ void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &
|
||||
}
|
||||
|
||||
ze_result_t CommandQueueImp::synchronize(uint64_t timeout) {
|
||||
if (timeout == std::numeric_limits<uint64_t>::max()) {
|
||||
if ((timeout == std::numeric_limits<uint64_t>::max()) && useKmdWaitFunction) {
|
||||
auto &waitPair = buffers.getCurrentFlushStamp();
|
||||
csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, false);
|
||||
postSyncOperations();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
} else {
|
||||
return synchronizeByPollingForTaskCount(timeout);
|
||||
@@ -101,11 +107,7 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
printFunctionsPrintfOutput();
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
|
||||
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
|
||||
}
|
||||
postSyncOperations();
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -118,6 +120,14 @@ void CommandQueueImp::printFunctionsPrintfOutput() {
|
||||
this->printfFunctionContainer.clear();
|
||||
}
|
||||
|
||||
// Work performed once a synchronize call has completed: emits pending kernel printf output and,
// when debugger logging is active, prints the debugger's tracked addresses.
// Shared by both wait paths visible in this file (the KMD wait path and the polling path).
void CommandQueueImp::postSyncOperations() {
|
||||
printFunctionsPrintfOutput();
|
||||
|
||||
// All three conditions must hold: debugging is enabled for this queue's usage, the device has an
// L0 debugger, and the DebuggerLogBitmask debug flag is non-zero.
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
|
||||
// The addresses are printed for the OS context id of this queue's command stream receiver.
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
|
||||
}
|
||||
}
|
||||
|
||||
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
|
||||
const ze_command_queue_desc_t *desc, bool isCopyOnly, bool isInternal, ze_result_t &returnValue) {
|
||||
CommandQueueAllocatorFn allocator = nullptr;
|
||||
|
||||
@@ -89,15 +89,21 @@ struct CommandQueueImp : public CommandQueue {
|
||||
|
||||
void printFunctionsPrintfOutput();
|
||||
|
||||
Device *device = nullptr;
|
||||
NEO::CommandStreamReceiver *csr = nullptr;
|
||||
ze_command_queue_desc_t desc;
|
||||
NEO::LinearStream *commandStream = nullptr;
|
||||
std::atomic<uint32_t> taskCount{0};
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
bool gpgpuEnabled = false;
|
||||
void postSyncOperations();
|
||||
|
||||
CommandBufferManager buffers;
|
||||
NEO::HeapContainer heapContainer;
|
||||
ze_command_queue_desc_t desc;
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
|
||||
Device *device = nullptr;
|
||||
NEO::CommandStreamReceiver *csr = nullptr;
|
||||
NEO::LinearStream *commandStream = nullptr;
|
||||
|
||||
std::atomic<uint32_t> taskCount{0};
|
||||
|
||||
bool gpgpuEnabled = false;
|
||||
bool useKmdWaitFunction = false;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1397,37 +1397,77 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
|
||||
using CommandQueueSynchronizeTest = Test<ContextFixture>;
|
||||
|
||||
// Test double for a command stream receiver: it counts how often each wait entry point is called
// and serves a tag address that it allocates and frees itself.
template <typename GfxFamily>
|
||||
struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
|
||||
// Releases the tag storage allocated in the constructor.
~SynchronizeCsr() override {
|
||||
delete tagAddress;
|
||||
}
|
||||
|
||||
// Forwards to the base constructor (casting away the const on executionEnvironment) and
// allocates the tag storage.
SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
|
||||
: NEO::UltCommandStreamReceiver<GfxFamily>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
|
||||
// `new uint32_t` default-initializes, so the pointed-to value is not set here.
tagAddress = new uint32_t;
|
||||
}
|
||||
|
||||
// Records the call and reports success immediately; all three arguments are ignored.
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
|
||||
waitForComplitionCalledTimes++;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Records the call, then delegates to the base class implementation with unchanged arguments.
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
|
||||
waitForTaskCountWithKmdNotifyFallbackCalled++;
|
||||
NEO::UltCommandStreamReceiver<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode);
|
||||
}
|
||||
|
||||
// Returns the tag storage owned by this object.
volatile uint32_t *getTagAddress() const override {
|
||||
return tagAddress;
|
||||
}
|
||||
|
||||
// Owned tag storage; allocated in the constructor, freed in the destructor.
uint32_t *tagAddress;
|
||||
// Number of waitForCompletionWithTimeout calls (identifier spelling "Complition" kept as in the source).
uint32_t waitForComplitionCalledTimes = 0;
|
||||
// Number of waitForTaskCountWithKmdNotifyFallback calls.
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
|
||||
};
|
||||
|
||||
HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) {
|
||||
struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<FamilyType> {
|
||||
~SynchronizeCsr() override {
|
||||
delete tagAddress;
|
||||
}
|
||||
auto csr = std::unique_ptr<SynchronizeCsr<FamilyType>>(new SynchronizeCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
|
||||
device->getNEODevice()->getDeviceBitfield()));
|
||||
|
||||
SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
|
||||
: NEO::UltCommandStreamReceiver<FamilyType>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
|
||||
tagAddress = new uint32_t;
|
||||
}
|
||||
ze_command_queue_desc_t desc = {};
|
||||
ze_command_queue_handle_t commandQueue = {};
|
||||
ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
EXPECT_NE(nullptr, commandQueue);
|
||||
|
||||
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
|
||||
waitForComplitionCalledTimes++;
|
||||
return true;
|
||||
}
|
||||
CommandQueue *queue = reinterpret_cast<CommandQueue *>(L0::CommandQueue::fromHandle(commandQueue));
|
||||
queue->csr = csr.get();
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
|
||||
waitForTaskCountWithKmdNotifyFallbackCalled++;
|
||||
NEO::UltCommandStreamReceiver<FamilyType>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode);
|
||||
}
|
||||
uint64_t timeout = 10;
|
||||
bool enableTimeoutExpected = true;
|
||||
int64_t timeoutMicrosecondsExpected = timeout;
|
||||
|
||||
volatile uint32_t *getTagAddress() const override {
|
||||
return tagAddress;
|
||||
}
|
||||
uint32_t *tagAddress;
|
||||
uint32_t waitForComplitionCalledTimes = 0;
|
||||
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
|
||||
};
|
||||
queue->synchronize(timeout);
|
||||
|
||||
EXPECT_EQ(1u, csr->waitForComplitionCalledTimes);
|
||||
EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
|
||||
timeout = std::numeric_limits<uint64_t>::max();
|
||||
enableTimeoutExpected = false;
|
||||
timeoutMicrosecondsExpected = NEO::TimeoutControls::maxTimeout;
|
||||
|
||||
queue->synchronize(timeout);
|
||||
|
||||
EXPECT_EQ(2u, csr->waitForComplitionCalledTimes);
|
||||
EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
|
||||
L0::CommandQueue::fromHandle(commandQueue)->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueSynchronizeTest, givenDebugOverrideEnabledWhenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) {
|
||||
DebugManagerStateRestore restore;
|
||||
NEO::DebugManager.flags.OverrideUseKmdWaitFunction.set(1);
|
||||
|
||||
auto csr = std::unique_ptr<SynchronizeCsr<FamilyType>>(new SynchronizeCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
|
||||
device->getNEODevice()->getDeviceBitfield()));
|
||||
|
||||
auto csr = std::unique_ptr<SynchronizeCsr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(),
|
||||
device->getNEODevice()->getDeviceBitfield()));
|
||||
ze_command_queue_desc_t desc = {};
|
||||
ze_command_queue_handle_t commandQueue = {};
|
||||
ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue);
|
||||
|
||||
@@ -304,6 +304,7 @@ EnableUserFenceUseCtxId = -1
|
||||
EnableResourceTags = 0
|
||||
SetKmdWaitTimeout = -1
|
||||
OverrideNotifyEnableForTagUpdatePostSync = -1
|
||||
OverrideUseKmdWaitFunction = -1
|
||||
EnableCacheFlushAfterWalkerForAllQueues = -1
|
||||
Force32BitDriverSupport = -1
|
||||
OverrideCmdQueueSynchronousMode = -1
|
||||
|
||||
@@ -69,6 +69,7 @@ DECLARE_DEBUG_VARIABLE(bool, GlobalSequencerFlushOnCopyEngine, false, "false: di
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseImmDataWriteModeOnPostSyncOperation, false, "Use IMM data write mode as post sync operation in Compute Walker")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableTimestampEvents, false, "Timestamp info will not be reported and events will only perform regular synchronization functions")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableResourceTags, false, "Enable resource tagging in GMM")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path")
|
||||
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
|
||||
DECLARE_DEBUG_VARIABLE(std::string, LoadBinarySipFromFile, std::string("unk"), "Select binary file to load SIP kernel raw binary")
|
||||
DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "-1: disable, 0+: tile mask, each bit corresponds to tile")
|
||||
@@ -152,7 +153,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampPacketSize, -1, "-1: default, >
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkGroupCount, -1, "-1: default, >0: Max WG size")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideCmdQueueSynchronousMode, -1, "Overrides all command queues synchronous mode: -1: do not override, 0: implicit driver behavior, 1: synchronous, 2: asynchronous")
|
||||
DECLARE_DEBUG_VARIABLE(int64_t, EnableStatelessCompression, -1, "-1: default, 0: disable, 1: Enable E2EC in SBA for all stateless accesses")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
|
||||
@@ -212,6 +212,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForNewResource, -1, "-1: pla
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForIdleGpu, -1, "-1: platform specific, 0: force disable, 1: force enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalkerForAllQueues, -1, "Enable cache flush after walker even if queue doesn't require it")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKernelSizeLimitForSmallDispatch, -1, "-1: default, >=0: on XEHP+ changes the threshold for treating kernel as small during NULL LWS selection")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0: disabled), 0: disabled, 1: enabled. It uses only busy loop to wait or busy loop with KMD wait function, when KMD fallback is enabled")
|
||||
|
||||
/*DIRECT SUBMISSION FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
|
||||
@@ -228,8 +229,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideRenderSupport, -1, "Over
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Overrides default compute support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableMonitorFence, -1, "Disable dispatching monitor fence commands")
|
||||
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU")
|
||||
|
||||
/*FEATURE FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
|
||||
|
||||
Reference in New Issue
Block a user