feature: new heuristic to enable relaxed ordering 2
Related-To: NEO-13431
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
8f671cb6a8
commit
37e81d2a11
|
@ -354,7 +354,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||||
}
|
}
|
||||||
void postInitComputeSetup();
|
void postInitComputeSetup();
|
||||||
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
||||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const { return false; }
|
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) { return false; }
|
||||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||||
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
|
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
|
||||||
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);
|
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);
|
||||||
|
|
|
@ -208,7 +208,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||||
TransferType getTransferType(const CpuMemCopyInfo &cpuMemCopyInfo);
|
TransferType getTransferType(const CpuMemCopyInfo &cpuMemCopyInfo);
|
||||||
size_t getTransferThreshold(TransferType transferType);
|
size_t getTransferThreshold(TransferType transferType);
|
||||||
bool isBarrierRequired();
|
bool isBarrierRequired();
|
||||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const override;
|
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) override;
|
||||||
bool skipInOrderNonWalkerSignalingAllowed(ze_event_handle_t signalEvent) const override;
|
bool skipInOrderNonWalkerSignalingAllowed(ze_event_handle_t signalEvent) const override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -221,12 +221,15 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||||
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
|
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
|
||||||
void handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
|
void handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
|
||||||
|
CommandQueue *getCmdQImmediate(bool copyOffloadOperation) const;
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL void checkAssert();
|
MOCKABLE_VIRTUAL void checkAssert();
|
||||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||||
|
uint64_t relaxedOrderingCounter = 0;
|
||||||
std::atomic<bool> dependenciesPresent{false};
|
std::atomic<bool> dependenciesPresent{false};
|
||||||
bool latestFlushIsHostVisible = false;
|
bool latestFlushIsHostVisible = false;
|
||||||
bool latestFlushIsCopyOffload = false;
|
bool latestFlushIsCopyOffload = false;
|
||||||
|
bool keepRelaxedOrderingEnabled = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <PRODUCT_FAMILY gfxProductFamily>
|
template <PRODUCT_FAMILY gfxProductFamily>
|
||||||
|
|
|
@ -398,7 +398,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||||
|
|
||||||
// Side-by-side diff rows: each statement is listed twice, pre-change first and post-change second.
//
// Thin forwarder to executeCommandListImmediateWithFlushTaskImpl(). It passes the flags through and
// supplies the immediate queue to submit on: the copy-offload queue when copyOffloadSubmission is
// true, otherwise the regular immediate queue.
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, bool requireTaskCountUpdate) {
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, bool requireTaskCountUpdate) {
|
||||||
// Note the argument order of the Impl call: requireTaskCountUpdate is passed before the queue,
// while in this function's own signature copyOffloadSubmission precedes requireTaskCountUpdate.
// Pre-change row: queue picked inline with a ternary.
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, requireTaskCountUpdate, copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate);
|
// Post-change row: same selection, delegated to getCmdQImmediate().
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, requireTaskCountUpdate, getCmdQImmediate(copyOffloadSubmission));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
@ -546,7 +546,9 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
|
||||||
bool nonWalkerSignalingHasRelaxedOrdering = false;
|
bool nonWalkerSignalingHasRelaxedOrdering = false;
|
||||||
|
|
||||||
if (NEO::debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.get() != 0) {
|
if (NEO::debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.get() != 0) {
|
||||||
|
auto counterValueBeforeSecondCheck = this->relaxedOrderingCounter;
|
||||||
nonWalkerSignalingHasRelaxedOrdering = isRelaxedOrderingDispatchAllowed(1, false);
|
nonWalkerSignalingHasRelaxedOrdering = isRelaxedOrderingDispatchAllowed(1, false);
|
||||||
|
this->relaxedOrderingCounter = counterValueBeforeSecondCheck; // dont increment twice
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nonWalkerSignalingHasRelaxedOrdering) {
|
if (nonWalkerSignalingHasRelaxedOrdering) {
|
||||||
|
@ -1064,13 +1066,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||||
return hostSynchronize(timeout, true);
|
return hostSynchronize(timeout, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the immediate command queue used for a submission:
// cmdQImmediateCopyOffload when copyOffloadOperation is true, cmdQImmediate otherwise.
// No null check is performed here; the selected member is returned as-is.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
CommandQueue *CommandListCoreFamilyImmediate<gfxCoreFamily>::getCmdQImmediate(bool copyOffloadOperation) const {
|
||||||
|
return copyOffloadOperation ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||||
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||||
bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent,
|
bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent,
|
||||||
bool requireTaskCountUpdate) {
|
bool requireTaskCountUpdate) {
|
||||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||||
|
|
||||||
auto queue = copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
auto queue = getCmdQImmediate(copyOffloadSubmission);
|
||||||
this->latestFlushIsCopyOffload = copyOffloadSubmission;
|
this->latestFlushIsCopyOffload = copyOffloadSubmission;
|
||||||
|
|
||||||
if (NEO::debugManager.flags.DeferStateInitSubmissionToFirstRegularUsage.get() == 1) {
|
if (NEO::debugManager.flags.DeferStateInitSubmissionToFirstRegularUsage.get() == 1) {
|
||||||
|
@ -1431,10 +1438,46 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Side-by-side diff rows: where a statement changed, the pre-change line comes first and the
// post-change line second. The comments below describe the post-change (non-const) version.
//
// Decides whether this immediate command list may dispatch with relaxed ordering.
//   numWaitEvents - number of explicit wait events for the append.
//   copyOffload   - selects which CSR / immediate queue is consulted (getCsr / getCmdQImmediate).
// Returns false immediately when the selected CSR reports relaxed ordering as disabled.
// When DirectSubmissionRelaxedOrderingCounterHeuristic == 1 the answer comes from the
// relaxedOrderingCounter / keepRelaxedOrderingEnabled members, which this call updates.
// Otherwise the decision is delegated to NEO::RelaxedOrderingHelper.
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const {
|
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
|
||||||
|
auto csr = getCsr(copyOffload);
|
||||||
|
if (!csr->directSubmissionRelaxedOrderingEnabled()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// An in-order dependency counts as one additional event on top of the explicit wait events.
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
|
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
|
||||||
|
|
||||||
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*getCsr(copyOffload), numEvents);
|
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.get() == 1) {
|
||||||
|
// Default threshold is the CSR's relaxed ordering queue depth; the debug flag further down can override it.
uint32_t relaxedOrderingCounterThreshold = csr->getDirectSubmissionRelaxedOrderingQueueDepth();
|
||||||
|
|
||||||
|
auto queueTaskCount = getCmdQImmediate(copyOffload)->getTaskCount();
|
||||||
|
auto csrTaskCount = csr->peekTaskCount();
|
||||||
|
|
||||||
|
// If the CSR task count is exactly 1 and either init-with-first-submission is supported for this
// CSR type or heapless state init is enabled, the queue (expected to still be at 0) is treated as
// being at 1 so that the equality check below matches.
// NOTE(review): presumably that first CSR task is the device-init submission, which did not go
// through this queue - confirm.
if ((this->device->getNEODevice()->isInitDeviceWithFirstSubmissionSupported(csr->getType()) || this->heaplessStateInitEnabled) && csr->peekTaskCount() == 1) {
|
||||||
|
DEBUG_BREAK_IF(queueTaskCount != 0);
|
||||||
|
queueTaskCount = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Any flag value other than -1 replaces the threshold (the value is cast to uint32_t unchanged).
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get() != -1) {
|
||||||
|
relaxedOrderingCounterThreshold = static_cast<uint32_t>(NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Equal task counts: count one more consecutive call for this command list.
if (queueTaskCount == csrTaskCount) {
|
||||||
|
relaxedOrderingCounter++;
|
||||||
|
} else {
|
||||||
|
// Submission from another queue. Reset counter and keep relaxed ordering allowed
|
||||||
|
relaxedOrderingCounter = 0;
|
||||||
|
this->keepRelaxedOrderingEnabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Counter strictly above the threshold: stop keeping relaxed ordering enabled and refuse this dispatch.
if (relaxedOrderingCounter > static_cast<uint64_t>(relaxedOrderingCounterThreshold)) {
|
||||||
|
this->keepRelaxedOrderingEnabled = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allowed only while keepRelaxedOrderingEnabled is set and there is at least one event to wait on.
return (keepRelaxedOrderingEnabled && (numEvents > 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Heuristic not enabled: defer to the shared helper, reusing the csr fetched at the top.
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*csr, numEvents);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
|
|
@ -206,6 +206,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||||
using BaseClass::eventSignalPipeControl;
|
using BaseClass::eventSignalPipeControl;
|
||||||
using BaseClass::finalStreamState;
|
using BaseClass::finalStreamState;
|
||||||
using BaseClass::frontEndStateTracking;
|
using BaseClass::frontEndStateTracking;
|
||||||
|
using BaseClass::getCmdQImmediate;
|
||||||
using BaseClass::getDcFlushRequired;
|
using BaseClass::getDcFlushRequired;
|
||||||
using BaseClass::getHostPtrAlloc;
|
using BaseClass::getHostPtrAlloc;
|
||||||
using BaseClass::getInOrderIncrementValue;
|
using BaseClass::getInOrderIncrementValue;
|
||||||
|
@ -227,6 +228,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||||
using BaseClass::pipeControlMultiKernelEventSync;
|
using BaseClass::pipeControlMultiKernelEventSync;
|
||||||
using BaseClass::pipelineSelectStateTracking;
|
using BaseClass::pipelineSelectStateTracking;
|
||||||
using BaseClass::programRegionGroupBarrier;
|
using BaseClass::programRegionGroupBarrier;
|
||||||
|
using BaseClass::relaxedOrderingCounter;
|
||||||
using BaseClass::requiredStreamState;
|
using BaseClass::requiredStreamState;
|
||||||
using BaseClass::requiresQueueUncachedMocs;
|
using BaseClass::requiresQueueUncachedMocs;
|
||||||
using BaseClass::signalAllEventPackets;
|
using BaseClass::signalAllEventPackets;
|
||||||
|
|
|
@ -2057,6 +2057,146 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingEnabledWhenSignalEventCalledT
|
||||||
verifyFlags(false, true); // relaxed ordering disabled == stalling semaphore
|
verifyFlags(false, true); // relaxed ordering disabled == stalling semaphore
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Walks the counter heuristic through two immediate command lists created from the same fixture.
// After every append, verifyFlags checks the list's relaxedOrderingCounter and whether the last
// flushed batch buffer carried relaxed ordering dependencies. Expectations exercised:
//  - repeated appends on the same list increment the counter,
//  - an append after the other list has submitted resets the counter to 0,
//  - relaxed ordering is reported only after such a reset and only up to the threshold
//    (CSR queue depth first, then the debug-flag override of 1).
HWTEST2_F(InOrderCmdListTests, givenCounterHeuristicForRelaxedOrderingEnabledWhenAppendingThenEnableRelaxedOrderingCorrectly, IsAtLeastXeHpcCore) {
|
||||||
|
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||||
|
debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.set(1);
|
||||||
|
|
||||||
|
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||||
|
// Needed so latestFlushedBatchBuffer can be inspected in verifyFlags.
ultCsr->recordFlushedBatchBuffer = true;
|
||||||
|
|
||||||
|
// Ownership of the mock is handed to the CSR via reset() on the next line.
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
|
||||||
|
ultCsr->directSubmission.reset(directSubmission);
|
||||||
|
|
||||||
|
auto verifyFlags = [&ultCsr](bool relaxedOrderingExpected, auto &cmdList, uint64_t expectedCounter) {
|
||||||
|
EXPECT_EQ(expectedCounter, cmdList->relaxedOrderingCounter);
|
||||||
|
EXPECT_EQ(relaxedOrderingExpected, ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto immCmdList0 = createImmCmdList<gfxCoreFamily>();
|
||||||
|
auto queue0 = immCmdList0->getCmdQImmediate(false);
|
||||||
|
EXPECT_EQ(0u, queue0->getTaskCount());
|
||||||
|
EXPECT_EQ(0u, immCmdList0->relaxedOrderingCounter);
|
||||||
|
|
||||||
|
// First queue. Don't enable relaxed ordering yet
|
||||||
|
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(false, immCmdList0, 1);
|
||||||
|
|
||||||
|
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(false, immCmdList0, 2);
|
||||||
|
|
||||||
|
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(false, immCmdList0, 3);
|
||||||
|
|
||||||
|
auto immCmdList1 = createImmCmdList<gfxCoreFamily>();
|
||||||
|
auto queue1 = immCmdList1->getCmdQImmediate(false);
|
||||||
|
EXPECT_EQ(0u, queue1->getTaskCount());
|
||||||
|
EXPECT_EQ(0u, immCmdList1->relaxedOrderingCounter);
|
||||||
|
|
||||||
|
// Reset to 0 - new queue
|
||||||
|
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(false, immCmdList1, 0); // no dependencies
|
||||||
|
|
||||||
|
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(true, immCmdList1, 1);
|
||||||
|
|
||||||
|
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(true, immCmdList1, 2);
|
||||||
|
|
||||||
|
// Back to queue0. Reset to 0 - new queue
|
||||||
|
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(true, immCmdList0, 0);
|
||||||
|
|
||||||
|
EXPECT_TRUE(ultCsr->getDirectSubmissionRelaxedOrderingQueueDepth() > 1);
|
||||||
|
|
||||||
|
// Keep appending on the same list until the counter equals the queue depth;
// relaxed ordering is still expected at every step, including counter == depth.
for (uint32_t i = 0; i < ultCsr->getDirectSubmissionRelaxedOrderingQueueDepth(); i++) {
|
||||||
|
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(true, immCmdList0, i + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Threshold reached
|
||||||
|
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(false, immCmdList0, ultCsr->getDirectSubmissionRelaxedOrderingQueueDepth() + 1);
|
||||||
|
|
||||||
|
// From here on the threshold comes from the debug flag (1) instead of the CSR queue depth.
debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.set(1);
|
||||||
|
|
||||||
|
// Back to queue1. Reset to 0 - new queue
|
||||||
|
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(true, immCmdList1, 0);
|
||||||
|
|
||||||
|
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(true, immCmdList1, 1);
|
||||||
|
|
||||||
|
// Threshold reached
|
||||||
|
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
verifyFlags(false, immCmdList1, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the counter heuristic when the CSR already has task count 1 from
// initializeDeviceWithFirstSubmission() while the fresh immediate queue is still at 0:
// the first append on the list is expected to raise relaxedOrderingCounter to 1.
// The test is skipped when init-with-first-submission is not supported for this CSR type.
HWTEST2_F(InOrderCmdListTests, givenCounterHeuristicForRelaxedOrderingEnabledWithFirstDeviceInitSubmissionWhenAppendingThenEnableRelaxedOrderingCorrectly, IsAtLeastXeHpcCore) {
|
||||||
|
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||||
|
debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.set(1);
|
||||||
|
|
||||||
|
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||||
|
ultCsr->recordFlushedBatchBuffer = true;
|
||||||
|
|
||||||
|
// Back up ultHwConfig before changing it.
// NOTE(review): presumably VariableBackup restores the value when it goes out of scope - confirm.
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||||
|
ultHwConfig.useFirstSubmissionInitDevice = true;
|
||||||
|
|
||||||
|
if (!device->getNEODevice()->isInitDeviceWithFirstSubmissionSupported(ultCsr->getType())) {
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ownership of the mock is handed to the CSR via reset() on the next line.
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
|
||||||
|
ultCsr->directSubmission.reset(directSubmission);
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, ultCsr->peekTaskCount());
|
||||||
|
|
||||||
|
// The init submission moves the CSR task count from 0 to 1 before any command list exists.
ultCsr->initializeDeviceWithFirstSubmission(*device->getNEODevice());
|
||||||
|
EXPECT_EQ(1u, ultCsr->peekTaskCount());
|
||||||
|
|
||||||
|
auto immCmdList0 = createImmCmdList<gfxCoreFamily>();
|
||||||
|
auto queue0 = immCmdList0->getCmdQImmediate(false);
|
||||||
|
EXPECT_EQ(0u, queue0->getTaskCount());
|
||||||
|
EXPECT_EQ(0u, immCmdList0->relaxedOrderingCounter);
|
||||||
|
|
||||||
|
// Queue at 0 vs CSR at 1: the counter must still be incremented rather than reset.
immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
EXPECT_EQ(1u, immCmdList0->relaxedOrderingCounter);
|
||||||
|
}
|
||||||
|
|
||||||
|
// With the counter heuristic and EnableInOrderRelaxedOrderingForEventsChaining both enabled,
// every zeCommandListAppendLaunchKernel call that signals an event must raise
// relaxedOrderingCounter by exactly one (expected values 1, 2, 3).
// The test is skipped when skipInOrderNonWalkerSignalingAllowed() returns false for the event.
HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWithCounterHeuristicWhenSubmisionSplitThenDontIncrementCounterTwice, IsAtLeastXeHpcCore) {
|
||||||
|
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||||
|
debugManager.flags.SkipInOrderNonWalkerSignalingAllowed.set(1);
|
||||||
|
debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.set(1);
|
||||||
|
debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.set(1);
|
||||||
|
|
||||||
|
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
|
// Ownership of the mock is handed to the CSR via reset() on the next line.
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
|
||||||
|
ultCsr->directSubmission.reset(directSubmission);
|
||||||
|
// The ints are never read; only their addresses are passed to registerClient().
// NOTE(review): presumably two registered clients are needed for relaxed ordering to be
// considered enabled on this CSR - confirm.
int client1, client2;
|
||||||
|
ultCsr->registerClient(&client1);
|
||||||
|
ultCsr->registerClient(&client2);
|
||||||
|
|
||||||
|
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||||
|
|
||||||
|
auto eventPool = createEvents<FamilyType>(1, true);
|
||||||
|
events[0]->signalScope = 0;
|
||||||
|
|
||||||
|
if (!immCmdList->skipInOrderNonWalkerSignalingAllowed(events[0].get())) {
|
||||||
|
GTEST_SKIP(); // not supported
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, immCmdList->relaxedOrderingCounter);
|
||||||
|
|
||||||
|
zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr);
|
||||||
|
EXPECT_EQ(1u, immCmdList->relaxedOrderingCounter);
|
||||||
|
|
||||||
|
zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr);
|
||||||
|
EXPECT_EQ(2u, immCmdList->relaxedOrderingCounter);
|
||||||
|
|
||||||
|
zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr);
|
||||||
|
EXPECT_EQ(3u, immCmdList->relaxedOrderingCounter);
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenWaitingForEventFromPreviousAppendThenSkip, IsAtLeastXeHpCore) {
|
HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenWaitingForEventFromPreviousAppendThenSkip, IsAtLeastXeHpCore) {
|
||||||
|
|
||||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||||
|
|
|
@ -377,6 +377,8 @@ class CommandStreamReceiver {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base implementation: reports a relaxed ordering queue depth of 0. CommandStreamReceiverHw overrides this.
virtual uint32_t getDirectSubmissionRelaxedOrderingQueueDepth() const { return 0; }
|
||||||
|
|
||||||
virtual bool isKmdWaitOnTaskCountAllowed() const {
|
virtual bool isKmdWaitOnTaskCountAllowed() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -156,6 +156,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool directSubmissionRelaxedOrderingEnabled() const override;
|
bool directSubmissionRelaxedOrderingEnabled() const override;
|
||||||
|
uint32_t getDirectSubmissionRelaxedOrderingQueueDepth() const override;
|
||||||
|
|
||||||
void stopDirectSubmission(bool blocking) override;
|
void stopDirectSubmission(bool blocking) override;
|
||||||
|
|
||||||
|
|
|
@ -966,6 +966,18 @@ bool CommandStreamReceiverHw<GfxFamily>::bcsRelaxedOrderingAllowed(const BlitPro
|
||||||
(blitPropertiesContainer.size() == 1) && !hasStallingCmds;
|
(blitPropertiesContainer.size() == 1) && !hasStallingCmds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the relaxed ordering queue size of the active direct submission object.
// directSubmission is checked first; blitterDirectSubmission is consulted only when
// directSubmission is not set. Returns 0 when neither object exists.
template <typename GfxFamily>
|
||||||
|
uint32_t CommandStreamReceiverHw<GfxFamily>::getDirectSubmissionRelaxedOrderingQueueDepth() const {
|
||||||
|
if (directSubmission.get()) {
|
||||||
|
return directSubmission->getRelaxedOrderingQueueSize();
|
||||||
|
}
|
||||||
|
if (blitterDirectSubmission.get()) {
|
||||||
|
return blitterDirectSubmission->getRelaxedOrderingQueueSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
|
TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
|
||||||
auto lock = obtainUniqueOwnership();
|
auto lock = obtainUniqueOwnership();
|
||||||
|
|
|
@ -450,6 +450,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: defaul
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingCounterHeuristic, -1, "-1: default, 0: disabled, 1: enabled. If set use counter based heuristic to allow for relaxed ordering dispatch")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingCounterHeuristicTreshold, -1, "-1: default, >0: limit number of append calls to disable relaxed ordering dispatch")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit motor fence dispatch")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit motor fence dispatch")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: default, 0: disabled, 1: enabled. If set, print DirectSubmission semaphore programming and unlocking")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: default, 0: disabled, 1: enabled. If set, print DirectSubmission semaphore programming and unlocking")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSwitchSemaphoreMode, -1, "-1: default, 1: enable switch on unsuccessful, 0: disable switch on unsuccessful")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSwitchSemaphoreMode, -1, "-1: default, 1: enable switch on unsuccessful, 0: disable switch on unsuccessful")
|
||||||
|
|
|
@ -103,6 +103,7 @@ class DirectSubmissionHw {
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void unblockPagingFenceSemaphore(uint64_t pagingFenceValue){};
|
virtual void unblockPagingFenceSemaphore(uint64_t pagingFenceValue){};
|
||||||
|
// Read-only accessor for currentRelaxedOrderingQueueSize.
uint32_t getRelaxedOrderingQueueSize() const { return currentRelaxedOrderingQueueSize; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
static constexpr size_t prefetchSize = 8 * MemoryConstants::cacheLineSize;
|
static constexpr size_t prefetchSize = 8 * MemoryConstants::cacheLineSize;
|
||||||
|
|
|
@ -650,4 +650,6 @@ PrintCalculatedTimestamps = 0
|
||||||
DisableIndirectDetectionForKernelNames = unk
|
DisableIndirectDetectionForKernelNames = unk
|
||||||
ForceIndirectDetectionForCMKernels = -1
|
ForceIndirectDetectionForCMKernels = -1
|
||||||
LogIndirectDetectionKernelDetails = 0
|
LogIndirectDetectionKernelDetails = 0
|
||||||
|
DirectSubmissionRelaxedOrderingCounterHeuristic = -1
|
||||||
|
DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1
|
||||||
# Please don't edit below this line
|
# Please don't edit below this line
|
||||||
|
|
Loading…
Reference in New Issue