mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: new heuristic to enable relaxed ordering
Related-To: GSD-10308
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
75139d2322
commit
526f9c5e81
@@ -354,7 +354,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
}
|
||||
void postInitComputeSetup();
|
||||
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const { return false; }
|
||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) { return false; }
|
||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
|
||||
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);
|
||||
|
||||
@@ -208,7 +208,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
TransferType getTransferType(const CpuMemCopyInfo &cpuMemCopyInfo);
|
||||
size_t getTransferThreshold(TransferType transferType);
|
||||
bool isBarrierRequired();
|
||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const override;
|
||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) override;
|
||||
bool skipInOrderNonWalkerSignalingAllowed(ze_event_handle_t signalEvent) const override;
|
||||
|
||||
protected:
|
||||
@@ -221,12 +221,15 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
|
||||
void handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
|
||||
CommandQueue *getCmdQImmediate(bool copyOffloadOperation) const;
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||
uint64_t relaxedOrderingCounter = 0;
|
||||
std::atomic<bool> dependenciesPresent{false};
|
||||
bool latestFlushIsHostVisible = false;
|
||||
bool latestFlushIsCopyOffload = false;
|
||||
bool keepRelaxedOrderingEnabled = false;
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
|
||||
@@ -398,7 +398,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
|
||||
// Thin forwarder to executeCommandListImmediateWithFlushTaskImpl(): passes the flags
// through and appends the immediate command queue chosen by `copyOffloadSubmission`
// (copy-offload queue when true, regular immediate queue otherwise).
// Note the argument order of the Impl call: `requireTaskCountUpdate` is passed before
// the queue, i.e. not in the order the parameters are declared here.
// NOTE(review): this listing is a rendered diff, so two return statements appear back
// to back; presumably the one with the inline ternary is the pre-change line and the
// one calling getCmdQImmediate() is its replacement -- confirm against the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, bool requireTaskCountUpdate) {
|
||||
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, requireTaskCountUpdate, copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate);
|
||||
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, requireTaskCountUpdate, getCmdQImmediate(copyOffloadSubmission));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -546,7 +546,9 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
|
||||
bool nonWalkerSignalingHasRelaxedOrdering = false;
|
||||
|
||||
if (NEO::debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.get() != 0) {
|
||||
auto counterValueBeforeSecondCheck = this->relaxedOrderingCounter;
|
||||
nonWalkerSignalingHasRelaxedOrdering = isRelaxedOrderingDispatchAllowed(1, false);
|
||||
this->relaxedOrderingCounter = counterValueBeforeSecondCheck; // dont increment twice
|
||||
}
|
||||
|
||||
if (nonWalkerSignalingHasRelaxedOrdering) {
|
||||
@@ -1064,13 +1066,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
return hostSynchronize(timeout, true);
|
||||
}
|
||||
|
||||
// Selects the immediate command queue for an operation: returns
// cmdQImmediateCopyOffload when `copyOffloadOperation` is true, otherwise cmdQImmediate.
// Const member; it only reads the two queue pointers.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
CommandQueue *CommandListCoreFamilyImmediate<gfxCoreFamily>::getCmdQImmediate(bool copyOffloadOperation) const {
|
||||
return copyOffloadOperation ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent,
|
||||
bool requireTaskCountUpdate) {
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
auto queue = copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||
auto queue = getCmdQImmediate(copyOffloadSubmission);
|
||||
this->latestFlushIsCopyOffload = copyOffloadSubmission;
|
||||
|
||||
if (NEO::debugManager.flags.DeferStateInitSubmissionToFirstRegularUsage.get() == 1) {
|
||||
@@ -1431,10 +1438,46 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
|
||||
}
|
||||
|
||||
// Decides whether the next submission from this immediate command list may use
// relaxed-ordering dispatch on the CSR selected by `copyOffload`.
//
// numWaitEvents: caller-supplied wait-event count; one more is added when
//                hasInOrderDependencies() reports true.
// copyOffload:   forwarded to getCsr() and getCmdQImmediate() to pick the CSR and queue.
//
// Returns false right away when the CSR reports that direct-submission relaxed
// ordering is not enabled. When the DirectSubmissionRelaxedOrderingCounterHeuristic
// debug flag equals 1, the counter heuristic below decides; otherwise the decision is
// delegated to NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed().
//
// NOTE(review): this listing is a rendered diff, so two signatures and an early helper
// return appear back to back; presumably the `const` signature and the first helper
// return (the one calling getCsr() again) are the pre-change lines -- confirm against
// the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const {
|
||||
// Not const-qualified: the body below updates relaxedOrderingCounter and keepRelaxedOrderingEnabled.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
|
||||
auto csr = getCsr(copyOffload);
|
||||
if (!csr->directSubmissionRelaxedOrderingEnabled()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
|
||||
|
||||
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*getCsr(copyOffload), numEvents);
|
||||
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.get() == 1) {
|
||||
// Threshold starts at the CSR's relaxed-ordering queue depth; a debug flag further down may replace it.
uint32_t relaxedOrderingCounterThreshold = csr->getDirectSubmissionRelaxedOrderingQueueDepth();
|
||||
|
||||
auto queueTaskCount = getCmdQImmediate(copyOffload)->getTaskCount();
|
||||
auto csrTaskCount = csr->peekTaskCount();
|
||||
|
||||
// When first-submission device init is supported for this CSR type (or heapless state init
// is enabled) and the CSR task count is exactly 1, the queue task count is treated as 1.
// NOTE(review): presumably the init submission advanced the CSR count but not the queue count -- confirm.
if ((this->device->getNEODevice()->isInitDeviceWithFirstSubmissionSupported(csr->getType()) || this->heaplessStateInitEnabled) && csr->peekTaskCount() == 1) {
|
||||
DEBUG_BREAK_IF(queueTaskCount != 0);
|
||||
queueTaskCount = 1;
|
||||
}
|
||||
|
||||
// Any value other than -1 in the (sic) "Treshold" debug flag overrides the threshold.
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get() != -1) {
|
||||
relaxedOrderingCounterThreshold = static_cast<uint32_t>(NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get());
|
||||
}
|
||||
|
||||
// Equal queue and CSR task counts: count one more submission.
if (queueTaskCount == csrTaskCount) {
|
||||
relaxedOrderingCounter++;
|
||||
} else {
|
||||
// Submission from another queue. Reset counter and keep relaxed ordering allowed
|
||||
relaxedOrderingCounter = 0;
|
||||
this->keepRelaxedOrderingEnabled = true;
|
||||
}
|
||||
|
||||
// More counted submissions than the threshold: clear the keep flag and refuse relaxed ordering.
if (relaxedOrderingCounter > static_cast<uint64_t>(relaxedOrderingCounterThreshold)) {
|
||||
this->keepRelaxedOrderingEnabled = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Allowed only while the keep flag is set and there is at least one event to wait on.
return (keepRelaxedOrderingEnabled && (numEvents > 0));
|
||||
}
|
||||
|
||||
// Heuristic flag not set to 1: defer to the shared helper with the already-fetched CSR.
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*csr, numEvents);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
Reference in New Issue
Block a user