mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
feature: select execution queue from copy offload
Related-To: NEO-11376 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5ae2709e6e
commit
dab221830e
@@ -183,7 +183,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission);
|
||||
ze_result_t executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, CommandQueue *cmdQ);
|
||||
ze_result_t appendCommandLists(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
@@ -200,7 +200,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
void checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize);
|
||||
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent);
|
||||
MOCKABLE_VIRTUAL ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent);
|
||||
|
||||
bool preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc);
|
||||
|
||||
@@ -376,8 +376,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
|
||||
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, this->cmdQImmediate);
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission) {
|
||||
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -427,7 +427,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||
}
|
||||
|
||||
NEO::CompletionStamp completionStamp;
|
||||
if (isCopyOnly()) {
|
||||
if (cmdQ->peekIsCopyOnlyCommandQueue()) {
|
||||
completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, csr);
|
||||
} else {
|
||||
completionStamp = (this->*computeFlushMethod)(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
|
||||
@@ -442,7 +442,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||
|
||||
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (cmdQ == this->cmdQImmediate) {
|
||||
if (cmdQ == this->cmdQImmediate || cmdQ == this->cmdQImmediateCopyOffload) {
|
||||
cmdQ->setTaskCount(completionStamp.taskCount);
|
||||
|
||||
if (this->isSyncModeQueue) {
|
||||
@@ -513,7 +513,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
||||
CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(event, true, false);
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -525,7 +525,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
|
||||
}
|
||||
|
||||
if (nonWalkerSignalingHasRelaxedOrdering) {
|
||||
result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, true, nullptr);
|
||||
result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, true, false, nullptr);
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream());
|
||||
relaxedOrderingDispatch = true;
|
||||
hasStallingCmds = hasStallingCmdsForRelaxedOrdering(1, relaxedOrderingDispatch);
|
||||
@@ -546,7 +546,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -573,7 +573,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
this->dependenciesPresent = true;
|
||||
return flushImmediate(ret, true, isStallingOperation, relaxedOrderingDispatch, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, isStallingOperation, relaxedOrderingDispatch, false, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -622,7 +622,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
numWaitEvents, phWaitEvents, relaxedOrderingDispatch, forceDisableCopyOnlyInOrderSignaling);
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, isCopyOffloadEnabled(), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -679,7 +679,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, forceDisableCopyOnlyInOrderSignaling);
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, isCopyOffloadEnabled(), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -694,7 +694,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -704,7 +704,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
|
||||
|
||||
checkAvailableSpace(0, false, commonImmediateCommandSize);
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, false, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -714,7 +714,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
|
||||
|
||||
checkAvailableSpace(0, false, commonImmediateCommandSize);
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, false, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -744,7 +744,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
} else {
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
||||
}
|
||||
return flushImmediate(ret, false, false, relaxedOrdering, true, nullptr);
|
||||
return flushImmediate(ret, false, false, relaxedOrdering, true, false, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -769,7 +769,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
||||
return ret;
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true, true, false, false, nullptr);
|
||||
return flushImmediate(ret, true, true, false, false, false, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -781,7 +781,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
|
||||
return flushImmediate(ret, true, true, false, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, false, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -825,7 +825,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -843,7 +843,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -861,7 +861,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -881,7 +881,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, srcRowPitch, srcSlicePitch,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -901,7 +901,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, destRowPitch, destSlicePitch,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -914,7 +914,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
|
||||
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize);
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return flushImmediate(ret, true, true, false, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, false, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -929,21 +929,21 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnMemory(void *desc, void *ptr, uint64_t data, ze_event_handle_t signalEventHandle, bool useQwordData) {
|
||||
checkAvailableSpace(0, false, commonImmediateCommandSize);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(desc, ptr, data, signalEventHandle, useQwordData);
|
||||
return flushImmediate(ret, true, false, false, false, signalEventHandle);
|
||||
return flushImmediate(ret, true, false, false, false, false, signalEventHandle);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteToMemory(void *desc, void *ptr, uint64_t data) {
|
||||
checkAvailableSpace(0, false, commonImmediateCommandSize);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(desc, ptr, data);
|
||||
return flushImmediate(ret, true, false, false, false, nullptr);
|
||||
return flushImmediate(ret, true, false, false, false, false, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -1001,15 +1001,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent) {
|
||||
bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent) {
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
auto queue = copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||
|
||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
if (signalEvent && (NEO::debugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
|
||||
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
|
||||
signalEvent->setLatestUsedCmdQueue(queue);
|
||||
}
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, copyOffloadSubmission);
|
||||
} else {
|
||||
inputRet = executeCommandListImmediate(performMigration);
|
||||
}
|
||||
@@ -1018,8 +1020,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
||||
this->latestFlushIsHostVisible = !this->dcFlushSupport;
|
||||
|
||||
if (signalEvent) {
|
||||
signalEvent->setCsr(this->csr, isInOrderExecutionEnabled());
|
||||
this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
signalEvent->setCsr(static_cast<CommandQueueImp *>(queue)->getCsr(), isInOrderExecutionEnabled());
|
||||
this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST) && !copyOffloadSubmission;
|
||||
}
|
||||
|
||||
return inputRet;
|
||||
|
||||
Reference in New Issue
Block a user