mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-27 07:44:16 +08:00
feature: wait path improvements for dual stream offload
Related-To: NEO-7067 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d3b11d1527
commit
201324f804
@@ -230,7 +230,7 @@ void CommandList::synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t
|
||||
}
|
||||
|
||||
// Returns the command stream receiver of the immediate queue chosen for this operation:
// cmdQImmediateCopyOffload when the operation resolves to dual-stream copy offload,
// cmdQImmediate otherwise. The queue is cast to CommandQueueImp to reach getCsr().
NEO::CommandStreamReceiver *CommandList::getCsr(bool copyOffload) const {
|
||||
// NOTE(review): this view renders a diff hunk; the next `auto queue` line looks like the
// pre-change form and the one after it the post-change form — only one exists in the real file.
auto queue = (getCopyOffloadModeForOperation(copyOffload) == CopyOffloadModes::dualStream) ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||
// Same selection expressed through the isDualStreamCopyOffloadOperation() helper.
auto queue = isDualStreamCopyOffloadOperation(copyOffload) ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||
|
||||
return static_cast<CommandQueueImp *>(queue)->getCsr();
|
||||
}
|
||||
|
||||
@@ -448,6 +448,9 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
}
|
||||
MOCKABLE_VIRTUAL void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
|
||||
|
||||
// True when the copy offload mode resolved for this operation is CopyOffloadModes::dualStream.
bool isDualStreamCopyOffloadOperation(bool offloadOperation) const {
    const auto resolvedMode = getCopyOffloadModeForOperation(offloadOperation);
    return resolvedMode == CopyOffloadModes::dualStream;
}
|
||||
// True only for a requested offload operation that does not resolve to dual-stream mode.
bool isNonDualStreamCopyOffloadOperation(bool offloadOperation) const {
    // Not an offload operation at all: the mode is never queried.
    if (!offloadOperation) {
        return false;
    }
    return !isDualStreamCopyOffloadOperation(offloadOperation);
}
|
||||
|
||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||
NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations;
|
||||
std::vector<NEO::GraphicsAllocation *> patternAllocations;
|
||||
|
||||
@@ -195,7 +195,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override;
|
||||
void appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
|
||||
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency,
|
||||
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool copyOffloadOperation);
|
||||
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool dualStreamCopyOffloadOperation);
|
||||
void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired);
|
||||
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining, bool copyOffloadOperation);
|
||||
void handleInOrderCounterOverflow(bool copyOffloadOperation);
|
||||
@@ -209,7 +209,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void appendMultiPartitionEpilogue() override;
|
||||
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation);
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool copyOffloadOperation);
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool dualStreamCopyOffloadOperation);
|
||||
|
||||
MOCKABLE_VIRTUAL void appendSynchronizedDispatchInitializationSection();
|
||||
MOCKABLE_VIRTUAL void appendSynchronizedDispatchCleanupSection();
|
||||
@@ -251,7 +251,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
|
||||
Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch);
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool dualStreamCopyOffload);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
@@ -299,7 +299,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
Event *signalEvent,
|
||||
CmdListKernelLaunchParams &launchParams);
|
||||
|
||||
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool copyOffloadOperation, CommandToPatch::CommandType storedSemaphore);
|
||||
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool dualStreamCopyOffload, CommandToPatch::CommandType storedSemaphore);
|
||||
|
||||
void appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation);
|
||||
|
||||
@@ -383,7 +383,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
// Default policy: relaxed ordering dispatch is reported as not allowed, whatever the arguments.
// NOTE(review): virtual, so presumably overridden by derived command list types — confirm.
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
    return false;
}
|
||||
// Default implementation is intentionally empty: nothing is configured at this level.
// NOTE(review): virtual hook, presumably implemented by derived command list types — confirm.
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
}
|
||||
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
|
||||
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);
|
||||
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool dualStreamCopyOffloadOperation);
|
||||
// Forwards the GfxFamily::isQwordInOrderCounter trait of the hardware family.
bool isQwordInOrderCounter() const {
    const bool qwordCounter = GfxFamily::isQwordInOrderCounter;
    return qwordCounter;
}
|
||||
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
|
||||
bool hasInOrderDependencies() const;
|
||||
|
||||
@@ -195,7 +195,8 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderCounterOverflow(bool copyOffloadOperation) {
|
||||
if (!isQwordInOrderCounter() && ((inOrderExecInfo->getCounterValue() + 1) == std::numeric_limits<uint32_t>::max())) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false, copyOffloadOperation);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false,
|
||||
isDualStreamCopyOffloadOperation(copyOffloadOperation));
|
||||
|
||||
inOrderExecInfo->resetCounterValue();
|
||||
|
||||
@@ -1459,7 +1460,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
|
||||
Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, bool dualStreamCopyOffload) {
|
||||
srcRegion.originX += getRegionOffsetForAppendMemoryCopyBlitRegion(srcAllocationData);
|
||||
dstRegion.originX += getRegionOffsetForAppendMemoryCopyBlitRegion(dstAllocationData);
|
||||
|
||||
@@ -1479,7 +1481,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
blitProperties.srcSize = srcSize;
|
||||
blitProperties.dstSize = dstSize;
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, dualStreamCopyOffload);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1488,7 +1490,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, true);
|
||||
const bool copyOnly = isCopyOnly(dualStreamCopyOffload);
|
||||
|
||||
if (copyOnly) {
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, true);
|
||||
}
|
||||
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
|
||||
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
|
||||
if (copyRegionPreferred) {
|
||||
@@ -1498,7 +1504,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
}
|
||||
dummyBlitWa.isWaRequired = true;
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
|
||||
if (copyOnly) {
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1685,7 +1693,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
bool waitForImplicitInOrderDependency = !isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed;
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, memoryCopyParams.relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false, isCopyOnlyEnabled);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, memoryCopyParams.relaxedOrderingDispatch, false,
|
||||
waitForImplicitInOrderDependency, false, isDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed));
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
@@ -1714,8 +1723,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || getDcFlushRequired(dstAllocationStruct.needsFlush);
|
||||
|
||||
if (!useAdditionalBlitProperties || !isCopyOnlyEnabled) {
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false, isCopyOnlyEnabled);
|
||||
if (!isNonDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed)) {
|
||||
if (!useAdditionalBlitProperties || !isCopyOnlyEnabled) {
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false, isCopyOnlyEnabled);
|
||||
}
|
||||
}
|
||||
|
||||
if (isCopyOnlyEnabled) {
|
||||
@@ -1788,8 +1799,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
appendCopyOperationFence(signalEvent, srcAllocationStruct.alloc, dstAllocationStruct.alloc, isCopyOnlyEnabled);
|
||||
|
||||
if (!useAdditionalBlitProperties || !isCopyOnlyEnabled) {
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false, isCopyOnlyEnabled);
|
||||
if (!isNonDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed)) {
|
||||
if (!useAdditionalBlitProperties || !isCopyOnlyEnabled) {
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false, isCopyOnlyEnabled);
|
||||
}
|
||||
}
|
||||
|
||||
bool l3flushInPipeControl = !l3FlushAfterPostSyncRequired || isSplitOperation;
|
||||
@@ -1797,12 +1810,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
if (this->isInOrderExecutionEnabled() && !isNonDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed)) {
|
||||
bool emitPipeControl = !isCopyOnlyEnabled && launchParams.pipeControlSignalling;
|
||||
|
||||
if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl) {
|
||||
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnlyEnabled);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, isCopyOnlyEnabled, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
|
||||
}
|
||||
|
||||
if (!isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed) {
|
||||
@@ -1876,14 +1889,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
|
||||
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcAllocationStruct.alloc, *dstAllocationStruct.alloc);
|
||||
const bool isCopyOnlyEnabled = isCopyOnly(memoryCopyParams.copyOffloadAllowed);
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !memoryCopyParams.forceDisableCopyOnlyInOrderSignaling &&
|
||||
isCopyOnlyEnabled && !isNonDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed);
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
if (isCopyOnlyEnabled) {
|
||||
result = appendMemoryCopyBlitRegion(&srcAllocationStruct, &dstAllocationStruct, *srcRegion, *dstRegion,
|
||||
{srcRegion->width, srcRegion->height, srcRegion->depth},
|
||||
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3,
|
||||
signalEvent, numWaitEvents, phWaitEvents, memoryCopyParams.relaxedOrderingDispatch);
|
||||
signalEvent, numWaitEvents, phWaitEvents, memoryCopyParams.relaxedOrderingDispatch, isDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed));
|
||||
} else if ((srcRegion->depth > 1) || (srcRegion->originZ != 0) || (dstRegion->originZ != 0)) {
|
||||
result = this->appendMemoryCopyKernel3d(&dstAllocationStruct, &srcAllocationStruct, Builtin::copyBufferRectBytes3d,
|
||||
dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
|
||||
@@ -1907,7 +1921,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
if (inOrderCopyOnlySignalingAllowed) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, isCopyOnlyEnabled, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
|
||||
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
|
||||
}
|
||||
} else {
|
||||
@@ -2607,17 +2621,17 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation) {
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool dualStreamCopyOffloadOperation) {
|
||||
if (hasInOrderDependencies()) {
|
||||
if (inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(copyOffloadOperation));
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(dualStreamCopyOffloadOperation));
|
||||
}
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), relaxedOrderingAllowed, true, false, false, copyOffloadOperation);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), relaxedOrderingAllowed, true, false, false, dualStreamCopyOffloadOperation);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -2627,7 +2641,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool copyOffloadOperation) {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool dualStreamCopyOffloadOperation) {
|
||||
bool inOrderDependenciesSent = false;
|
||||
|
||||
if (this->latestOperationRequiredNonWalkerInOrderCmdsChaining && !relaxedOrderingAllowed) {
|
||||
@@ -2635,22 +2649,22 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||
}
|
||||
|
||||
if (waitForImplicitInOrderDependency) {
|
||||
auto ret = this->flushInOrderCounterSignal(copyOffloadOperation || relaxedOrderingAllowed);
|
||||
auto ret = this->flushInOrderCounterSignal(dualStreamCopyOffloadOperation || relaxedOrderingAllowed);
|
||||
if (ret != ZE_RESULT_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed, copyOffloadOperation);
|
||||
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed, dualStreamCopyOffloadOperation);
|
||||
this->latestOperationHasOptimizedCbEvent = false;
|
||||
}
|
||||
|
||||
if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependenciesSent) {
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(copyOffloadOperation));
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(dualStreamCopyOffloadOperation));
|
||||
}
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, false, skipAddingWaitEventsToResidency, false, copyOffloadOperation);
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, false, skipAddingWaitEventsToResidency, false, dualStreamCopyOffloadOperation);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -2728,7 +2742,7 @@ NEO::GraphicsAllocation *CommandListCoreFamily<gfxCoreFamily>::getDeviceCounterA
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
|
||||
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency,
|
||||
bool noopDispatch, bool copyOffloadOperation) {
|
||||
bool noopDispatch, bool dualStreamCopyOffloadOperation) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
UNRECOVERABLE_IF(waitValue > static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) && !isQwordInOrderCounter());
|
||||
@@ -2741,13 +2755,15 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
uint64_t gpuAddress = inOrderExecInfo->getBaseDeviceAddress() + offset;
|
||||
|
||||
const uint32_t immWriteOffset = device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset();
|
||||
const bool copyOnlyWait = isCopyOnly(dualStreamCopyOffloadOperation);
|
||||
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation));
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true,
|
||||
isQwordInOrderCounter(), copyOnlyWait);
|
||||
|
||||
} else {
|
||||
auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
|
||||
auto resolveDependenciesViaPipeControls = !copyOnlyWait && !this->asMutable() && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
|
||||
|
||||
if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) {
|
||||
resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get();
|
||||
@@ -2778,8 +2794,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
|
||||
if (!noopDispatch) {
|
||||
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, copyOnlyWait);
|
||||
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, copyOnlyWait);
|
||||
} else {
|
||||
memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
@@ -2862,8 +2878,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
callId);
|
||||
}
|
||||
|
||||
const bool dualStreamCopyOffload = isDualStreamCopyOffloadOperation(copyOffloadOperation);
|
||||
|
||||
if (this->isInOrderExecutionEnabled() && apiRequest) {
|
||||
handleInOrderImplicitDependencies(false, copyOffloadOperation);
|
||||
handleInOrderImplicitDependencies(false, dualStreamCopyOffload);
|
||||
}
|
||||
|
||||
bool dcFlushRequired = false;
|
||||
@@ -2875,6 +2893,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
}
|
||||
}
|
||||
if (dcFlushRequired) {
|
||||
UNRECOVERABLE_IF(isNonDualStreamCopyOffloadOperation(copyOffloadOperation));
|
||||
if (isCopyOnly(copyOffloadOperation)) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
encodeMiFlush(0, 0, args);
|
||||
@@ -2905,7 +2924,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecInfo(), outWaitCmds,
|
||||
waitValue, event->getInOrderAllocationOffset(),
|
||||
relaxedOrderingAllowed, false, skipAddingWaitEventsToResidency,
|
||||
isCbEventBoundToCmdList(event), copyOffloadOperation);
|
||||
isCbEventBoundToCmdList(event), dualStreamCopyOffload);
|
||||
|
||||
continue;
|
||||
}
|
||||
@@ -2918,10 +2937,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
commandContainer.addToResidencyContainer(event->getAllocation(this->device));
|
||||
}
|
||||
|
||||
appendWaitOnSingleEvent(event, outWaitCmds, relaxedOrderingAllowed, copyOffloadOperation, CommandToPatch::WaitEventSemaphoreWait);
|
||||
appendWaitOnSingleEvent(event, outWaitCmds, relaxedOrderingAllowed, dualStreamCopyOffload, CommandToPatch::WaitEventSemaphoreWait);
|
||||
}
|
||||
|
||||
if (isImmediateType() && isCopyOnly(copyOffloadOperation) && trackDependencies) {
|
||||
if (isImmediateType() && isCopyOnly(dualStreamCopyOffload) && trackDependencies) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.commandWithPostSync = true;
|
||||
auto csr = getCsr(false);
|
||||
@@ -2931,9 +2950,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
|
||||
if (apiRequest) {
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false);
|
||||
appendSignalInOrderDependencyCounter(nullptr, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(nullptr, false, copyOffloadOperation);
|
||||
handleInOrderDependencyCounter(nullptr, false, false);
|
||||
}
|
||||
|
||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2951,6 +2970,8 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation) {
|
||||
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
|
||||
|
||||
UNRECOVERABLE_IF(isNonDualStreamCopyOffloadOperation(copyOffloadOperation));
|
||||
|
||||
uint64_t gpuVa = baseGpuVa + inOrderExecInfo->getAllocationOffset();
|
||||
|
||||
uint32_t numWrites = 1;
|
||||
@@ -2978,6 +2999,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
|
||||
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
|
||||
using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;
|
||||
|
||||
UNRECOVERABLE_IF(isNonDualStreamCopyOffloadOperation(copyOffloadOperation));
|
||||
|
||||
uint64_t deviceAllocGpuVa = inOrderExecInfo->getBaseDeviceAddress();
|
||||
uint64_t signalValue = inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
|
||||
|
||||
@@ -3022,7 +3045,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
|
||||
DATA_SIZE::DATA_SIZE_QWORD, 0, 0, signalEvent->getInOrderIncrementValue(), 0);
|
||||
}
|
||||
|
||||
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1 || copyOffloadOperation) && signalEvent && signalEvent->isInterruptModeEnabled()) {
|
||||
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1 || isCopyOnly(copyOffloadOperation)) && signalEvent && signalEvent->isInterruptModeEnabled()) {
|
||||
NEO::EnodeUserInterrupt<GfxFamily>::encode(*cmdStream);
|
||||
}
|
||||
}
|
||||
@@ -4214,7 +4237,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchEventRemainingPacketsPostSync
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool copyOffloadOperation, CommandToPatch::CommandType storedSemaphore) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool dualStreamCopyOffload, CommandToPatch::CommandType storedSemaphore) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
uint64_t gpuAddr = event->getCompletionFieldGpuAddress(this->device);
|
||||
@@ -4229,7 +4252,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
|
||||
for (uint32_t i = 0u; i < packetsToWait; i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
|
||||
NEO::CompareOperation::equal, true, false, isCopyOnly(copyOffloadOperation));
|
||||
NEO::CompareOperation::equal, true, false, isCopyOnly(dualStreamCopyOffload));
|
||||
} else {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
gpuAddr,
|
||||
|
||||
@@ -53,6 +53,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using BaseClass::getCsr;
|
||||
using BaseClass::isCopyOffloadEnabled;
|
||||
using BaseClass::isCopyOnly;
|
||||
using BaseClass::isDualStreamCopyOffloadOperation;
|
||||
using BaseClass::isInOrderExecutionEnabled;
|
||||
using BaseClass::isSkippingInOrderBarrierAllowed;
|
||||
using BaseClass::isTbxMode;
|
||||
|
||||
@@ -1126,7 +1126,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
bool mainStorageCleanupNeeded = !mainInternalAllocStorage->getTemporaryAllocations().peekIsEmpty();
|
||||
bool copyOffloadStorageCleanupNeeded = false;
|
||||
|
||||
const bool dualStreamCopyOffload = (getCopyOffloadModeForOperation(isCopyOffloadEnabled()) == CopyOffloadModes::dualStream);
|
||||
const bool dualStreamCopyOffload = isDualStreamCopyOffloadOperation(isCopyOffloadEnabled());
|
||||
|
||||
if (dualStreamCopyOffload) {
|
||||
copyOffloadTaskCount = this->cmdQImmediateCopyOffload->getTaskCount();
|
||||
|
||||
@@ -706,10 +706,11 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
|
||||
L0::Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool doubleStreamCopyOffload) override {
|
||||
srcBlitCopyRegionOffset = srcAllocationData->offset;
|
||||
dstBlitCopyRegionOffset = dstAllocationData->offset;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocationData, dstAllocationData, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocationData, dstAllocationData, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch,
|
||||
srcSize, dstSize, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, doubleStreamCopyOffload);
|
||||
}
|
||||
uintptr_t srcAlignedPtr;
|
||||
uintptr_t dstAlignedPtr;
|
||||
|
||||
@@ -98,7 +98,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
|
||||
L0::Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool doubleStreamCopyOffload) override {
|
||||
if (signalEvent) {
|
||||
useEvents = true;
|
||||
} else {
|
||||
@@ -1417,7 +1417,7 @@ HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenCopyRegionWithinMaxBli
|
||||
AlignedAllocationData dstAllocationData = {mockAllocationDst.gpuAddress, 0, &mockAllocationDst, false};
|
||||
size_t rowPitch = copySize.x;
|
||||
size_t slicePitch = copySize.x * copySize.y;
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false);
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false, false);
|
||||
GenCmdList cmdList;
|
||||
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -1468,7 +1468,7 @@ HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenCopyRegionWithinMaxBli
|
||||
AlignedAllocationData dstAllocationData = {mockAllocationDst.gpuAddress, 0, &mockAllocationDst, false};
|
||||
size_t rowPitch = copySize.x;
|
||||
size_t slicePitch = copySize.x * copySize.y;
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false);
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false, false);
|
||||
uint32_t bytesPerPixel = NEO::BlitCommandsHelper<FamilyType>::getAvailableBytesPerPixel(copySize.x, srcRegion.originX, dstRegion.originY, srcSize.x, dstSize.x);
|
||||
GenCmdList cmdList;
|
||||
|
||||
@@ -1518,7 +1518,7 @@ HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenCopyRegionGreaterThanM
|
||||
AlignedAllocationData dstAllocationData = {mockAllocationDst.gpuAddress, 0, &mockAllocationDst, false};
|
||||
size_t rowPitch = copySize.x;
|
||||
size_t slicePitch = copySize.x * copySize.y;
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false);
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false, false);
|
||||
GenCmdList cmdList;
|
||||
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -1545,7 +1545,7 @@ class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
|
||||
L0::Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool doubleStreamCopyOffload) override {
|
||||
this->srcSize = srcSize;
|
||||
this->dstSize = dstSize;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -253,7 +253,7 @@ HWTEST2_F(AppendMemoryCopyTests, givenCopyCommandListWhenTimestampPassedToMemory
|
||||
|
||||
AlignedAllocationData srcAllocationData = {mockAllocationSrc.gpuAddress, 0, &mockAllocationSrc, false};
|
||||
AlignedAllocationData dstAllocationData = {mockAllocationDst.gpuAddress, 0, &mockAllocationDst, false};
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event.get(), 0, nullptr, false);
|
||||
commandList->appendMemoryCopyBlitRegion(&srcAllocationData, &dstAllocationData, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event.get(), 0, nullptr, false, false);
|
||||
GenCmdList cmdList;
|
||||
|
||||
auto baseAddr = event->getGpuAddress(device);
|
||||
|
||||
@@ -138,7 +138,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
|
||||
Event *signalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool doubleStreamCopyOffload) override {
|
||||
if (signalEvent) {
|
||||
useEvents = true;
|
||||
} else {
|
||||
|
||||
@@ -1755,7 +1755,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenImmediateCmdListWhenDispa
|
||||
}
|
||||
|
||||
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
|
||||
copyOnlyCmdList->appendMemoryCopyBlitRegion(&allocationData, &allocationData, region, region, {0, 0, 0}, 0, 0, 0, 0, {0, 0, 0}, {0, 0, 0}, events[0].get(), 0, nullptr, false);
|
||||
copyOnlyCmdList->appendMemoryCopyBlitRegion(&allocationData, &allocationData, region, region, {0, 0, 0}, 0, 0, 0, 0, {0, 0, 0}, {0, 0, 0}, events[0].get(), 0, nullptr, false, false);
|
||||
if (dcFlushRequired) {
|
||||
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode);
|
||||
} else {
|
||||
@@ -1909,7 +1909,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenNonInOrderCmdListWhenPass
|
||||
const void **ranges = reinterpret_cast<const void **>(&copyData[0]);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, eventHandle, 0, nullptr));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, copyOnlyCmdList->appendMemoryCopyBlitRegion(&allocationData, &allocationData, region, region, {0, 0, 0}, 0, 0, 0, 0, {0, 0, 0}, {0, 0, 0}, events[0].get(), 0, nullptr, false));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, copyOnlyCmdList->appendMemoryCopyBlitRegion(&allocationData, &allocationData, region, region, {0, 0, 0}, 0, 0, 0, 0, {0, 0, 0}, {0, 0, 0}, events[0].get(), 0, nullptr, false, false));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventHandle, 0, nullptr, copyParams));
|
||||
|
||||
|
||||
@@ -101,6 +101,94 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeWhenSubmittedThenUseDef
|
||||
EXPECT_TRUE(immCmdList->latestFlushIsHostVisible);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeWhenSubmittedThenDontProgramBcsMmioBase, IsAtLeastXeHpCore) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
debugManager.flags.OverrideCopyOffloadMode.set(nonDualStreamMode);
|
||||
auto immCmdList0 = createImmCmdListWithOffload<FamilyType::gfxCoreFamily>();
|
||||
auto immCmdList1 = createImmCmdListWithOffload<FamilyType::gfxCoreFamily>();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(2, true);
|
||||
auto eventHandle0 = events[0]->toHandle();
|
||||
auto eventHandle1 = events[1]->toHandle();
|
||||
|
||||
immCmdList0->appendMemoryCopy(&copyData1, &copyData2, 1, eventHandle0, 0, nullptr, copyParams);
|
||||
immCmdList1->appendMemoryCopy(&copyData1, &copyData2, 1, eventHandle1, 0, nullptr, copyParams);
|
||||
immCmdList1->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 1, &eventHandle0, copyParams);
|
||||
auto cmdStream = immCmdList1->getCmdContainer().getCommandStream();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), 0));
|
||||
|
||||
auto itor = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
while (itor != cmdList.end()) {
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_TRUE(cmd->getRegisterOffset() < RegisterOffsets::bcs0Base);
|
||||
itor = find<MI_LOAD_REGISTER_IMM *>(++itor, cmdList.end());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeAndProfilingEventWithRelaxedOrderingWhenAppendingThenDontBcsCommands, IsAtLeastXeHpCore) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
debugManager.flags.OverrideCopyOffloadMode.set(nonDualStreamMode);
|
||||
|
||||
auto immCmdList = createImmCmdListWithOffload<FamilyType::gfxCoreFamily>();
|
||||
|
||||
auto mainQueueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(false));
|
||||
auto copyQueueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(true));
|
||||
|
||||
auto mainQueueDirectSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*mainQueueCsr);
|
||||
auto offloadDirectSubmission = new MockDirectSubmissionHw<FamilyType, BlitterDispatcher<FamilyType>>(*copyQueueCsr);
|
||||
|
||||
mainQueueCsr->directSubmission.reset(mainQueueDirectSubmission);
|
||||
copyQueueCsr->blitterDirectSubmission.reset(offloadDirectSubmission);
|
||||
|
||||
int client1, client2;
|
||||
|
||||
mainQueueCsr->registerClient(&client1);
|
||||
mainQueueCsr->registerClient(&client2);
|
||||
copyQueueCsr->registerClient(&client1);
|
||||
copyQueueCsr->registerClient(&client2);
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, eventHandle, 0, nullptr, copyParams);
|
||||
|
||||
ze_copy_region_t region = {0, 0, 0, 1, 1, 1};
|
||||
immCmdList->appendMemoryCopyRegion(&copyData1, &region, 1, 1, &copyData2, &region, 1, 1, eventHandle, 0, nullptr, copyParams);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
|
||||
|
||||
auto lrrCmds = findAll<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
auto lriCmds = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto lrmCmds = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
for (auto &lrr : lrrCmds) {
|
||||
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*lrr);
|
||||
EXPECT_TRUE(lrrCmd->getSourceRegisterAddress() < RegisterOffsets::bcs0Base);
|
||||
EXPECT_TRUE(lrrCmd->getDestinationRegisterAddress() < RegisterOffsets::bcs0Base);
|
||||
}
|
||||
|
||||
for (auto &lri : lriCmds) {
|
||||
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lri);
|
||||
EXPECT_TRUE(lriCmd->getRegisterOffset() < RegisterOffsets::bcs0Base);
|
||||
}
|
||||
|
||||
for (auto &lrm : lrmCmds) {
|
||||
auto lrmCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*lrm);
|
||||
EXPECT_TRUE(lrmCmd->getRegisterAddress() < RegisterOffsets::bcs0Base);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) {
|
||||
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user