fix: copy offload MMIO programming

Related-To: NEO-12771

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-09-24 08:43:48 +00:00
committed by Compute-Runtime-Automation
parent 6ce947cb77
commit 2618f586fa
27 changed files with 260 additions and 175 deletions

View File

@@ -65,7 +65,7 @@ ze_result_t zeCommandListAppendWaitOnEvents(
ze_command_list_handle_t hCommandList,
uint32_t numEvents,
ze_event_handle_t *phEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, nullptr, false, true, true, false, false);
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, nullptr, false, true, true, false, false, false);
}
ze_result_t zeEventHostSignal(

View File

@@ -135,7 +135,7 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
// Pure-virtual entry point taking a list of `numEvents` event handles in `phEvent`.
// This listing shows the trailing parameter line twice: as it was before the commit and as it is after.
// The later line appends `copyOffloadOperation`, which the CommandListCoreFamily implementation in this
// change forwards to isCopyOnly(copyOffloadOperation) and to the in-order dependency helpers.
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) = 0;
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) = 0;
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
@@ -254,9 +254,11 @@ struct CommandList : _ze_command_list_handle_t {
void removeMemoryPrefetchAllocations();
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
// NOTE(review): diff listing without +/- markers - the first signature/return pair appears to be the
// version this commit removes, and the second pair its replacement.
// Earlier form: true when engineGroupType is a copy-only engine type.
// Replacement form: additionally true when the caller passes copyOffloadOperation == true and
// isCopyOffloadEnabled() holds; with `false` it reduces to the engine-type check alone.
bool isCopyOnly() const {
return NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType);
bool isCopyOnly(bool copyOffloadOperation) const {
return NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType) || (copyOffloadOperation && this->isCopyOffloadEnabled());
}
// Returns the copyOperationOffloadEnabled flag of this command list.
bool isCopyOffloadEnabled() const { return copyOperationOffloadEnabled; }
// Returns the internalUsage flag of this command list.
bool isInternal() const {
return internalUsage;
}
@@ -267,7 +269,7 @@ struct CommandList : _ze_command_list_handle_t {
return performMemoryPrefetch;
}
// True only for immediate-type command lists that either have flush-task submission enabled or pass the
// copy-only check. The two return lines are the before/after of this commit; the later one calls
// isCopyOnly(false), so only the engine-type part of the check applies and copy offload being enabled
// does not by itself satisfy it.
bool storeExternalPtrAsTemporary() const {
return isImmediateType() && (this->isFlushTaskSubmissionEnabled || isCopyOnly());
return isImmediateType() && (this->isFlushTaskSubmissionEnabled || isCopyOnly(false));
}
bool isWaitForEventsFromHostEnabled();
@@ -491,6 +493,7 @@ struct CommandList : _ze_command_list_handle_t {
bool requiresDcFlushForDcMitigation = false;
bool statelessBuiltinsEnabled = false;
bool localDispatchSupport = false;
bool copyOperationOffloadEnabled = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -181,10 +181,10 @@ struct CommandListCoreFamily : public CommandListImp {
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override;
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override;
void appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
uint64_t waitValue, uint32_t offset,
bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency, bool noopDispatch);
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency,
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool copyOffloadOperation);
void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation);
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining, bool copyOffloadOperation);
@@ -197,7 +197,7 @@ struct CommandListCoreFamily : public CommandListImp {
void appendMultiPartitionEpilogue() override;
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency);
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool copyOffloadOperation);
MOCKABLE_VIRTUAL void appendSynchronizedDispatchInitializationSection();
MOCKABLE_VIRTUAL void appendSynchronizedDispatchCleanupSection();
@@ -286,7 +286,7 @@ struct CommandListCoreFamily : public CommandListImp {
Event *signalEvent,
CmdListKernelLaunchParams &launchParams);
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, CommandToPatch::CommandType storedSemaphore);
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool copyOffloadOperation, CommandToPatch::CommandType storedSemaphore);
void appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation);
@@ -357,7 +357,7 @@ struct CommandListCoreFamily : public CommandListImp {
// Default implementation: returns false regardless of numWaitEvents / copyOffload. Declared virtual, so
// derived command list types can supply their own answer.
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const { return false; }
// Default implementation is empty (no-op); declared virtual so derived classes can override it.
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
// Declaration shown before and after the commit; the later line adds `copyOffloadOperation`.
// In the visible part of the definition, the function returns false when the host has already waited up
// to the current in-order counter value, and returns true after appending a wait on the in-order
// dependency. `copyOffloadOperation` is forwarded to isCopyOnly() and appendWaitOnInOrderDependency().
// NOTE(review): the tail of the definition is outside this listing - confirm the result when there are
// no in-order dependencies.
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);
// Returns GfxFamily::isQwordInOrderCounter for the hardware family this class is instantiated with.
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
bool hasInOrderDependencies() const;

View File

@@ -113,7 +113,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
commandContainer.reset();
clearCommandsToPatch();
if (!isCopyOnly()) {
if (!isCopyOnly(false)) {
printfKernelContainer.clear();
containsStatelessUncachedResource = false;
indirectAllocationsAllowed = false;
@@ -173,7 +173,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
}
if (!isQwordInOrderCounter() && ((inOrderExecInfo->getCounterValue() + 1) == std::numeric_limits<uint32_t>::max())) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false, copyOffloadOperation);
inOrderExecInfo->resetCounterValue();
@@ -252,7 +252,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->duplicatedInOrderCounterStorageEnabled = gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(rootDeviceEnvironment);
this->inOrderAtomicSignalingEnabled = gfxCoreHelper.inOrderAtomicSignallingEnabled(rootDeviceEnvironment);
this->scratchAddressPatchingEnabled = (this->heaplessModeEnabled && !isImmediateType());
this->copyOperationFenceSupported = (isCopyOnly() || isCopyOffloadEnabled()) && productHelper.isDeviceToHostCopySignalingFenceRequired();
this->copyOperationFenceSupported = (isCopyOnly(false) || isCopyOffloadEnabled()) && productHelper.isDeviceToHostCopySignalingFenceRequired();
this->defaultPipelinedThreadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy();
this->implicitSynchronizedDispatchForCooperativeKernelsAllowed = l0GfxCoreHelper.implicitSynchronizedDispatchForCooperativeKernelsAllowed();
if (NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
@@ -270,7 +270,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->commandContainer.setStateBaseAddressTracking(this->stateBaseAddressTracking);
this->commandContainer.setUsingPrimaryBuffer(this->dispatchCmdListBatchBufferAsPrimary);
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly(false)) {
this->partitionCount = static_cast<uint32_t>(neoDevice->getDeviceBitfield().count());
}
@@ -303,7 +303,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(),
deviceImp->allocationsForReuse.get(),
NEO::EncodeStates<GfxFamily>::getSshHeapSize(),
!isCopyOnly(),
!isCopyOnly(false),
createSecondaryCmdBufferInHostMem);
if (!this->pipelineSelectStateTracking) {
// allow systolic support set in container when tracking disabled
@@ -313,7 +313,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
ze_result_t returnType = parseErrorCode(returnValue);
if (returnType == ZE_RESULT_SUCCESS) {
if (!isCopyOnly()) {
if (!isCopyOnly(false)) {
postInitComputeSetup();
}
}
@@ -345,7 +345,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::executeCommandListImmed
return commandListExecutionResult;
}
if (this->isCopyOnly() && !this->isSyncModeQueue && !this->isTbxMode) {
if (this->isCopyOnly(false) && !this->isSyncModeQueue && !this->isTbxMode) {
this->commandContainer.currentLinearStreamStartOffsetRef() = this->commandContainer.getCommandStream()->getUsed();
this->handlePostSubmissionState();
} else {
@@ -394,7 +394,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, launchParams.outListCommands, relaxedOrderingDispatch, true, true, launchParams.omitAddingWaitEventsResidency);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, launchParams.outListCommands, relaxedOrderingDispatch, true, true, launchParams.omitAddingWaitEventsResidency, false);
if (ret) {
return ret;
}
@@ -445,7 +445,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false, false);
if (ret) {
return ret;
}
@@ -492,7 +492,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false, false);
if (ret) {
return ret;
}
@@ -519,7 +519,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
commandContainer.addToResidencyContainer(alloc);
for (uint32_t i = 0; i < numKernels; i++) {
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly());
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly(false));
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
pLaunchArgumentsBuffer[i],
@@ -558,7 +558,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false));
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false), false);
}
appendSynchronizedDispatchInitializationSection();
@@ -570,10 +570,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
// default state of event is single packet, handle case when reset is used 1st, launchkernel 2nd - just reset all packets then, use max
bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount);
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false, isCopyOnly());
bool appendPipeControlWithPostSync = (!isCopyOnly(false)) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false, isCopyOnly(false));
if (!isCopyOnly()) {
if (!isCopyOnly(false)) {
if (this->partitionCount > 1) {
appendMultiTileBarrier(*neoDevice);
}
@@ -605,7 +605,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false, false);
if (ret) {
return ret;
}
@@ -621,9 +621,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly());
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
addToMappedEventList(signalEvent);
if (this->isInOrderExecutionEnabled()) {
@@ -734,7 +734,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
image = peerImage;
}
if (isCopyOnly()) {
if (isCopyOnly(false)) {
size_t imgRowPitch = image->getImageInfo().rowPitch;
size_t imgSlicePitch = image->getImageInfo().slicePitch;
auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
@@ -903,7 +903,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
image = peerImage;
}
if (isCopyOnly()) {
if (isCopyOnly(false)) {
size_t imgRowPitch = image->getImageInfo().rowPitch;
size_t imgSlicePitch = image->getImageInfo().slicePitch;
auto status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
@@ -1001,7 +1001,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
addToMappedEventList(event);
addFlushRequiredCommand(allocationStruct.needsFlush, event, isCopyOnly());
addFlushRequiredCommand(allocationStruct.needsFlush, event, isCopyOnly(false));
return ret;
}
@@ -1080,7 +1080,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
srcImage = peerImage;
}
if (isCopyOnly()) {
if (isCopyOnly(false)) {
auto bytesPerPixel = static_cast<uint32_t>(srcImage->getImageInfo().surfaceFormat->imageElementSizeInBytes);
ze_image_region_t region = getRegionFromImageDesc(srcImage->getImageDesc());
@@ -1333,7 +1333,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
blitProperties.srcSize = srcSize;
blitProperties.dstSize = dstSize;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, true);
if (ret) {
return ret;
}
@@ -1401,7 +1401,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
ze_result_t ret = ZE_RESULT_ERROR_UNKNOWN;
if (isCopyOnly()) {
if (isCopyOnly(false)) {
return appendMemoryCopyBlit(dstAddress, dstAllocation, 0u,
srcAddress, srcAllocation, 0u,
size);
@@ -1452,7 +1452,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) {
const bool isCopyOnlyEnabled = isCopyOnly() || isCopyOffloadEnabled();
const bool isCopyOnlyEnabled = isCopyOnly(true);
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
@@ -1523,7 +1523,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
bool waitForImplicitInOrderDependency = !isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false, this->isCopyOffloadEnabled());
if (ret) {
return ret;
@@ -1672,7 +1672,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
bool forceDisableCopyOnlyInOrderSignaling) {
const bool isCopyOnlyEnabled = isCopyOnly() || isCopyOffloadEnabled();
const bool isCopyOnlyEnabled = isCopyOnly(true);
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
@@ -1966,13 +1966,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
dcFlush = getDcFlushRequired(signalEvent->isSignalScope());
}
if (isCopyOnly()) {
if (isCopyOnly(false)) {
auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
addToMappedEventList(signalEvent);
return status;
}
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false);
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, false);
if (res) {
return res;
}
@@ -2024,7 +2024,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false, isCopyOnly());
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false, isCopyOnly(false));
if (fillArguments.leftRemainingBytes > 0) {
launchParams.numKernelsInSplitLaunch++;
@@ -2169,8 +2169,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
addToMappedEventList(signalEvent);
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false, isCopyOnly());
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent, isCopyOnly());
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false, isCopyOnly(false));
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent, isCopyOnly(false));
bool nonWalkerInOrderCmdChaining = false;
if (this->isInOrderExecutionEnabled()) {
@@ -2210,7 +2210,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
if (this->maxFillPaternSizeForCopyEngine < patternSize) {
return ZE_RESULT_ERROR_INVALID_SIZE;
} else {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, false);
if (ret) {
return ret;
}
@@ -2409,17 +2409,17 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed) {
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation) {
if (hasInOrderDependencies()) {
if (this->latestHostWaitedInOrderSyncValue >= inOrderExecInfo->getCounterValue()) {
return false;
}
if (relaxedOrderingAllowed) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly());
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(copyOffloadOperation));
}
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), relaxedOrderingAllowed, true, false, false);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), relaxedOrderingAllowed, true, false, false, copyOffloadOperation);
return true;
}
@@ -2429,7 +2429,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency) {
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool copyOffloadOperation) {
bool inOrderDependenciesSent = false;
if (this->latestOperationRequiredNonWalkerInOrderCmdsChaining && !relaxedOrderingAllowed) {
@@ -2437,16 +2437,16 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
}
if (waitForImplicitInOrderDependency) {
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed);
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed, copyOffloadOperation);
}
if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependenciesSent) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly());
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(copyOffloadOperation));
}
if (numWaitEvents > 0) {
if (phWaitEvents) {
return CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, false, skipAddingWaitEventsToResidency, false);
return CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, false, skipAddingWaitEventsToResidency, false, copyOffloadOperation);
} else {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@@ -2458,7 +2458,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false));
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false), false);
}
auto event = Event::fromHandle(hEvent);
@@ -2481,8 +2481,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
}
event->setPacketsInUse(this->partitionCount);
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false, isCopyOnly());
bool appendPipeControlWithPostSync = (!isCopyOnly(false)) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(event, false);
@@ -2502,8 +2502,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
uint64_t waitValue, uint32_t offset,
bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency, bool noopDispatch) {
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency,
bool noopDispatch, bool copyOffloadOperation) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
UNRECOVERABLE_IF(waitValue > static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) && !isQwordInOrderCounter());
@@ -2518,7 +2518,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly());
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation));
} else {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
@@ -2537,8 +2537,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
if (!noopDispatch) {
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly());
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly());
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation));
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation));
} else {
memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM));
memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM));
@@ -2608,7 +2608,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::canSkipInOrderEventWait(Event &event,
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) {
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) {
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
if (NEO::debugManager.flags.EnableSWTags.get()) {
@@ -2621,7 +2621,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
}
if (this->isInOrderExecutionEnabled() && apiRequest) {
handleInOrderImplicitDependencies(false);
handleInOrderImplicitDependencies(false, copyOffloadOperation);
}
bool dcFlushRequired = false;
@@ -2633,7 +2633,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
}
}
if (dcFlushRequired) {
if (isCopyOnly()) {
if (isCopyOnly(copyOffloadOperation)) {
NEO::MiFlushArgs args{this->dummyBlitWa};
encodeMiFlush(0, 0, args);
} else {
@@ -2663,7 +2663,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecInfo(), outWaitCmds,
waitValue, event->getInOrderAllocationOffset(),
relaxedOrderingAllowed, false, skipAddingWaitEventsToResidency,
isCbEventBoundToCmdList(event));
isCbEventBoundToCmdList(event), copyOffloadOperation);
continue;
}
@@ -2672,10 +2672,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
commandContainer.addToResidencyContainer(event->getAllocation(this->device));
}
appendWaitOnSingleEvent(event, outWaitCmds, relaxedOrderingAllowed, CommandToPatch::WaitEventSemaphoreWait);
appendWaitOnSingleEvent(event, outWaitCmds, relaxedOrderingAllowed, copyOffloadOperation, CommandToPatch::WaitEventSemaphoreWait);
}
if (isImmediateType() && isCopyOnly() && trackDependencies) {
if (isImmediateType() && isCopyOnly(copyOffloadOperation) && trackDependencies) {
NEO::MiFlushArgs args{this->dummyBlitWa};
args.commandWithPostSync = true;
auto csr = getCsr(false);
@@ -2685,9 +2685,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (apiRequest) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(nullptr, false);
appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation);
}
handleInOrderDependencyCounter(nullptr, false, false);
handleInOrderDependencyCounter(nullptr, false, copyOffloadOperation);
}
if (NEO::debugManager.flags.EnableSWTags.get()) {
@@ -2817,11 +2817,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
if (maskLsb) {
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
} else {
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
}
if (outTimeStampSyncCmds != nullptr) {
@@ -2891,7 +2891,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false, false);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
@@ -2907,7 +2907,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false, false);
if (allocationStruct.alloc == nullptr) {
@@ -2915,7 +2915,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
}
commandContainer.addToResidencyContainer(allocationStruct.alloc);
if (isCopyOnly()) {
if (isCopyOnly(false)) {
NEO::MiFlushArgs args{this->dummyBlitWa};
args.timeStampOperation = true;
args.commandWithPostSync = true;
@@ -2933,7 +2933,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
args);
}
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly());
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
@@ -3039,7 +3039,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phEvents[i]);
if (event->isCounterBased()) {
appendWaitOnSingleEvent(event, nullptr, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
appendWaitOnSingleEvent(event, nullptr, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
}
}
@@ -3100,11 +3100,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
auto groupCount = ptrOffset(alloc->getGpuAddress(), groupCountOffset);
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimX,
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountX)), isCopyOnly());
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountX)), isCopyOnly(false));
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimY,
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountY)), isCopyOnly());
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountY)), isCopyOnly(false));
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimZ,
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountZ)), isCopyOnly());
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountZ)), isCopyOnly(false));
}
return ZE_RESULT_SUCCESS;
@@ -3452,7 +3452,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
return ZE_RESULT_SUCCESS;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false, false);
if (ret) {
return ret;
}
@@ -3468,10 +3468,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
if (!this->isInOrderExecutionEnabled()) {
if (isCopyOnly()) {
if (isCopyOnly(false)) {
NEO::MiFlushArgs args{this->dummyBlitWa};
uint64_t gpuAddress = 0u;
TaskCountType value = 0u;
@@ -3490,7 +3490,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
}
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false, isCopyOnly());
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false, isCopyOnly(false));
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
@@ -3618,14 +3618,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(false);
handleInOrderImplicitDependencies(false, false);
}
if (!handleCounterBasedEventOperations(signalEvent)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
@@ -3636,8 +3636,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
if (isQwordInOrderCounter()) {
indirectMode = true;
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly());
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly());
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly(false));
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly(false));
} else {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
@@ -3657,7 +3657,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment);
}
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly());
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
@@ -3682,12 +3682,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
commandContainer.addToResidencyContainer(dstAllocationStruct.alloc);
if (this->isInOrderExecutionEnabled()) {
handleInOrderImplicitDependencies(false);
handleInOrderImplicitDependencies(false, false);
}
const uint64_t gpuAddress = static_cast<uint64_t>(dstAllocationStruct.alignedAllocationPtr);
if (isCopyOnly()) {
if (isCopyOnly(false)) {
NEO::MiFlushArgs args{this->dummyBlitWa};
args.commandWithPostSync = true;
encodeMiFlush(gpuAddress,
@@ -3880,7 +3880,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchEventRemainingPacketsPostSync
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, CommandToPatch::CommandType storedSemaphore) {
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool copyOffloadOperation, CommandToPatch::CommandType storedSemaphore) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
uint64_t gpuAddr = event->getCompletionFieldGpuAddress(this->device);
@@ -3895,7 +3895,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
for (uint32_t i = 0u; i < packetsToWait; i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
NEO::CompareOperation::equal, true, false, isCopyOnly());
NEO::CompareOperation::equal, true, false, isCopyOnly(copyOffloadOperation));
} else {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
gpuAddr,
@@ -4100,7 +4100,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
// Patch Primary Tile section skip (to Secondary Tile section)
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(skipPrimaryTileSectionCmdStream, cmdStream->getCurrentGpuAddressPosition(), workPartitionAllocationGpuVa, 0,
NEO::CompareOperation::notEqual, false, false, isCopyOnly());
NEO::CompareOperation::notEqual, false, false, isCopyOnly(false));
// Secondary Tile section
{
@@ -4114,7 +4114,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
// Patch Primary Tile section jump to end
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(jumpToEndSectionFromPrimaryTile, cmdStream->getCurrentGpuAddressPosition(), syncAllocationGpuVa + sizeof(uint32_t), queueId,
NEO::CompareOperation::equal, false, false, isCopyOnly());
NEO::CompareOperation::equal, false, false, isCopyOnly(false));
// End section
NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
@@ -4151,7 +4151,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::isDeviceToHostCopyEventFenceRequired(
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyOffloadOperation) const {
return ((isCopyOnly() || copyOffloadOperation) && (srcAllocation->isAllocatedInLocalMemoryPool() && !dstAllocation->isAllocatedInLocalMemoryPool()));
return (isCopyOnly(copyOffloadOperation) && (srcAllocation->isAllocatedInLocalMemoryPool() && !dstAllocation->isAllocatedInLocalMemoryPool()));
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -106,7 +106,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
size_t size, bool flushHost) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override;
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override;
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;

View File

@@ -550,12 +550,12 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
if (nonWalkerSignalingHasRelaxedOrdering) {
result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, true, false, nullptr, false);
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream(), isCopyOnly());
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream(), isCopyOnly(false));
relaxedOrderingDispatch = true;
hasStallingCmds = hasStallingCmdsForRelaxedOrdering(1, relaxedOrderingDispatch);
}
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, CommandToPatch::Invalid);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, false, CommandToPatch::Invalid);
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false);
}
@@ -611,7 +611,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, isCopyOffloadEnabled());
auto estimatedSize = commonImmediateCommandSize;
if (isCopyOnly() || isCopyOffloadEnabled()) {
if (isCopyOnly(true)) {
auto nBlits = size / (NEO::BlitCommandsHelper<GfxFamily>::getMaxBlitWidth(this->device->getNEODevice()->getRootDeviceEnvironment()) *
NEO::BlitCommandsHelper<GfxFamily>::getMaxBlitHeight(this->device->getNEODevice()->getRootDeviceEnvironment(), true));
auto sizePerBlit = sizeof(typename GfxFamily::XY_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize();
@@ -665,7 +665,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, isCopyOffloadEnabled());
auto estimatedSize = commonImmediateCommandSize;
if (isCopyOnly() || isCopyOffloadEnabled()) {
if (isCopyOnly(true)) {
auto xBlits = static_cast<size_t>(std::ceil(srcRegion->width / static_cast<double>(BlitterConstants::maxBlitWidth)));
auto yBlits = static_cast<size_t>(std::ceil(srcRegion->height / static_cast<double>(BlitterConstants::maxBlitHeight)));
auto zBlits = static_cast<size_t>(srcRegion->depth);
@@ -773,7 +773,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) {
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) {
bool allSignaled = true;
for (auto i = 0u; i < numEvents; i++) {
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
@@ -786,7 +786,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
checkAvailableSpace(numEvents, false, commonImmediateCommandSize);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, false);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, false, copyOffloadOperation);
this->dependenciesPresent = true;
if (skipFlush) {
@@ -838,7 +838,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
auto estimatedSize = commonImmediateCommandSize;
if (isCopyOnly()) {
if (isCopyOnly(false)) {
auto imgSize = L0::Image::fromHandle(hSrcImage)->getImageInfo().size;
auto nBlits = static_cast<size_t>(std::ceil(imgSize / static_cast<double>(BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight)));
auto sizePerBlit = sizeof(typename GfxFamily::XY_BLOCK_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize();
@@ -1508,7 +1508,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
auto ret = ZE_RESULT_SUCCESS;
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize);
if (numWaitEvents) {
ret = this->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, true, true);
ret = this->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, true, true, false);
}
if (ret != ZE_RESULT_SUCCESS) {

View File

@@ -437,7 +437,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
if (!launchParams.skipInOrderNonWalkerSignaling) {
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
appendSignalInOrderDependencyCounter(eventForInOrderExec, false);
}
} else {
@@ -525,14 +525,14 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
partitionDataSize,
isCopyOnly());
isCopyOnly(false));
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset(),
isCopyOnly());
isCopyOnly(false));
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -621,7 +621,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly());
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly(false));
}
}

View File

@@ -55,7 +55,7 @@ ze_result_t CommandListImp::destroy() {
}
if (!isImmediateType() &&
!isCopyOnly() &&
!isCopyOnly(false) &&
this->stateBaseAddressTracking &&
this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) {
@@ -257,7 +257,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
commandList->copyThroughLockedPtrEnabled = gfxCoreHelper.copyThroughLockedPtrEnabled(hwInfo, device->getProductHelper());
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && !commandList->isCopyOnly() && commandList->isInOrderExecutionEnabled()) {
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && !commandList->isCopyOnly(false) && commandList->isInOrderExecutionEnabled()) {
commandList->enableCopyOperationOffload(productFamily, device, desc);
}

View File

@@ -47,7 +47,6 @@ struct CommandListImp : public CommandList {
void enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode);
NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const { return synchronizedDispatchMode; }
void enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc);
bool isCopyOffloadEnabled() const { return copyOperationOffloadEnabled; }
void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr);
protected:
@@ -65,8 +64,6 @@ struct CommandListImp : public CommandList {
static constexpr bool cmdListDefaultGlobalAtomics = false;
std::vector<Event *> mappedTsEventList;
std::vector<Event *> interruptEvents;
bool copyOperationOffloadEnabled = false;
};
} // namespace L0

View File

@@ -104,10 +104,10 @@ struct BcsSplit {
for (size_t i = 0; i < cmdQsForSplit.size(); i++) {
if (barrierRequired) {
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
cmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false);
cmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
}
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, true, false);
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
if (signalEvent && i == 0u) {
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false, true);
@@ -136,7 +136,7 @@ struct BcsSplit {
}
}
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), nullptr, hasRelaxedOrderingDependencies, false, true, false);
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
if (signalEvent) {
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, true, false, true);
}

View File

@@ -348,7 +348,7 @@ void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::appendNonKernelOperation(L
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
} else if (operation == NonKernelOperation::WaitOnEvents) {
auto eventHandle = event->toHandle();
result = currentCmdList->appendWaitOnEvents(1, &eventHandle, nullptr, false, false, false, false, false);
result = currentCmdList->appendWaitOnEvents(1, &eventHandle, nullptr, false, false, false, false, false, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
} else if (operation == NonKernelOperation::WriteGlobalTimestamp) {
uint64_t timestampAddress = 0xfffffffffff0L;

View File

@@ -477,7 +477,7 @@ struct MockCommandList : public CommandList {
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numEvents,
ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush));
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation));
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
(uint64_t * dstptr,

View File

@@ -273,7 +273,7 @@ HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendedToAsy
cmdList.callBaseExecute = true;
cmdList.cmdListType = CommandList::CommandListType::typeImmediate;
cmdList.isSyncModeQueue = false;
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, false, result);
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(false), false, false, result);
cmdList.cmdQImmediate = commandQueue;
result = cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u);
@@ -307,7 +307,7 @@ HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendedToSyn
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
desc.pNext = 0;
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, false, result);
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(false), false, false, result);
cmdList.cmdQImmediate = commandQueue;
result = cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u);
@@ -347,7 +347,7 @@ HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendToSynch
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
desc.pNext = 0;
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, false, result);
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(false), false, false, result);
cmdList.cmdQImmediate = commandQueue;
result = cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u);

View File

@@ -1181,7 +1181,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false), true, true);
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), true, true);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, 0, nullptr), true, true);
@@ -1584,7 +1584,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false), false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false, false);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
false, false);
@@ -1974,7 +1974,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
@@ -2089,7 +2089,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
@@ -2185,7 +2185,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCre
const auto oldCsr = queue->csr;
queue->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
queue->csr = oldCsr;

View File

@@ -1389,7 +1389,7 @@ HWTEST2_F(CommandListCreateWithBcs,
auto internalCopyEngine = neoDevice->getInternalCopyEngine();
EXPECT_NE(nullptr, internalCopyEngine);
EXPECT_EQ(cmdQueue->getCsr(), internalCopyEngine->commandStreamReceiver);
EXPECT_TRUE(commandList->isCopyOnly());
EXPECT_TRUE(commandList->isCopyOnly(false));
}
HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingCommandListUsingLinkedCopyThenFlushTaskModeUsed, IsAtLeastXeHpCore) {
@@ -1408,7 +1408,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingComman
returnValue));
ASSERT_NE(nullptr, commandList);
EXPECT_TRUE(commandList->isCopyOnly());
EXPECT_TRUE(commandList->isCopyOnly(false));
EXPECT_TRUE(commandList->flushTaskSubmissionEnabled());
}

View File

@@ -644,7 +644,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, returnValue));
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
context->destroy();
@@ -809,7 +809,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, true, false, false, false);
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(whiteBoxCmdList->getCsr(false)->getNextBarrierCount(), 2u);
@@ -851,7 +851,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, false, false, false, false);
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, false, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(whiteBoxCmdList->getCsr(false)->getNextBarrierCount(), 1u);
}

View File

@@ -623,7 +623,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
event.signalScope = 0;
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
auto eventHandle = event.toHandle();
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -647,7 +647,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen
event.signalScope = 0;
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
auto eventHandle = event.toHandle();
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -678,7 +678,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP
event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
ze_event_handle_t events[] = {&event, &event2};
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -731,7 +731,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
ze_event_handle_t events[] = {&event, &event2};
size_t startOffset = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
size_t endOffset = commandContainer.getCommandStream()->getUsed();
size_t usedBufferSize = (endOffset - startOffset);
@@ -778,7 +778,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
ze_event_handle_t events[] = {&event, &event2};
size_t startOffset = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
size_t endOffset = commandContainer.getCommandStream()->getUsed();
size_t usedBufferSize = (endOffset - startOffset);
@@ -819,7 +819,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly
ze_event_handle_t events[] = {&event, &event2};
auto used = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -853,7 +853,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListAndAlreadyCompletedEventWh
ze_event_handle_t events[] = {&event, &event2};
event.isCompleted = Event::State::STATE_SIGNALED;
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, nullptr, false, false, true, false);
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, nullptr, false, false, true, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -1041,7 +1041,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe
ze_event_handle_t events[] = {&event, &event2};
auto used = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
@@ -1074,7 +1074,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm
event2.waitScope = 0;
ze_event_handle_t events[] = {&event, &event2};
auto ret = commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
auto ret = commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
}

View File

@@ -993,7 +993,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
EXPECT_TRUE(cmdList.dependenciesPresent);

View File

@@ -647,7 +647,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnE
eventPool->createEvent(&eventDesc, &hEvent);
auto result = commandList->appendWaitOnEvents(1, &hEvent, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(1, &hEvent, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdStream->getUsed();

View File

@@ -639,7 +639,7 @@ HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThen
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false, false);
EXPECT_TRUE(commandList->isCopyOnly());
EXPECT_TRUE(commandList->isCopyOnly(false));
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->getCsr(false)->getInternalAllocationStorage()->getTemporaryAllocations().freeAllGraphicsAllocations(device->getNEODevice());

View File

@@ -34,7 +34,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -89,7 +89,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
ultCsr->directSubmission.reset(directSubmission);
ze_event_handle_t hEventHandle = event->toHandle();
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, nullptr, true, true, true, false);
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, nullptr, true, true, true, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = immCommandList->getCmdContainer().getCommandStream()->getUsed();
@@ -253,12 +253,12 @@ class MockCommandListImmediateHwWithWaitEventFail : public WhiteBox<::L0::Comman
using BaseClass::setupFillKernelArguments;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override {
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override {
if (forceWaitEventError) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendWaitEventCalled++;
return BaseClass::appendWaitOnEvents(numEvents, phEvent, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, skipFlush);
return BaseClass::appendWaitOnEvents(numEvents, phEvent, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, skipFlush, copyOffloadOperation);
};
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override {
@@ -398,7 +398,7 @@ class MockAppendRegularCommandlistWithWaitOnEvents : public MockCommandListForAp
public:
MockAppendRegularCommandlistWithWaitOnEvents() : MockCommandListForAppendLaunchKernel<gfxCoreFamily>() {}
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override {
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
};
};
@@ -489,7 +489,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()};
auto result = commandList->appendWaitOnEvents(2, handles, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(2, handles, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -520,7 +520,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) {
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer();
@@ -547,7 +547,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -592,7 +592,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
event->setPacketsInUse(3u);
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -655,7 +655,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
ASSERT_EQ(9u, event->getPacketsInUse());
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -772,7 +772,7 @@ HWTEST_F(CommandListAppendWaitOnSecondaryBatchBufferEvent, givenCommandBufferIsE
ze_event_handle_t hEventHandle = event->toHandle();
auto oldCommandBuffer = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -834,7 +834,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent,
ze_event_handle_t eventHandle = event->toHandle();
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
auto result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
auto result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -876,7 +876,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
ze_event_handle_t eventHandle = event->toHandle();
EXPECT_FALSE(cmdList.dependenciesPresent);
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false));
EXPECT_TRUE(cmdList.dependenciesPresent);
}
@@ -893,7 +893,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
ze_event_handle_t eventHandle = event->toHandle();
EXPECT_FALSE(cmdList.dependenciesPresent);
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false));
EXPECT_FALSE(cmdList.dependenciesPresent);
}
@@ -976,7 +976,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
auto eventHandle = event->toHandle();
commandListImmediate->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
commandListImmediate->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
EXPECT_EQ(0u, ultCsr.downloadAllocationsCalledCount);
}
@@ -1135,7 +1135,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, GivenOutCmdListProvidedAndSkipResidencyFl
auto usedSpaceBefore = cmdListStream->getUsed();
bool skipResidency = true;
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false);
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdListStream->getUsed();
@@ -1168,7 +1168,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, GivenOutCmdListProvidedAndSkipResidencyFl
EXPECT_EQ(cmdListResidency.end(), eventResidencyIt);
skipResidency = false;
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false);
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
eventResidencyIt = std::find(cmdListResidency.begin(), cmdListResidency.end(), eventPoolAllocation);

View File

@@ -1121,7 +1121,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
size_t sizeBefore = cmdStream->getUsed();
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
size_t sizeAfter = cmdStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

View File

@@ -2050,7 +2050,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsTh
auto offset = cmdStream->getUsed();
immCmdList->addEventsToCmdList(0, nullptr, nullptr, true, true, true, false);
immCmdList->addEventsToCmdList(0, nullptr, nullptr, true, true, true, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
@@ -4989,7 +4989,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenSignalli
size_t offset = cmdStream->getUsed();
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false);
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false, false);
EXPECT_EQ(partitionCount * 2, immCmdList->inOrderExecInfo->getCounterValue());
@@ -5036,7 +5036,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenDuplicatedCounterStorageAndAtomicSi
size_t offset = cmdStream->getUsed();
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false);
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false, false);
EXPECT_EQ(partitionCount * 2, immCmdList->inOrderExecInfo->getCounterValue());
@@ -5091,7 +5091,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenDuplicatedCounterStorageAndWithoutA
size_t offset = cmdStream->getUsed();
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false);
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false, false);
expectedCounter += counterIncrement;
EXPECT_EQ(expectedCounter, immCmdList->inOrderExecInfo->getCounterValue());
@@ -7624,7 +7624,8 @@ HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledWhenProgrammingHwCmdsT
using XY_COPY_BLT = typename std::remove_const<decltype(FamilyType::cmdInitXyCopyBlt)>::type;
auto immCmdList = createImmCmdListWithOffload<gfxCoreFamily>();
EXPECT_FALSE(immCmdList->isCopyOnly());
EXPECT_FALSE(immCmdList->isCopyOnly(false));
EXPECT_TRUE(immCmdList->isCopyOnly(true));
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -7659,9 +7660,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledWhenProgrammingHwCmdsT
}
HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWhenAppendingThenUseBcsCommands, IsAtLeastXeHpCore) {
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
auto immCmdList = createImmCmdListWithOffload<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
auto eventPool = createEvents<FamilyType>(1, true);
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto offset = cmdStream->getUsed();
@@ -7681,6 +7686,86 @@ HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWhenAppendingThenUseBcsCom
auto miFlushCmds = findAll<typename FamilyType::MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
EXPECT_NE(0u, miFlushCmds.size());
auto lrrCmds = findAll<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
auto lriCmds = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
auto lrmCmds = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
for (auto &lrr : lrrCmds) {
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*lrr);
EXPECT_TRUE(lrrCmd->getSourceRegisterAddress() > RegisterOffsets::bcs0Base);
EXPECT_TRUE(lrrCmd->getDestinationRegisterAddress() > RegisterOffsets::bcs0Base);
}
for (auto &lri : lriCmds) {
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lri);
EXPECT_TRUE(lriCmd->getRegisterOffset() > RegisterOffsets::bcs0Base);
}
for (auto &lrm : lrmCmds) {
auto lrmCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*lrm);
EXPECT_TRUE(lrmCmd->getRegisterAddress() > RegisterOffsets::bcs0Base);
}
}
// With the DirectSubmissionRelaxedOrdering flag set and direct submission mocks installed on both
// the main and the copy-offload CSR, appends two copy operations that signal an event and checks
// that every MI_LOAD_REGISTER_REG / MI_LOAD_REGISTER_IMM / MI_STORE_REGISTER_MEM found in the
// produced commands addresses a register above RegisterOffsets::bcs0Base.
HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWithRelaxedOrderingWhenAppendingThenUseBcsCommands, IsAtLeastXeHpCore) {
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
    using UltCsr = UltCommandStreamReceiver<FamilyType>;

    debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

    auto immCmdList = createImmCmdListWithOffload<gfxCoreFamily>();

    UltCsr *mainQueueCsr = static_cast<UltCsr *>(immCmdList->getCsr(false));
    UltCsr *copyQueueCsr = static_cast<UltCsr *>(immCmdList->getCsr(true));

    // Both mocks are constructed first and only then handed over to their CSRs.
    auto renderSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*mainQueueCsr);
    auto blitterSubmission = new MockDirectSubmissionHw<FamilyType, BlitterDispatcher<FamilyType>>(*copyQueueCsr);
    mainQueueCsr->directSubmission.reset(renderSubmission);
    copyQueueCsr->blitterDirectSubmission.reset(blitterSubmission);

    // Only the addresses of these ints are used; they act as two distinct client identities.
    // NOTE(review): presumably more than one registered client is needed for relaxed ordering - confirm.
    int firstClient;
    int secondClient;
    UltCsr *csrsToRegister[] = {mainQueueCsr, copyQueueCsr};
    for (UltCsr *csr : csrsToRegister) {
        csr->registerClient(&firstClient);
        csr->registerClient(&secondClient);
    }

    // Named local on purpose: the pool has to stay alive until the end of the test body.
    auto profilingEventPool = createEvents<FamilyType>(1, true);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
    const auto streamStart = cmdStream->getUsed();

    auto signalEvent = events[0]->toHandle();
    immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, signalEvent, 0, nullptr, false, false);

    ze_copy_region_t region = {0, 0, 0, 1, 1, 1};
    immCmdList->appendMemoryCopyRegion(&copyData1, &region, 1, 1, &copyData2, &region, 1, 1, signalEvent, 0, nullptr, false, false);

    // Parse only what the two appends above added to the stream.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), streamStart), (cmdStream->getUsed() - streamStart)));

    auto regToRegLoads = findAll<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
    for (auto &cmdIt : regToRegLoads) {
        auto loadRegReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*cmdIt);
        EXPECT_TRUE(loadRegReg->getSourceRegisterAddress() > RegisterOffsets::bcs0Base);
        EXPECT_TRUE(loadRegReg->getDestinationRegisterAddress() > RegisterOffsets::bcs0Base);
    }

    auto immediateLoads = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    for (auto &cmdIt : immediateLoads) {
        auto loadRegImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*cmdIt);
        EXPECT_TRUE(loadRegImm->getRegisterOffset() > RegisterOffsets::bcs0Base);
    }

    auto registerStores = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    for (auto &cmdIt : registerStores) {
        auto storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*cmdIt);
        EXPECT_TRUE(storeRegMem->getRegisterAddress() > RegisterOffsets::bcs0Base);
    }
}
HWTEST2_F(CopyOffloadInOrderTests, givenAtomicSignalingModeWhenUpdatingCounterThenUseCorrectHwCommands, IsAtLeastXeHpCore) {

View File

@@ -377,7 +377,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
@@ -441,7 +441,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);

View File

@@ -569,7 +569,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr
auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::copy, cmdListFlags, returnValue, false);
auto commandListHandle = commandList->toHandle();
auto commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, commandList->isCopyOnly(), false, true, returnValue);
auto commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, commandList->isCopyOnly(false), false, true, returnValue);
ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
eventPoolDesc.count = 1;

View File

@@ -160,7 +160,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *computeCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hComputeCommandList));
EXPECT_FALSE(computeCommandList->isCopyOnly());
EXPECT_FALSE(computeCommandList->isCopyOnly(false));
ze_command_queue_handle_t hCommandQueue{};
ze_command_queue_desc_t computeCommandQueueDesc{};
@@ -178,7 +178,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *copyCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCopyCommandList));
EXPECT_TRUE(copyCommandList->isCopyOnly());
EXPECT_TRUE(copyCommandList->isCopyOnly(false));
computeCommandQueue->destroy();
computeCommandList->destroy();
@@ -342,7 +342,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *computeCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hComputeCommandList));
EXPECT_FALSE(computeCommandList->isCopyOnly());
EXPECT_FALSE(computeCommandList->isCopyOnly(false));
ze_command_list_handle_t hCopyCommandList{};
ze_command_queue_desc_t copyDesc{};
@@ -351,7 +351,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *copyCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCopyCommandList));
EXPECT_TRUE(copyCommandList->isCopyOnly());
EXPECT_TRUE(copyCommandList->isCopyOnly(false));
computeCommandList->destroy();
copyCommandList->destroy();

View File

@@ -819,7 +819,7 @@ ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_even
commandList.getCmdContainer().addToResidencyContainer(pool.pAllocation);
// Wait for events before executing query.
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, false, false, false);
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, false, false, false, false);
if (metricQueriesSize) {