mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-27 15:53:13 +08:00
fix: copy offload mmio programming
Related-To: NEO-12771 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6ce947cb77
commit
2618f586fa
@@ -65,7 +65,7 @@ ze_result_t zeCommandListAppendWaitOnEvents(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
uint32_t numEvents,
|
||||
ze_event_handle_t *phEvents) {
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, nullptr, false, true, true, false, false);
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, nullptr, false, true, true, false, false, false);
|
||||
}
|
||||
|
||||
ze_result_t zeEventHostSignal(
|
||||
|
||||
@@ -135,7 +135,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
|
||||
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
|
||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) = 0;
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) = 0;
|
||||
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
|
||||
@@ -254,9 +254,11 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
void removeMemoryPrefetchAllocations();
|
||||
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
bool isCopyOnly() const {
|
||||
return NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType);
|
||||
bool isCopyOnly(bool copyOffloadOperation) const {
|
||||
return NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType) || (copyOffloadOperation && this->isCopyOffloadEnabled());
|
||||
}
|
||||
bool isCopyOffloadEnabled() const { return copyOperationOffloadEnabled; }
|
||||
|
||||
bool isInternal() const {
|
||||
return internalUsage;
|
||||
}
|
||||
@@ -267,7 +269,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
return performMemoryPrefetch;
|
||||
}
|
||||
bool storeExternalPtrAsTemporary() const {
|
||||
return isImmediateType() && (this->isFlushTaskSubmissionEnabled || isCopyOnly());
|
||||
return isImmediateType() && (this->isFlushTaskSubmissionEnabled || isCopyOnly(false));
|
||||
}
|
||||
bool isWaitForEventsFromHostEnabled();
|
||||
|
||||
@@ -491,6 +493,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool requiresDcFlushForDcMitigation = false;
|
||||
bool statelessBuiltinsEnabled = false;
|
||||
bool localDispatchSupport = false;
|
||||
bool copyOperationOffloadEnabled = false;
|
||||
};
|
||||
|
||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||
|
||||
@@ -181,10 +181,10 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override;
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override;
|
||||
void appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
|
||||
uint64_t waitValue, uint32_t offset,
|
||||
bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency, bool noopDispatch);
|
||||
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency,
|
||||
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool copyOffloadOperation);
|
||||
void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation);
|
||||
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining, bool copyOffloadOperation);
|
||||
|
||||
@@ -197,7 +197,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void appendMultiPartitionEpilogue() override;
|
||||
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation);
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency);
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool copyOffloadOperation);
|
||||
|
||||
MOCKABLE_VIRTUAL void appendSynchronizedDispatchInitializationSection();
|
||||
MOCKABLE_VIRTUAL void appendSynchronizedDispatchCleanupSection();
|
||||
@@ -286,7 +286,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
Event *signalEvent,
|
||||
CmdListKernelLaunchParams &launchParams);
|
||||
|
||||
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, CommandToPatch::CommandType storedSemaphore);
|
||||
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool copyOffloadOperation, CommandToPatch::CommandType storedSemaphore);
|
||||
|
||||
void appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation);
|
||||
|
||||
@@ -357,7 +357,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const { return false; }
|
||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
|
||||
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
|
||||
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);
|
||||
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
|
||||
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
|
||||
bool hasInOrderDependencies() const;
|
||||
|
||||
@@ -113,7 +113,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
commandContainer.reset();
|
||||
clearCommandsToPatch();
|
||||
|
||||
if (!isCopyOnly()) {
|
||||
if (!isCopyOnly(false)) {
|
||||
printfKernelContainer.clear();
|
||||
containsStatelessUncachedResource = false;
|
||||
indirectAllocationsAllowed = false;
|
||||
@@ -173,7 +173,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
|
||||
}
|
||||
|
||||
if (!isQwordInOrderCounter() && ((inOrderExecInfo->getCounterValue() + 1) == std::numeric_limits<uint32_t>::max())) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false, copyOffloadOperation);
|
||||
|
||||
inOrderExecInfo->resetCounterValue();
|
||||
|
||||
@@ -252,7 +252,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
this->duplicatedInOrderCounterStorageEnabled = gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(rootDeviceEnvironment);
|
||||
this->inOrderAtomicSignalingEnabled = gfxCoreHelper.inOrderAtomicSignallingEnabled(rootDeviceEnvironment);
|
||||
this->scratchAddressPatchingEnabled = (this->heaplessModeEnabled && !isImmediateType());
|
||||
this->copyOperationFenceSupported = (isCopyOnly() || isCopyOffloadEnabled()) && productHelper.isDeviceToHostCopySignalingFenceRequired();
|
||||
this->copyOperationFenceSupported = (isCopyOnly(false) || isCopyOffloadEnabled()) && productHelper.isDeviceToHostCopySignalingFenceRequired();
|
||||
this->defaultPipelinedThreadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy();
|
||||
this->implicitSynchronizedDispatchForCooperativeKernelsAllowed = l0GfxCoreHelper.implicitSynchronizedDispatchForCooperativeKernelsAllowed();
|
||||
if (NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
|
||||
@@ -270,7 +270,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
this->commandContainer.setStateBaseAddressTracking(this->stateBaseAddressTracking);
|
||||
this->commandContainer.setUsingPrimaryBuffer(this->dispatchCmdListBatchBufferAsPrimary);
|
||||
|
||||
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
|
||||
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly(false)) {
|
||||
this->partitionCount = static_cast<uint32_t>(neoDevice->getDeviceBitfield().count());
|
||||
}
|
||||
|
||||
@@ -303,7 +303,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(),
|
||||
deviceImp->allocationsForReuse.get(),
|
||||
NEO::EncodeStates<GfxFamily>::getSshHeapSize(),
|
||||
!isCopyOnly(),
|
||||
!isCopyOnly(false),
|
||||
createSecondaryCmdBufferInHostMem);
|
||||
if (!this->pipelineSelectStateTracking) {
|
||||
// allow systolic support set in container when tracking disabled
|
||||
@@ -313,7 +313,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
|
||||
ze_result_t returnType = parseErrorCode(returnValue);
|
||||
if (returnType == ZE_RESULT_SUCCESS) {
|
||||
if (!isCopyOnly()) {
|
||||
if (!isCopyOnly(false)) {
|
||||
postInitComputeSetup();
|
||||
}
|
||||
}
|
||||
@@ -345,7 +345,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::executeCommandListImmed
|
||||
return commandListExecutionResult;
|
||||
}
|
||||
|
||||
if (this->isCopyOnly() && !this->isSyncModeQueue && !this->isTbxMode) {
|
||||
if (this->isCopyOnly(false) && !this->isSyncModeQueue && !this->isTbxMode) {
|
||||
this->commandContainer.currentLinearStreamStartOffsetRef() = this->commandContainer.getCommandStream()->getUsed();
|
||||
this->handlePostSubmissionState();
|
||||
} else {
|
||||
@@ -394,7 +394,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||
}
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, launchParams.outListCommands, relaxedOrderingDispatch, true, true, launchParams.omitAddingWaitEventsResidency);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, launchParams.outListCommands, relaxedOrderingDispatch, true, true, launchParams.omitAddingWaitEventsResidency, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -445,7 +445,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -492,7 +492,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -519,7 +519,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
commandContainer.addToResidencyContainer(alloc);
|
||||
|
||||
for (uint32_t i = 0; i < numKernels; i++) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly());
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly(false));
|
||||
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
|
||||
pLaunchArgumentsBuffer[i],
|
||||
@@ -558,7 +558,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
}
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false));
|
||||
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false), false);
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
@@ -570,10 +570,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
// default state of event is single packet, handle case when reset is used 1st, launchkernel 2nd - just reset all packets then, use max
|
||||
bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount);
|
||||
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false, isCopyOnly());
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly(false)) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false, isCopyOnly(false));
|
||||
|
||||
if (!isCopyOnly()) {
|
||||
if (!isCopyOnly(false)) {
|
||||
if (this->partitionCount > 1) {
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
}
|
||||
@@ -605,7 +605,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -621,9 +621,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
|
||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly());
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
@@ -734,7 +734,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
|
||||
image = peerImage;
|
||||
}
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
size_t imgRowPitch = image->getImageInfo().rowPitch;
|
||||
size_t imgSlicePitch = image->getImageInfo().slicePitch;
|
||||
auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
|
||||
@@ -903,7 +903,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
|
||||
image = peerImage;
|
||||
}
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
size_t imgRowPitch = image->getImageInfo().rowPitch;
|
||||
size_t imgSlicePitch = image->getImageInfo().slicePitch;
|
||||
auto status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
|
||||
@@ -1001,7 +1001,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
|
||||
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
|
||||
addToMappedEventList(event);
|
||||
|
||||
addFlushRequiredCommand(allocationStruct.needsFlush, event, isCopyOnly());
|
||||
addFlushRequiredCommand(allocationStruct.needsFlush, event, isCopyOnly(false));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1080,7 +1080,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
||||
srcImage = peerImage;
|
||||
}
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
auto bytesPerPixel = static_cast<uint32_t>(srcImage->getImageInfo().surfaceFormat->imageElementSizeInBytes);
|
||||
|
||||
ze_image_region_t region = getRegionFromImageDesc(srcImage->getImageDesc());
|
||||
@@ -1333,7 +1333,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
blitProperties.srcSize = srcSize;
|
||||
blitProperties.dstSize = dstSize;
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1401,7 +1401,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
|
||||
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
|
||||
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
|
||||
ze_result_t ret = ZE_RESULT_ERROR_UNKNOWN;
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
return appendMemoryCopyBlit(dstAddress, dstAllocation, 0u,
|
||||
srcAddress, srcAllocation, 0u,
|
||||
size);
|
||||
@@ -1452,7 +1452,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) {
|
||||
const bool isCopyOnlyEnabled = isCopyOnly() || isCopyOffloadEnabled();
|
||||
const bool isCopyOnlyEnabled = isCopyOnly(true);
|
||||
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
|
||||
|
||||
@@ -1523,7 +1523,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
bool waitForImplicitInOrderDependency = !isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed;
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false, this->isCopyOffloadEnabled());
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
@@ -1672,7 +1672,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
|
||||
bool forceDisableCopyOnlyInOrderSignaling) {
|
||||
const bool isCopyOnlyEnabled = isCopyOnly() || isCopyOffloadEnabled();
|
||||
const bool isCopyOnlyEnabled = isCopyOnly(true);
|
||||
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnlyEnabled;
|
||||
|
||||
@@ -1966,13 +1966,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
dcFlush = getDcFlushRequired(signalEvent->isSignalScope());
|
||||
}
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
addToMappedEventList(signalEvent);
|
||||
return status;
|
||||
}
|
||||
|
||||
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false);
|
||||
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, false);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
@@ -2024,7 +2024,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
|
||||
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false, isCopyOnly());
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false, isCopyOnly(false));
|
||||
|
||||
if (fillArguments.leftRemainingBytes > 0) {
|
||||
launchParams.numKernelsInSplitLaunch++;
|
||||
@@ -2169,8 +2169,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
|
||||
addToMappedEventList(signalEvent);
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false, isCopyOnly());
|
||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent, isCopyOnly());
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false, isCopyOnly(false));
|
||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent, isCopyOnly(false));
|
||||
|
||||
bool nonWalkerInOrderCmdChaining = false;
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
@@ -2210,7 +2210,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
if (this->maxFillPaternSizeForCopyEngine < patternSize) {
|
||||
return ZE_RESULT_ERROR_INVALID_SIZE;
|
||||
} else {
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2409,17 +2409,17 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed) {
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation) {
|
||||
if (hasInOrderDependencies()) {
|
||||
if (this->latestHostWaitedInOrderSyncValue >= inOrderExecInfo->getCounterValue()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly());
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(copyOffloadOperation));
|
||||
}
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), relaxedOrderingAllowed, true, false, false);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset(), relaxedOrderingAllowed, true, false, false, copyOffloadOperation);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -2429,7 +2429,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency) {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency, bool copyOffloadOperation) {
|
||||
bool inOrderDependenciesSent = false;
|
||||
|
||||
if (this->latestOperationRequiredNonWalkerInOrderCmdsChaining && !relaxedOrderingAllowed) {
|
||||
@@ -2437,16 +2437,16 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||
}
|
||||
|
||||
if (waitForImplicitInOrderDependency) {
|
||||
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed);
|
||||
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed, copyOffloadOperation);
|
||||
}
|
||||
|
||||
if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependenciesSent) {
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly());
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream(), isCopyOnly(copyOffloadOperation));
|
||||
}
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, false, skipAddingWaitEventsToResidency, false);
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, false, skipAddingWaitEventsToResidency, false, copyOffloadOperation);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -2458,7 +2458,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false));
|
||||
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false), false);
|
||||
}
|
||||
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
@@ -2481,8 +2481,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
}
|
||||
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false, isCopyOnly());
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly(false)) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, nullptr, nullptr, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false, isCopyOnly(false));
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(event, false);
|
||||
@@ -2502,8 +2502,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
|
||||
uint64_t waitValue, uint32_t offset,
|
||||
bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency, bool noopDispatch) {
|
||||
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency,
|
||||
bool noopDispatch, bool copyOffloadOperation) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
UNRECOVERABLE_IF(waitValue > static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) && !isQwordInOrderCounter());
|
||||
@@ -2518,7 +2518,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly());
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation));
|
||||
|
||||
} else {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
@@ -2537,8 +2537,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
|
||||
if (!noopDispatch) {
|
||||
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly());
|
||||
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly());
|
||||
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
} else {
|
||||
memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
@@ -2608,7 +2608,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::canSkipInOrderEventWait(Event &event,
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) {
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2621,7 +2621,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
}
|
||||
|
||||
if (this->isInOrderExecutionEnabled() && apiRequest) {
|
||||
handleInOrderImplicitDependencies(false);
|
||||
handleInOrderImplicitDependencies(false, copyOffloadOperation);
|
||||
}
|
||||
|
||||
bool dcFlushRequired = false;
|
||||
@@ -2633,7 +2633,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
}
|
||||
}
|
||||
if (dcFlushRequired) {
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(copyOffloadOperation)) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
encodeMiFlush(0, 0, args);
|
||||
} else {
|
||||
@@ -2663,7 +2663,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecInfo(), outWaitCmds,
|
||||
waitValue, event->getInOrderAllocationOffset(),
|
||||
relaxedOrderingAllowed, false, skipAddingWaitEventsToResidency,
|
||||
isCbEventBoundToCmdList(event));
|
||||
isCbEventBoundToCmdList(event), copyOffloadOperation);
|
||||
|
||||
continue;
|
||||
}
|
||||
@@ -2672,10 +2672,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
commandContainer.addToResidencyContainer(event->getAllocation(this->device));
|
||||
}
|
||||
|
||||
appendWaitOnSingleEvent(event, outWaitCmds, relaxedOrderingAllowed, CommandToPatch::WaitEventSemaphoreWait);
|
||||
appendWaitOnSingleEvent(event, outWaitCmds, relaxedOrderingAllowed, copyOffloadOperation, CommandToPatch::WaitEventSemaphoreWait);
|
||||
}
|
||||
|
||||
if (isImmediateType() && isCopyOnly() && trackDependencies) {
|
||||
if (isImmediateType() && isCopyOnly(copyOffloadOperation) && trackDependencies) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.commandWithPostSync = true;
|
||||
auto csr = getCsr(false);
|
||||
@@ -2685,9 +2685,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
|
||||
if (apiRequest) {
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(nullptr, false);
|
||||
appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation);
|
||||
}
|
||||
handleInOrderDependencyCounter(nullptr, false, false);
|
||||
handleInOrderDependencyCounter(nullptr, false, copyOffloadOperation);
|
||||
}
|
||||
|
||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2817,11 +2817,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
|
||||
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
|
||||
|
||||
if (maskLsb) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
|
||||
} else {
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
|
||||
}
|
||||
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
@@ -2891,7 +2891,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, false, true, true, false, false);
|
||||
if (ret != ZE_RESULT_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2907,7 +2907,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
|
||||
|
||||
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false, false);
|
||||
if (allocationStruct.alloc == nullptr) {
|
||||
@@ -2915,7 +2915,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
}
|
||||
commandContainer.addToResidencyContainer(allocationStruct.alloc);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.timeStampOperation = true;
|
||||
args.commandWithPostSync = true;
|
||||
@@ -2933,7 +2933,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
args);
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly());
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false);
|
||||
@@ -3039,7 +3039,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
||||
for (uint32_t i = 0; i < numEvents; i++) {
|
||||
auto event = Event::fromHandle(phEvents[i]);
|
||||
if (event->isCounterBased()) {
|
||||
appendWaitOnSingleEvent(event, nullptr, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
|
||||
appendWaitOnSingleEvent(event, nullptr, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3100,11 +3100,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
|
||||
|
||||
auto groupCount = ptrOffset(alloc->getGpuAddress(), groupCountOffset);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimX,
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountX)), isCopyOnly());
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountX)), isCopyOnly(false));
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimY,
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountY)), isCopyOnly());
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountY)), isCopyOnly(false));
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimZ,
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountZ)), isCopyOnly());
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountZ)), isCopyOnly(false));
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -3452,7 +3452,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, true, true, false, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -3468,10 +3468,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
|
||||
|
||||
if (!this->isInOrderExecutionEnabled()) {
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
uint64_t gpuAddress = 0u;
|
||||
TaskCountType value = 0u;
|
||||
@@ -3490,7 +3490,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
}
|
||||
|
||||
addToMappedEventList(signalEvent);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false, isCopyOnly());
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false, isCopyOnly(false));
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false);
|
||||
@@ -3618,14 +3618,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
handleInOrderImplicitDependencies(false);
|
||||
handleInOrderImplicitDependencies(false, false);
|
||||
}
|
||||
|
||||
if (!handleCounterBasedEventOperations(signalEvent)) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly());
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false, isCopyOnly(false));
|
||||
|
||||
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
|
||||
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
|
||||
@@ -3636,8 +3636,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
if (isQwordInOrderCounter()) {
|
||||
indirectMode = true;
|
||||
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly());
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly());
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly(false));
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly(false));
|
||||
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
@@ -3657,7 +3657,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly());
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false);
|
||||
@@ -3682,12 +3682,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
||||
commandContainer.addToResidencyContainer(dstAllocationStruct.alloc);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
handleInOrderImplicitDependencies(false);
|
||||
handleInOrderImplicitDependencies(false, false);
|
||||
}
|
||||
|
||||
const uint64_t gpuAddress = static_cast<uint64_t>(dstAllocationStruct.alignedAllocationPtr);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
args.commandWithPostSync = true;
|
||||
encodeMiFlush(gpuAddress,
|
||||
@@ -3880,7 +3880,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchEventRemainingPacketsPostSync
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, CommandToPatch::CommandType storedSemaphore) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool copyOffloadOperation, CommandToPatch::CommandType storedSemaphore) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
uint64_t gpuAddr = event->getCompletionFieldGpuAddress(this->device);
|
||||
@@ -3895,7 +3895,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
|
||||
for (uint32_t i = 0u; i < packetsToWait; i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
|
||||
NEO::CompareOperation::equal, true, false, isCopyOnly());
|
||||
NEO::CompareOperation::equal, true, false, isCopyOnly(copyOffloadOperation));
|
||||
} else {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
gpuAddr,
|
||||
@@ -4100,7 +4100,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
|
||||
|
||||
// Patch Primary Tile section skip (to Secondary Tile section)
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(skipPrimaryTileSectionCmdStream, cmdStream->getCurrentGpuAddressPosition(), workPartitionAllocationGpuVa, 0,
|
||||
NEO::CompareOperation::notEqual, false, false, isCopyOnly());
|
||||
NEO::CompareOperation::notEqual, false, false, isCopyOnly(false));
|
||||
|
||||
// Secondary Tile section
|
||||
{
|
||||
@@ -4114,7 +4114,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
|
||||
|
||||
// Patch Primary Tile section jump to end
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(jumpToEndSectionFromPrimaryTile, cmdStream->getCurrentGpuAddressPosition(), syncAllocationGpuVa + sizeof(uint32_t), queueId,
|
||||
NEO::CompareOperation::equal, false, false, isCopyOnly());
|
||||
NEO::CompareOperation::equal, false, false, isCopyOnly(false));
|
||||
|
||||
// End section
|
||||
NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
|
||||
@@ -4151,7 +4151,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::isDeviceToHostCopyEventFenceRequired(
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyOffloadOperation) const {
|
||||
return ((isCopyOnly() || copyOffloadOperation) && (srcAllocation->isAllocatedInLocalMemoryPool() && !dstAllocation->isAllocatedInLocalMemoryPool()));
|
||||
return (isCopyOnly(copyOffloadOperation) && (srcAllocation->isAllocatedInLocalMemoryPool() && !dstAllocation->isAllocatedInLocalMemoryPool()));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -106,7 +106,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
size_t size, bool flushHost) override;
|
||||
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override;
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override;
|
||||
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
@@ -550,12 +550,12 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
|
||||
|
||||
if (nonWalkerSignalingHasRelaxedOrdering) {
|
||||
result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, true, false, nullptr, false);
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream(), isCopyOnly());
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream(), isCopyOnly(false));
|
||||
relaxedOrderingDispatch = true;
|
||||
hasStallingCmds = hasStallingCmdsForRelaxedOrdering(1, relaxedOrderingDispatch);
|
||||
}
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, CommandToPatch::Invalid);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, false, CommandToPatch::Invalid);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false);
|
||||
}
|
||||
|
||||
@@ -611,7 +611,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, isCopyOffloadEnabled());
|
||||
|
||||
auto estimatedSize = commonImmediateCommandSize;
|
||||
if (isCopyOnly() || isCopyOffloadEnabled()) {
|
||||
if (isCopyOnly(true)) {
|
||||
auto nBlits = size / (NEO::BlitCommandsHelper<GfxFamily>::getMaxBlitWidth(this->device->getNEODevice()->getRootDeviceEnvironment()) *
|
||||
NEO::BlitCommandsHelper<GfxFamily>::getMaxBlitHeight(this->device->getNEODevice()->getRootDeviceEnvironment(), true));
|
||||
auto sizePerBlit = sizeof(typename GfxFamily::XY_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize();
|
||||
@@ -665,7 +665,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, isCopyOffloadEnabled());
|
||||
|
||||
auto estimatedSize = commonImmediateCommandSize;
|
||||
if (isCopyOnly() || isCopyOffloadEnabled()) {
|
||||
if (isCopyOnly(true)) {
|
||||
auto xBlits = static_cast<size_t>(std::ceil(srcRegion->width / static_cast<double>(BlitterConstants::maxBlitWidth)));
|
||||
auto yBlits = static_cast<size_t>(std::ceil(srcRegion->height / static_cast<double>(BlitterConstants::maxBlitHeight)));
|
||||
auto zBlits = static_cast<size_t>(srcRegion->depth);
|
||||
@@ -773,7 +773,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) {
|
||||
bool allSignaled = true;
|
||||
for (auto i = 0u; i < numEvents; i++) {
|
||||
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
|
||||
@@ -786,7 +786,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
||||
checkAvailableSpace(numEvents, false, commonImmediateCommandSize);
|
||||
}
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, false);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, false, copyOffloadOperation);
|
||||
this->dependenciesPresent = true;
|
||||
|
||||
if (skipFlush) {
|
||||
@@ -838,7 +838,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
|
||||
|
||||
auto estimatedSize = commonImmediateCommandSize;
|
||||
if (isCopyOnly()) {
|
||||
if (isCopyOnly(false)) {
|
||||
auto imgSize = L0::Image::fromHandle(hSrcImage)->getImageInfo().size;
|
||||
auto nBlits = static_cast<size_t>(std::ceil(imgSize / static_cast<double>(BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight)));
|
||||
auto sizePerBlit = sizeof(typename GfxFamily::XY_BLOCK_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize();
|
||||
@@ -1508,7 +1508,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
|
||||
auto ret = ZE_RESULT_SUCCESS;
|
||||
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize);
|
||||
if (numWaitEvents) {
|
||||
ret = this->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, true, true);
|
||||
ret = this->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, true, true, false);
|
||||
}
|
||||
|
||||
if (ret != ZE_RESULT_SUCCESS) {
|
||||
|
||||
@@ -437,7 +437,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
if (inOrderExecSignalRequired) {
|
||||
if (inOrderNonWalkerSignalling) {
|
||||
if (!launchParams.skipInOrderNonWalkerSignaling) {
|
||||
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
|
||||
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
|
||||
appendSignalInOrderDependencyCounter(eventForInOrderExec, false);
|
||||
}
|
||||
} else {
|
||||
@@ -525,14 +525,14 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
partitionDataSize,
|
||||
isCopyOnly());
|
||||
isCopyOnly(false));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset(),
|
||||
isCopyOnly());
|
||||
isCopyOnly(false));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -621,7 +621,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
|
||||
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
|
||||
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
|
||||
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly());
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly(false));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ ze_result_t CommandListImp::destroy() {
|
||||
}
|
||||
|
||||
if (!isImmediateType() &&
|
||||
!isCopyOnly() &&
|
||||
!isCopyOnly(false) &&
|
||||
this->stateBaseAddressTracking &&
|
||||
this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) {
|
||||
|
||||
@@ -257,7 +257,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
|
||||
commandList->copyThroughLockedPtrEnabled = gfxCoreHelper.copyThroughLockedPtrEnabled(hwInfo, device->getProductHelper());
|
||||
|
||||
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && !commandList->isCopyOnly() && commandList->isInOrderExecutionEnabled()) {
|
||||
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && !commandList->isCopyOnly(false) && commandList->isInOrderExecutionEnabled()) {
|
||||
commandList->enableCopyOperationOffload(productFamily, device, desc);
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,6 @@ struct CommandListImp : public CommandList {
|
||||
void enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode);
|
||||
NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const { return synchronizedDispatchMode; }
|
||||
void enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc);
|
||||
bool isCopyOffloadEnabled() const { return copyOperationOffloadEnabled; }
|
||||
void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr);
|
||||
|
||||
protected:
|
||||
@@ -65,8 +64,6 @@ struct CommandListImp : public CommandList {
|
||||
static constexpr bool cmdListDefaultGlobalAtomics = false;
|
||||
std::vector<Event *> mappedTsEventList;
|
||||
std::vector<Event *> interruptEvents;
|
||||
|
||||
bool copyOperationOffloadEnabled = false;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -104,10 +104,10 @@ struct BcsSplit {
|
||||
for (size_t i = 0; i < cmdQsForSplit.size(); i++) {
|
||||
if (barrierRequired) {
|
||||
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
|
||||
cmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false);
|
||||
cmdList->addEventsToCmdList(1u, &barrierEventHandle, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
|
||||
}
|
||||
|
||||
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, true, false);
|
||||
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
|
||||
|
||||
if (signalEvent && i == 0u) {
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false, true);
|
||||
@@ -136,7 +136,7 @@ struct BcsSplit {
|
||||
}
|
||||
}
|
||||
|
||||
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), nullptr, hasRelaxedOrderingDependencies, false, true, false);
|
||||
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), nullptr, hasRelaxedOrderingDependencies, false, true, false, false);
|
||||
if (signalEvent) {
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, true, false, true);
|
||||
}
|
||||
|
||||
@@ -348,7 +348,7 @@ void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::appendNonKernelOperation(L
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
} else if (operation == NonKernelOperation::WaitOnEvents) {
|
||||
auto eventHandle = event->toHandle();
|
||||
result = currentCmdList->appendWaitOnEvents(1, &eventHandle, nullptr, false, false, false, false, false);
|
||||
result = currentCmdList->appendWaitOnEvents(1, &eventHandle, nullptr, false, false, false, false, false, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
} else if (operation == NonKernelOperation::WriteGlobalTimestamp) {
|
||||
uint64_t timestampAddress = 0xfffffffffff0L;
|
||||
|
||||
@@ -477,7 +477,7 @@ struct MockCommandList : public CommandList {
|
||||
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint32_t numEvents,
|
||||
ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush));
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation));
|
||||
|
||||
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint64_t * dstptr,
|
||||
|
||||
@@ -273,7 +273,7 @@ HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendedToAsy
|
||||
cmdList.callBaseExecute = true;
|
||||
cmdList.cmdListType = CommandList::CommandListType::typeImmediate;
|
||||
cmdList.isSyncModeQueue = false;
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, false, result);
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(false), false, false, result);
|
||||
cmdList.cmdQImmediate = commandQueue;
|
||||
|
||||
result = cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u);
|
||||
@@ -307,7 +307,7 @@ HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendedToSyn
|
||||
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
|
||||
desc.pNext = 0;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, false, result);
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(false), false, false, result);
|
||||
cmdList.cmdQImmediate = commandQueue;
|
||||
|
||||
result = cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u);
|
||||
@@ -347,7 +347,7 @@ HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendToSynch
|
||||
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
|
||||
desc.pNext = 0;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, false, result);
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(false), false, false, result);
|
||||
cmdList.cmdQImmediate = commandQueue;
|
||||
|
||||
result = cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u);
|
||||
|
||||
@@ -1181,7 +1181,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
||||
|
||||
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false);
|
||||
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false), true, true);
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), true, true);
|
||||
|
||||
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, 0, nullptr), true, true);
|
||||
|
||||
@@ -1584,7 +1584,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
||||
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
|
||||
false, false);
|
||||
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false), false, false);
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false, false);
|
||||
|
||||
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
|
||||
false, false);
|
||||
@@ -1974,7 +1974,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
|
||||
@@ -2089,7 +2089,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
|
||||
@@ -2185,7 +2185,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCre
|
||||
const auto oldCsr = queue->csr;
|
||||
queue->csr = &mockCommandStreamReceiver;
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
|
||||
|
||||
queue->csr = oldCsr;
|
||||
|
||||
@@ -1389,7 +1389,7 @@ HWTEST2_F(CommandListCreateWithBcs,
|
||||
auto internalCopyEngine = neoDevice->getInternalCopyEngine();
|
||||
EXPECT_NE(nullptr, internalCopyEngine);
|
||||
EXPECT_EQ(cmdQueue->getCsr(), internalCopyEngine->commandStreamReceiver);
|
||||
EXPECT_TRUE(commandList->isCopyOnly());
|
||||
EXPECT_TRUE(commandList->isCopyOnly(false));
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingCommandListUsingLinkedCopyThenFlushTaskModeUsed, IsAtLeastXeHpCore) {
|
||||
@@ -1408,7 +1408,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingComman
|
||||
returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
EXPECT_TRUE(commandList->isCopyOnly());
|
||||
EXPECT_TRUE(commandList->isCopyOnly(false));
|
||||
EXPECT_TRUE(commandList->flushTaskSubmissionEnabled());
|
||||
}
|
||||
|
||||
|
||||
@@ -644,7 +644,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, returnValue));
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
context->destroy();
|
||||
@@ -809,7 +809,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
|
||||
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto eventHandle = event->toHandle();
|
||||
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, true, false, false, false);
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(whiteBoxCmdList->getCsr(false)->getNextBarrierCount(), 2u);
|
||||
|
||||
@@ -851,7 +851,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
|
||||
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto eventHandle = event->toHandle();
|
||||
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, false, false, false, false);
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, nullptr, false, false, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(whiteBoxCmdList->getCsr(false)->getNextBarrierCount(), 1u);
|
||||
}
|
||||
|
||||
@@ -623,7 +623,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
|
||||
event.signalScope = 0;
|
||||
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
auto eventHandle = event.toHandle();
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -647,7 +647,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen
|
||||
event.signalScope = 0;
|
||||
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
auto eventHandle = event.toHandle();
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -678,7 +678,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP
|
||||
event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -731,7 +731,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
size_t startOffset = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
|
||||
size_t endOffset = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
size_t usedBufferSize = (endOffset - startOffset);
|
||||
@@ -778,7 +778,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
size_t startOffset = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
|
||||
size_t endOffset = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
size_t usedBufferSize = (endOffset - startOffset);
|
||||
@@ -819,7 +819,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto used = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -853,7 +853,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListAndAlreadyCompletedEventWh
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
event.isCompleted = Event::State::STATE_SIGNALED;
|
||||
|
||||
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, nullptr, false, false, true, false);
|
||||
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, nullptr, false, false, true, false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -1041,7 +1041,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto used = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
|
||||
commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -1074,7 +1074,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm
|
||||
event2.waitScope = 0;
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto ret = commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false);
|
||||
auto ret = commandList->appendWaitOnEvents(2, events, nullptr, false, true, false, false, false, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
}
|
||||
|
||||
|
||||
@@ -993,7 +993,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto eventHandle = event->toHandle();
|
||||
cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
|
||||
cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
|
||||
EXPECT_TRUE(cmdList.dependenciesPresent);
|
||||
|
||||
|
||||
@@ -647,7 +647,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnE
|
||||
|
||||
eventPool->createEvent(&eventDesc, &hEvent);
|
||||
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEvent, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEvent, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = cmdStream->getUsed();
|
||||
|
||||
@@ -639,7 +639,7 @@ HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThen
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false, false);
|
||||
EXPECT_TRUE(commandList->isCopyOnly());
|
||||
EXPECT_TRUE(commandList->isCopyOnly(false));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
commandList->getCsr(false)->getInternalAllocationStorage()->getTemporaryAllocations().freeAllGraphicsAllocations(device->getNEODevice());
|
||||
|
||||
@@ -34,7 +34,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -89,7 +89,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
|
||||
ultCsr->directSubmission.reset(directSubmission);
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, nullptr, true, true, true, false);
|
||||
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, nullptr, true, true, true, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = immCommandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -253,12 +253,12 @@ class MockCommandListImmediateHwWithWaitEventFail : public WhiteBox<::L0::Comman
|
||||
using BaseClass::setupFillKernelArguments;
|
||||
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override {
|
||||
if (forceWaitEventError) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
appendWaitEventCalled++;
|
||||
return BaseClass::appendWaitOnEvents(numEvents, phEvent, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, skipFlush);
|
||||
return BaseClass::appendWaitOnEvents(numEvents, phEvent, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, skipFlush, copyOffloadOperation);
|
||||
};
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override {
|
||||
@@ -398,7 +398,7 @@ class MockAppendRegularCommandlistWithWaitOnEvents : public MockCommandListForAp
|
||||
public:
|
||||
MockAppendRegularCommandlistWithWaitOnEvents() : MockCommandListForAppendLaunchKernel<gfxCoreFamily>() {}
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush) override {
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
};
|
||||
};
|
||||
@@ -489,7 +489,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
|
||||
|
||||
ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()};
|
||||
|
||||
auto result = commandList->appendWaitOnEvents(2, handles, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(2, handles, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -520,7 +520,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
|
||||
|
||||
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) {
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer();
|
||||
@@ -547,7 +547,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
|
||||
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -592,7 +592,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
|
||||
|
||||
event->setPacketsInUse(3u);
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
@@ -655,7 +655,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
|
||||
ASSERT_EQ(9u, event->getPacketsInUse());
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
@@ -772,7 +772,7 @@ HWTEST_F(CommandListAppendWaitOnSecondaryBatchBufferEvent, givenCommandBufferIsE
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
|
||||
auto oldCommandBuffer = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -834,7 +834,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent,
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
auto result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
|
||||
auto result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -876,7 +876,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
EXPECT_FALSE(cmdList.dependenciesPresent);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false));
|
||||
EXPECT_TRUE(cmdList.dependenciesPresent);
|
||||
}
|
||||
|
||||
@@ -893,7 +893,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
EXPECT_FALSE(cmdList.dependenciesPresent);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false));
|
||||
EXPECT_FALSE(cmdList.dependenciesPresent);
|
||||
}
|
||||
|
||||
@@ -976,7 +976,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
|
||||
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
auto eventHandle = event->toHandle();
|
||||
commandListImmediate->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
|
||||
commandListImmediate->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
|
||||
EXPECT_EQ(0u, ultCsr.downloadAllocationsCalledCount);
|
||||
}
|
||||
@@ -1135,7 +1135,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, GivenOutCmdListProvidedAndSkipResidencyFl
|
||||
auto usedSpaceBefore = cmdListStream->getUsed();
|
||||
|
||||
bool skipResidency = true;
|
||||
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false);
|
||||
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = cmdListStream->getUsed();
|
||||
@@ -1168,7 +1168,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, GivenOutCmdListProvidedAndSkipResidencyFl
|
||||
EXPECT_EQ(cmdListResidency.end(), eventResidencyIt);
|
||||
|
||||
skipResidency = false;
|
||||
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false);
|
||||
result = commandList->appendWaitOnEvents(eventCount, eventHandles.data(), &outSemaphoreWaitCmds, false, false, false, skipResidency, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
eventResidencyIt = std::find(cmdListResidency.begin(), cmdListResidency.end(), eventPoolAllocation);
|
||||
|
||||
@@ -1121,7 +1121,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
size_t sizeBefore = cmdStream->getUsed();
|
||||
auto eventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false);
|
||||
result = commandList->appendWaitOnEvents(1, &eventHandle, nullptr, false, true, false, false, false, false);
|
||||
size_t sizeAfter = cmdStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
|
||||
@@ -2050,7 +2050,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsTh
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->addEventsToCmdList(0, nullptr, nullptr, true, true, true, false);
|
||||
immCmdList->addEventsToCmdList(0, nullptr, nullptr, true, true, true, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -4989,7 +4989,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenSignalli
|
||||
|
||||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false);
|
||||
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false, false);
|
||||
|
||||
EXPECT_EQ(partitionCount * 2, immCmdList->inOrderExecInfo->getCounterValue());
|
||||
|
||||
@@ -5036,7 +5036,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenDuplicatedCounterStorageAndAtomicSi
|
||||
|
||||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false);
|
||||
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false, false);
|
||||
|
||||
EXPECT_EQ(partitionCount * 2, immCmdList->inOrderExecInfo->getCounterValue());
|
||||
|
||||
@@ -5091,7 +5091,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenDuplicatedCounterStorageAndWithoutA
|
||||
|
||||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false);
|
||||
immCmdList->appendWaitOnEvents(1, &handle, nullptr, false, false, true, false, false, false);
|
||||
|
||||
expectedCounter += counterIncrement;
|
||||
EXPECT_EQ(expectedCounter, immCmdList->inOrderExecInfo->getCounterValue());
|
||||
@@ -7624,7 +7624,8 @@ HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledWhenProgrammingHwCmdsT
|
||||
using XY_COPY_BLT = typename std::remove_const<decltype(FamilyType::cmdInitXyCopyBlt)>::type;
|
||||
|
||||
auto immCmdList = createImmCmdListWithOffload<gfxCoreFamily>();
|
||||
EXPECT_FALSE(immCmdList->isCopyOnly());
|
||||
EXPECT_FALSE(immCmdList->isCopyOnly(false));
|
||||
EXPECT_TRUE(immCmdList->isCopyOnly(true));
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
@@ -7659,9 +7660,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledWhenProgrammingHwCmdsT
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWhenAppendingThenUseBcsCommands, IsAtLeastXeHpCore) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
auto immCmdList = createImmCmdListWithOffload<gfxCoreFamily>();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
auto offset = cmdStream->getUsed();
|
||||
@@ -7681,6 +7686,86 @@ HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWhenAppendingThenUseBcsCom
|
||||
|
||||
auto miFlushCmds = findAll<typename FamilyType::MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(0u, miFlushCmds.size());
|
||||
|
||||
auto lrrCmds = findAll<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
auto lriCmds = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto lrmCmds = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
for (auto &lrr : lrrCmds) {
|
||||
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*lrr);
|
||||
EXPECT_TRUE(lrrCmd->getSourceRegisterAddress() > RegisterOffsets::bcs0Base);
|
||||
EXPECT_TRUE(lrrCmd->getDestinationRegisterAddress() > RegisterOffsets::bcs0Base);
|
||||
}
|
||||
|
||||
for (auto &lri : lriCmds) {
|
||||
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lri);
|
||||
EXPECT_TRUE(lriCmd->getRegisterOffset() > RegisterOffsets::bcs0Base);
|
||||
}
|
||||
|
||||
for (auto &lrm : lrmCmds) {
|
||||
auto lrmCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*lrm);
|
||||
EXPECT_TRUE(lrmCmd->getRegisterAddress() > RegisterOffsets::bcs0Base);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWithRelaxedOrderingWhenAppendingThenUseBcsCommands, IsAtLeastXeHpCore) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
|
||||
auto immCmdList = createImmCmdListWithOffload<gfxCoreFamily>();
|
||||
|
||||
auto mainQueueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(false));
|
||||
auto copyQueueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(true));
|
||||
|
||||
auto mainQueueDirectSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*mainQueueCsr);
|
||||
auto offloadDirectSubmission = new MockDirectSubmissionHw<FamilyType, BlitterDispatcher<FamilyType>>(*copyQueueCsr);
|
||||
|
||||
mainQueueCsr->directSubmission.reset(mainQueueDirectSubmission);
|
||||
copyQueueCsr->blitterDirectSubmission.reset(offloadDirectSubmission);
|
||||
|
||||
int client1, client2;
|
||||
|
||||
mainQueueCsr->registerClient(&client1);
|
||||
mainQueueCsr->registerClient(&client2);
|
||||
copyQueueCsr->registerClient(&client1);
|
||||
copyQueueCsr->registerClient(&client2);
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, eventHandle, 0, nullptr, false, false);
|
||||
|
||||
ze_copy_region_t region = {0, 0, 0, 1, 1, 1};
|
||||
immCmdList->appendMemoryCopyRegion(&copyData1, &region, 1, 1, &copyData2, &region, 1, 1, eventHandle, 0, nullptr, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
|
||||
|
||||
auto lrrCmds = findAll<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
auto lriCmds = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
auto lrmCmds = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
for (auto &lrr : lrrCmds) {
|
||||
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*lrr);
|
||||
EXPECT_TRUE(lrrCmd->getSourceRegisterAddress() > RegisterOffsets::bcs0Base);
|
||||
EXPECT_TRUE(lrrCmd->getDestinationRegisterAddress() > RegisterOffsets::bcs0Base);
|
||||
}
|
||||
|
||||
for (auto &lri : lriCmds) {
|
||||
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lri);
|
||||
EXPECT_TRUE(lriCmd->getRegisterOffset() > RegisterOffsets::bcs0Base);
|
||||
}
|
||||
|
||||
for (auto &lrm : lrmCmds) {
|
||||
auto lrmCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*lrm);
|
||||
EXPECT_TRUE(lrmCmd->getRegisterAddress() > RegisterOffsets::bcs0Base);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenAtomicSignalingModeWhenUpdatingCounterThenUseCorrectHwCommands, IsAtLeastXeHpCore) {
|
||||
|
||||
@@ -377,7 +377,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
|
||||
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
|
||||
@@ -441,7 +441,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false);
|
||||
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event, false);
|
||||
|
||||
@@ -569,7 +569,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr
|
||||
|
||||
auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::copy, cmdListFlags, returnValue, false);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, commandList->isCopyOnly(), false, true, returnValue);
|
||||
auto commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, commandList->isCopyOnly(false), false, true, returnValue);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
@@ -160,7 +160,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
|
||||
CommandListImp *computeCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hComputeCommandList));
|
||||
EXPECT_FALSE(computeCommandList->isCopyOnly());
|
||||
EXPECT_FALSE(computeCommandList->isCopyOnly(false));
|
||||
|
||||
ze_command_queue_handle_t hCommandQueue{};
|
||||
ze_command_queue_desc_t computeCommandQueueDesc{};
|
||||
@@ -178,7 +178,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
|
||||
CommandListImp *copyCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCopyCommandList));
|
||||
EXPECT_TRUE(copyCommandList->isCopyOnly());
|
||||
EXPECT_TRUE(copyCommandList->isCopyOnly(false));
|
||||
|
||||
computeCommandQueue->destroy();
|
||||
computeCommandList->destroy();
|
||||
@@ -342,7 +342,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
|
||||
CommandListImp *computeCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hComputeCommandList));
|
||||
EXPECT_FALSE(computeCommandList->isCopyOnly());
|
||||
EXPECT_FALSE(computeCommandList->isCopyOnly(false));
|
||||
|
||||
ze_command_list_handle_t hCopyCommandList{};
|
||||
ze_command_queue_desc_t copyDesc{};
|
||||
@@ -351,7 +351,7 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
|
||||
CommandListImp *copyCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCopyCommandList));
|
||||
EXPECT_TRUE(copyCommandList->isCopyOnly());
|
||||
EXPECT_TRUE(copyCommandList->isCopyOnly(false));
|
||||
|
||||
computeCommandList->destroy();
|
||||
copyCommandList->destroy();
|
||||
|
||||
@@ -819,7 +819,7 @@ ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_even
|
||||
commandList.getCmdContainer().addToResidencyContainer(pool.pAllocation);
|
||||
|
||||
// Wait for events before executing query.
|
||||
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, false, false, false);
|
||||
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, false, false, false, false);
|
||||
|
||||
if (metricQueriesSize) {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user