feature: control post sync completion check

Related-To: NEO-14844

Signed-off-by: Tomasz Biernacik <tomasz.biernacik@intel.com>
This commit is contained in:
Tomasz Biernacik
2025-07-10 10:59:42 +00:00
committed by Compute-Runtime-Automation
parent 087d1ecea4
commit 2c5cbec033
24 changed files with 447 additions and 32 deletions

View File

@@ -456,6 +456,16 @@ struct CommandList : _ze_command_list_handle_t {
return this->captureTarget->capture<api>(apiArgs...);
}
inline bool getIsWalkerWithProfilingEnqueued() {
return this->isWalkerWithProfilingEnqueued;
}
// Consumes the isWalkerWithProfilingEnqueued flag: returns the value it held
// on entry and leaves the member set to false.
inline bool getAndClearIsWalkerWithProfilingEnqueued() {
    const bool wasEnqueued = this->isWalkerWithProfilingEnqueued;
    this->isWalkerWithProfilingEnqueued = false;
    return wasEnqueued;
}
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload);
@@ -553,6 +563,8 @@ struct CommandList : _ze_command_list_handle_t {
bool l3FlushAfterPostSyncRequired = false;
bool textureCacheFlushPending = false;
bool closedCmdList = false;
bool isWalkerWithProfilingEnqueued = false;
bool shouldRegisterEnqueuedWalkerWithProfiling = false;
Graph *captureTarget = nullptr;
};

View File

@@ -157,6 +157,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
taskCountUpdateFenceRequired = false;
textureCacheFlushPending = false;
closedCmdList = false;
isWalkerWithProfilingEnqueued = false;
this->inOrderPatchCmds.clear();
@@ -273,6 +274,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->compactL3FlushEventPacket = L0GfxCoreHelper::useCompactL3FlushEventPacket(hwInfo, this->l3FlushAfterPostSyncRequired);
this->useAdditionalBlitProperties = productHelper.useAdditionalBlitProperties();
this->isPostImageWriteFlushRequired = releaseHelper ? releaseHelper->isPostImageWriteFlushRequired() : false;
this->shouldRegisterEnqueuedWalkerWithProfiling = this->device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();
if (NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
this->defaultPipelinedThreadArbitrationPolicy = NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get();
@@ -448,6 +450,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
if (!launchParams.isKernelSplitOperation) {
event->resetKernelCountAndPacketUsedCount();
}
if (event->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
this->isWalkerWithProfilingEnqueued = true;
}
}
if (!handleCounterBasedEventOperations(event, launchParams.omitAddingEventResidency)) {
@@ -501,6 +507,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
}
launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
if (event->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
this->isWalkerWithProfilingEnqueued = true;
}
}
if (!handleCounterBasedEventOperations(event, false)) {
@@ -547,6 +557,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
if (hEvent) {
event = Event::fromHandle(hEvent);
launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
if (this->shouldRegisterEnqueuedWalkerWithProfiling && event->isEventTimestampFlagSet()) {
this->isWalkerWithProfilingEnqueued = true;
}
}
if (!handleCounterBasedEventOperations(event, false)) {
@@ -1476,6 +1490,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
if (signalEvent) {
if (signalEvent->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
this->isWalkerWithProfilingEnqueued = true;
}
}
Kernel *builtinKernel = nullptr;
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
@@ -2351,6 +2371,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(signalEvent->isSignalScope());
if (signalEvent->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
this->isWalkerWithProfilingEnqueued = true;
}
}
if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) {
@@ -3267,6 +3290,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
args.dcFlushEnable = true;
args.workloadPartitionOffset = partitionCount > 1;
args.textureCacheInvalidationEnable = textureFlushRequired;
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*cmdStream,
@@ -3484,6 +3508,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
} else {
NEO::PipeControlArgs args;
args.blockSettingPostSyncProperties = true;
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandContainer.getCommandStream(),
@@ -4291,6 +4316,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
NEO::PipeControlArgs args;
args.dcFlushEnable = getDcFlushRequired(!!descriptor->writeScope);
args.dcFlushEnable &= dstAllocationStruct.needsFlush;
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandContainer.getCommandStream(),
@@ -4428,6 +4454,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
NEO::PipeControlArgs pipeControlArgs;
pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope);
pipeControlArgs.workloadPartitionOffset = eventOperations.workPartitionOperation;
pipeControlArgs.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
const auto &productHelper = this->device->getNEODevice()->getRootDeviceEnvironment().template getHelper<NEO::ProductHelper>();
if (productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(this->device->getHwInfo())) {
@@ -4853,6 +4880,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(Event *event) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
args.workloadPartitionOffset = partitionCount > 1;
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
cmdListStream,

View File

@@ -263,15 +263,17 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
NEO::LinearStream *optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
NEO::ImmediateDispatchFlags dispatchFlags{
&this->requiredStreamState, // requiredState
sshCpuPointer, // sshCpuBase
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
appendOperation, // dispatchOperation
this->isSyncModeQueue, // blockingAppend
requireTaskCountUpdate, // requireTaskCountUpdate
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
hasStallingCmds // hasStallingCmds
&this->requiredStreamState, // requiredState
sshCpuPointer, // sshCpuBase
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
appendOperation, // dispatchOperation
this->isSyncModeQueue, // blockingAppend
requireTaskCountUpdate, // requireTaskCountUpdate
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
hasStallingCmds, // hasStallingCmds
this->isWalkerWithProfilingEnqueued // isWalkerWithProfilingEnqueued
};
this->isWalkerWithProfilingEnqueued = false;
CommandListImp::storeReferenceTsToMappedEvents(true);
return getCsr(false)->flushImmediateTask(cmdStreamTask,
@@ -294,15 +296,17 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
NEO::LinearStream *optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
NEO::ImmediateDispatchFlags dispatchFlags{
nullptr, // requiredState
sshCpuPointer, // sshCpuBase
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
appendOperation, // dispatchOperation
this->isSyncModeQueue, // blockingAppend
requireTaskCountUpdate, // requireTaskCountUpdate
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
hasStallingCmds // hasStallingCmds
nullptr, // requiredState
sshCpuPointer, // sshCpuBase
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
appendOperation, // dispatchOperation
this->isSyncModeQueue, // blockingAppend
requireTaskCountUpdate, // requireTaskCountUpdate
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
hasStallingCmds, // hasStallingCmds
this->isWalkerWithProfilingEnqueued // isWalkerWithProfilingEnqueued
};
this->isWalkerWithProfilingEnqueued = false;
CommandListImp::storeReferenceTsToMappedEvents(true);
return getCsr(false)->flushImmediateTaskStateless(cmdStreamTask,
@@ -348,6 +352,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
false // isDcFlushRequiredOnStallingCommandsOnNextFlush
);
dispatchFlags.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
dispatchFlags.optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject));
@@ -1806,6 +1811,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
return ret;
}
this->isWalkerWithProfilingEnqueued |= this->cmdQImmediate->getAndClearIsWalkerWithProfilingEnqueued();
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(signalEvent,
nullptr,
nullptr,

View File

@@ -329,6 +329,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
// Builds the PipeControlArgs used for a barrier. All fields keep their default
// values except isWalkerWithProfilingEnqueued, which receives the command
// list's pending flag; reading it here also resets that flag to false.
template <GFXCORE_FAMILY gfxCoreFamily>
inline NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
    NEO::PipeControlArgs barrierArgs;
    barrierArgs.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
    return barrierArgs;
}

View File

@@ -632,6 +632,7 @@ NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags()
args.hdcPipelineFlush = true;
args.unTypedDataPortCacheFlush = true;
args.textureCacheInvalidationEnable = this->consumeTextureCacheFlushPending();
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
return args;
}

View File

@@ -84,6 +84,12 @@ struct CommandQueue : _ze_command_queue_handle_t {
TaskCountType getTaskCount() const { return taskCount; }
void setTaskCount(TaskCountType newTaskCount) { taskCount = newTaskCount; }
// Consumes the queue's isWalkerWithProfilingEnqueued flag: returns the value
// it held on entry and leaves the member set to false.
inline bool getAndClearIsWalkerWithProfilingEnqueued() {
    const bool wasEnqueued = this->isWalkerWithProfilingEnqueued;
    this->isWalkerWithProfilingEnqueued = false;
    return wasEnqueued;
}
protected:
bool frontEndTrackingEnabled() const;
@@ -104,6 +110,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool dispatchCmdListBatchBufferAsPrimary = false;
bool heaplessModeEnabled = false;
bool heaplessStateInitEnabled = false;
bool isWalkerWithProfilingEnqueued = false;
};
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

View File

@@ -827,6 +827,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
}
this->isWalkerWithProfilingEnqueued = commandList->getIsWalkerWithProfilingEnqueued();
}
this->getCsr()->getResidencyAllocations().reserve(ctx.spaceForResidency);
@@ -1387,6 +1389,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncRegular(
args.dcFlushEnable = this->csr->getDcFlushSupport();
args.workloadPartitionOffset = this->partitionCount > 1;
args.notifyEnable = this->csr->isUsedNotifyEnableForPostSync();
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
cmdStream,
NEO::PostSyncMode::immediateData,

View File

@@ -2164,6 +2164,225 @@ HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrd
NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter, false));
}
// Immediate command list variant: after every append call the test inspects
// isWalkerWithProfilingEnqueued on the UltCommandStreamReceiver used by the list.
// Kernel launch, memory copy/fill and image copy appends signaled with the
// timestamp event are expected to match
// ProductHelper::shouldRegisterEnqueuedWalkerWithProfiling(); barrier, event
// reset/signal/wait, page-fault copy, global timestamp write and memory ranges
// barrier appends are expected to leave the flag false.
HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatchingWalkerWithProfilingThenSetCsrFlagIsWalkerWithProfilingEnqueued, IsAtLeastXeCore) {
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue));
ASSERT_NE(nullptr, commandList);
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
// Pool created with the kernel-timestamp flag; presumably this is what makes
// isEventTimestampFlagSet() true for the event below - confirm against Event.
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
ze_event_desc_t eventDesc = {};
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
ze_event_handle_t event = nullptr;
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
// The unique_ptr takes ownership of the created event for the rest of the test.
std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));
std::unique_ptr<L0::ult::Module> mockModule = std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin);
Mock<::L0::KernelImp> kernel;
kernel.module = mockModule.get();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
uint8_t srcPtr[64] = {};
uint8_t dstPtr[64] = {};
const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};
// NOTE(review): dstPtr is a 64-byte array but is imported with a size of
// MemoryConstants::pageSize - confirm this is intended.
driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));
// Checks that the append succeeded and that the CSR flag equals dispatchFlag,
// then resets the CSR flag so the next check starts from false.
auto verifyFlag = [&ultCsr](ze_result_t result, bool dispatchFlag) {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ultCsr->isWalkerWithProfilingEnqueued, dispatchFlag);
ultCsr->isWalkerWithProfilingEnqueued = false;
};
// Expected flag value for walker-style appends signaled with the timestamp
// event; it follows the product helper, so it is false on products that
// do not register enqueued walkers with profiling.
auto expectFlagEnabled = true && this->device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();
// non-pipelined state
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);
// non-pipelined state already programmed
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);
// No signal event is passed, so the flag is expected to stay false.
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams), false);
verifyFlag(commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCount, event, 0, nullptr, false), expectFlagEnabled);
verifyFlag(commandList->appendBarrier(event, 0, nullptr, false), false);
CmdListMemoryCopyParams copyParams = {};
verifyFlag(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, copyParams), expectFlagEnabled);
// The appends below are expected to leave the flag false even though some
// of them take the timestamp event.
verifyFlag(commandList->appendEventReset(event), false);
verifyFlag(commandList->appendSignalEvent(event, false), false);
verifyFlag(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false);
verifyFlag(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false);
verifyFlag(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr), false);
// Image copy checks are compiled only for families that support samplers.
if constexpr (FamilyType::supportsSampler) {
// NOTE(review): `kernel` and `copyParams` declared in this scope shadow the
// outer variables of the same names.
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion);
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
mockBuiltinKernel->setArgRedescribedImageCallBase = false;
auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
ze_image_desc_t zeDesc = {};
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
image->initialize(device, &zeDesc);
auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);
CmdListMemoryCopyParams copyParams = {};
verifyFlag(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyFromMemoryExt(image->toHandle(), dstPtr, &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyToMemoryExt(dstPtr, image->toHandle(), &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectFlagEnabled);
}
size_t rangeSizes = 1;
// NOTE(review): this reinterprets the dstPtr byte buffer itself as an array
// of pointers rather than passing the address of a pointer - confirm intent.
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
verifyFlag(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr), false);
// Cooperative kernel launches signaled with the timestamp event are checked twice.
CmdListKernelLaunchParams cooperativeParams = {};
cooperativeParams.isCooperative = true;
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);
driverHandle->releaseImportedPointer(dstPtr);
}
// Regular (non-immediate) command list variant: after every append call the
// test reads and clears isWalkerWithProfilingEnqueued on the command list itself.
// Kernel launch, memory copy/fill and image copy appends signaled with the
// timestamp event are expected to match
// ProductHelper::shouldRegisterEnqueuedWalkerWithProfiling(); barrier, event
// reset/signal/wait, page-fault copy, global timestamp write and memory ranges
// barrier appends are expected to report false.
HWTEST2_F(CommandListCreateTests, givenCmdListWhenDispatchingWalkerWithProfilingThenSetCmdListFlagIsWalkerWithProfilingEnqueued, IsAtLeastXeCore) {
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false));
ASSERT_NE(nullptr, commandList);
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
// Pool created with the kernel-timestamp flag; presumably this is what makes
// isEventTimestampFlagSet() true for the event below - confirm against Event.
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
ze_event_desc_t eventDesc = {};
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
ze_event_handle_t event = nullptr;
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
// The unique_ptr takes ownership of the created event for the rest of the test.
std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));
std::unique_ptr<L0::ult::Module> mockModule = std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin);
Mock<::L0::KernelImp> kernel;
kernel.module = mockModule.get();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
uint8_t srcPtr[64] = {};
uint8_t dstPtr[64] = {};
const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};
// NOTE(review): dstPtr is a 64-byte array but is imported with a size of
// MemoryConstants::pageSize - confirm this is intended.
driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
// Checks that the append succeeded and that the command list flag equals
// dispatchFlag. Reading through getAndClear...() also resets the flag, so
// each check starts from false.
auto verifyFlag = [&whiteBoxCmdList](ze_result_t result, bool dispatchFlag) {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(whiteBoxCmdList->getAndClearIsWalkerWithProfilingEnqueued(), dispatchFlag);
};
// Expected flag value for walker-style appends signaled with the timestamp
// event; it follows the product helper, so it is false on products that
// do not register enqueued walkers with profiling.
auto expectFlagEnabled = true && device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();
// non-pipelined state
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);
// non-pipelined state already programmed
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);
// No signal event is passed, so the flag is expected to stay false.
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams), false);
verifyFlag(commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCount, event, 0, nullptr, false), expectFlagEnabled);
verifyFlag(commandList->appendBarrier(event, 0, nullptr, false), false);
CmdListMemoryCopyParams copyParams = {};
verifyFlag(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, copyParams), expectFlagEnabled);
// The appends below are expected to report false even though some of them
// take the timestamp event.
verifyFlag(commandList->appendEventReset(event), false);
verifyFlag(commandList->appendSignalEvent(event, false), false);
verifyFlag(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false);
verifyFlag(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false);
verifyFlag(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr), false);
// Image copy checks are compiled only for families that support samplers.
if constexpr (FamilyType::supportsSampler) {
// NOTE(review): `kernel` and `copyParams` declared in this scope shadow the
// outer variables of the same names.
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion);
auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);
mockBuiltinKernel->setArgRedescribedImageCallBase = false;
auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
ze_image_desc_t zeDesc = {};
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
image->initialize(device, &zeDesc);
auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);
CmdListMemoryCopyParams copyParams = {};
verifyFlag(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyFromMemoryExt(image->toHandle(), dstPtr, &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectFlagEnabled);
verifyFlag(commandList->appendImageCopyToMemoryExt(dstPtr, image->toHandle(), &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, copyParams), expectFlagEnabled);
}
size_t rangeSizes = 1;
// NOTE(review): this reinterprets the dstPtr byte buffer itself as an array
// of pointers rather than passing the address of a pointer - confirm intent.
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
verifyFlag(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr), false);
// Cooperative kernel launches signaled with the timestamp event are checked twice.
CmdListKernelLaunchParams cooperativeParams = {};
cooperativeParams.isCooperative = true;
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);
verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);
driverHandle->releaseImportedPointer(dstPtr);
}
TEST_F(CommandListCreateTests, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;