mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-23 03:33:13 +08:00
feature: control post sync completion check
Related-To: NEO-14844
Signed-off-by: Tomasz Biernacik <tomasz.biernacik@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
087d1ecea4
commit
2c5cbec033
@@ -456,6 +456,16 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
return this->captureTarget->capture<api>(apiArgs...);
|
||||
}
|
||||
|
||||
inline bool getIsWalkerWithProfilingEnqueued() {
|
||||
return this->isWalkerWithProfilingEnqueued;
|
||||
}
|
||||
|
||||
inline bool getAndClearIsWalkerWithProfilingEnqueued() {
|
||||
bool retVal = this->isWalkerWithProfilingEnqueued;
|
||||
this->isWalkerWithProfilingEnqueued = false;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload);
|
||||
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload);
|
||||
@@ -553,6 +563,8 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool l3FlushAfterPostSyncRequired = false;
|
||||
bool textureCacheFlushPending = false;
|
||||
bool closedCmdList = false;
|
||||
bool isWalkerWithProfilingEnqueued = false;
|
||||
bool shouldRegisterEnqueuedWalkerWithProfiling = false;
|
||||
|
||||
Graph *captureTarget = nullptr;
|
||||
};
|
||||
|
||||
@@ -157,6 +157,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
taskCountUpdateFenceRequired = false;
|
||||
textureCacheFlushPending = false;
|
||||
closedCmdList = false;
|
||||
isWalkerWithProfilingEnqueued = false;
|
||||
|
||||
this->inOrderPatchCmds.clear();
|
||||
|
||||
@@ -273,6 +274,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
this->compactL3FlushEventPacket = L0GfxCoreHelper::useCompactL3FlushEventPacket(hwInfo, this->l3FlushAfterPostSyncRequired);
|
||||
this->useAdditionalBlitProperties = productHelper.useAdditionalBlitProperties();
|
||||
this->isPostImageWriteFlushRequired = releaseHelper ? releaseHelper->isPostImageWriteFlushRequired() : false;
|
||||
this->shouldRegisterEnqueuedWalkerWithProfiling = this->device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();
|
||||
|
||||
if (NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
|
||||
this->defaultPipelinedThreadArbitrationPolicy = NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get();
|
||||
@@ -448,6 +450,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
||||
if (!launchParams.isKernelSplitOperation) {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
}
|
||||
|
||||
if (event->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
|
||||
this->isWalkerWithProfilingEnqueued = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!handleCounterBasedEventOperations(event, launchParams.omitAddingEventResidency)) {
|
||||
@@ -501,6 +507,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
|
||||
}
|
||||
launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
|
||||
if (event->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
|
||||
this->isWalkerWithProfilingEnqueued = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!handleCounterBasedEventOperations(event, false)) {
|
||||
@@ -547,6 +557,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
if (hEvent) {
|
||||
event = Event::fromHandle(hEvent);
|
||||
launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
|
||||
if (this->shouldRegisterEnqueuedWalkerWithProfiling && event->isEventTimestampFlagSet()) {
|
||||
this->isWalkerWithProfilingEnqueued = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!handleCounterBasedEventOperations(event, false)) {
|
||||
@@ -1476,6 +1490,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
|
||||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
|
||||
if (signalEvent) {
|
||||
if (signalEvent->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
|
||||
this->isWalkerWithProfilingEnqueued = true;
|
||||
}
|
||||
}
|
||||
|
||||
Kernel *builtinKernel = nullptr;
|
||||
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
@@ -2351,6 +2371,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
launchParams.isHostSignalScopeEvent = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
dcFlush = getDcFlushRequired(signalEvent->isSignalScope());
|
||||
if (signalEvent->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) {
|
||||
this->isWalkerWithProfilingEnqueued = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) {
|
||||
@@ -3267,6 +3290,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
|
||||
args.dcFlushEnable = true;
|
||||
args.workloadPartitionOffset = partitionCount > 1;
|
||||
args.textureCacheInvalidationEnable = textureFlushRequired;
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*cmdStream,
|
||||
@@ -3484,6 +3508,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.blockSettingPostSyncProperties = true;
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
@@ -4291,6 +4316,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!descriptor->writeScope);
|
||||
args.dcFlushEnable &= dstAllocationStruct.needsFlush;
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
@@ -4428,6 +4454,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
|
||||
NEO::PipeControlArgs pipeControlArgs;
|
||||
pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope);
|
||||
pipeControlArgs.workloadPartitionOffset = eventOperations.workPartitionOperation;
|
||||
pipeControlArgs.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
const auto &productHelper = this->device->getNEODevice()->getRootDeviceEnvironment().template getHelper<NEO::ProductHelper>();
|
||||
if (productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(this->device->getHwInfo())) {
|
||||
@@ -4853,6 +4880,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(Event *event) {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
args.workloadPartitionOffset = partitionCount > 1;
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
cmdListStream,
|
||||
|
||||
@@ -263,15 +263,17 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
&this->requiredStreamState, // requiredState
|
||||
sshCpuPointer, // sshCpuBase
|
||||
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
|
||||
appendOperation, // dispatchOperation
|
||||
this->isSyncModeQueue, // blockingAppend
|
||||
requireTaskCountUpdate, // requireTaskCountUpdate
|
||||
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
|
||||
hasStallingCmds // hasStallingCmds
|
||||
&this->requiredStreamState, // requiredState
|
||||
sshCpuPointer, // sshCpuBase
|
||||
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
|
||||
appendOperation, // dispatchOperation
|
||||
this->isSyncModeQueue, // blockingAppend
|
||||
requireTaskCountUpdate, // requireTaskCountUpdate
|
||||
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
|
||||
hasStallingCmds, // hasStallingCmds
|
||||
this->isWalkerWithProfilingEnqueued // isWalkerWithProfilingEnqueued
|
||||
};
|
||||
this->isWalkerWithProfilingEnqueued = false;
|
||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||
|
||||
return getCsr(false)->flushImmediateTask(cmdStreamTask,
|
||||
@@ -294,15 +296,17 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
nullptr, // requiredState
|
||||
sshCpuPointer, // sshCpuBase
|
||||
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
|
||||
appendOperation, // dispatchOperation
|
||||
this->isSyncModeQueue, // blockingAppend
|
||||
requireTaskCountUpdate, // requireTaskCountUpdate
|
||||
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
|
||||
hasStallingCmds // hasStallingCmds
|
||||
nullptr, // requiredState
|
||||
sshCpuPointer, // sshCpuBase
|
||||
optionalEpilogueCmdStream, // optionalEpilogueCmdStream
|
||||
appendOperation, // dispatchOperation
|
||||
this->isSyncModeQueue, // blockingAppend
|
||||
requireTaskCountUpdate, // requireTaskCountUpdate
|
||||
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
|
||||
hasStallingCmds, // hasStallingCmds
|
||||
this->isWalkerWithProfilingEnqueued // isWalkerWithProfilingEnqueued
|
||||
};
|
||||
this->isWalkerWithProfilingEnqueued = false;
|
||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||
|
||||
return getCsr(false)->flushImmediateTaskStateless(cmdStreamTask,
|
||||
@@ -348,6 +352,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
false // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
|
||||
dispatchFlags.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
dispatchFlags.optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
|
||||
auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject));
|
||||
@@ -1806,6 +1811,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
|
||||
return ret;
|
||||
}
|
||||
|
||||
this->isWalkerWithProfilingEnqueued |= this->cmdQImmediate->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(signalEvent,
|
||||
nullptr,
|
||||
nullptr,
|
||||
|
||||
@@ -329,6 +329,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags() {
|
||||
NEO::PipeControlArgs args;
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
return args;
|
||||
}
|
||||
|
||||
|
||||
@@ -632,6 +632,7 @@ NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags()
|
||||
args.hdcPipelineFlush = true;
|
||||
args.unTypedDataPortCacheFlush = true;
|
||||
args.textureCacheInvalidationEnable = this->consumeTextureCacheFlushPending();
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
return args;
|
||||
}
|
||||
|
||||
|
||||
@@ -84,6 +84,12 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
||||
TaskCountType getTaskCount() const { return taskCount; }
|
||||
void setTaskCount(TaskCountType newTaskCount) { taskCount = newTaskCount; }
|
||||
|
||||
inline bool getAndClearIsWalkerWithProfilingEnqueued() {
|
||||
bool retVal = this->isWalkerWithProfilingEnqueued;
|
||||
this->isWalkerWithProfilingEnqueued = false;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool frontEndTrackingEnabled() const;
|
||||
|
||||
@@ -104,6 +110,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
||||
bool dispatchCmdListBatchBufferAsPrimary = false;
|
||||
bool heaplessModeEnabled = false;
|
||||
bool heaplessStateInitEnabled = false;
|
||||
bool isWalkerWithProfilingEnqueued = false;
|
||||
};
|
||||
|
||||
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,
|
||||
|
||||
@@ -827,6 +827,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
|
||||
|
||||
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
|
||||
}
|
||||
|
||||
this->isWalkerWithProfilingEnqueued = commandList->getIsWalkerWithProfilingEnqueued();
|
||||
}
|
||||
|
||||
this->getCsr()->getResidencyAllocations().reserve(ctx.spaceForResidency);
|
||||
@@ -1387,6 +1389,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncRegular(
|
||||
args.dcFlushEnable = this->csr->getDcFlushSupport();
|
||||
args.workloadPartitionOffset = this->partitionCount > 1;
|
||||
args.notifyEnable = this->csr->isUsedNotifyEnableForPostSync();
|
||||
args.isWalkerWithProfilingEnqueued = this->getAndClearIsWalkerWithProfilingEnqueued();
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
cmdStream,
|
||||
NEO::PostSyncMode::immediateData,
|
||||
|
||||
@@ -2164,6 +2164,225 @@ HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrd
|
||||
NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter, false));
|
||||
}
|
||||
|
||||
// Immediate command list variant: after every append call the test inspects the
// isWalkerWithProfilingEnqueued member of the UltCommandStreamReceiver and then resets it.
// The event comes from a pool created with ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP.
// Kernel launches, memory copies/fills and image copies signalling that event are expected
// to report the flag whenever ProductHelper::shouldRegisterEnqueuedWalkerWithProfiling()
// is true; barriers, event reset/signal/wait, page-fault copy and global timestamp writes
// are expected to leave it false.
HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatchingWalkerWithProfilingThenSetCsrFlagIsWalkerWithProfilingEnqueued, IsAtLeastXeCore) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
    std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));

    std::unique_ptr<L0::ult::Module> mockModule = std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin);
    Mock<::L0::KernelImp> kernel;
    kernel.module = mockModule.get();
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    uint8_t srcPtr[64] = {};
    uint8_t dstPtr[64] = {};
    const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};

    driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));

    // Checks the append result and the flag recorded on the CSR, then resets the CSR flag
    // so that the next append call is verified from a clean state.
    auto verifyFlag = [&ultCsr](ze_result_t result, bool dispatchFlag) {
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);
        EXPECT_EQ(ultCsr->isWalkerWithProfilingEnqueued, dispatchFlag);
        ultCsr->isWalkerWithProfilingEnqueued = false;
    };

    const bool expectFlagEnabled = this->device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();

    // non-pipelined state
    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);

    // non-pipelined state already programmed
    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);

    // No signal event: nothing to profile, so the flag must stay false.
    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams), false);

    verifyFlag(commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCount, event, 0, nullptr, false), expectFlagEnabled);

    verifyFlag(commandList->appendBarrier(event, 0, nullptr, false), false);

    CmdListMemoryCopyParams copyParams = {};
    verifyFlag(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, copyParams), expectFlagEnabled);

    verifyFlag(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, copyParams), expectFlagEnabled);

    verifyFlag(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, copyParams), expectFlagEnabled);

    verifyFlag(commandList->appendEventReset(event), false);

    verifyFlag(commandList->appendSignalEvent(event, false), false);

    verifyFlag(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false);

    verifyFlag(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false);

    verifyFlag(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr), false);

    if constexpr (FamilyType::supportsSampler) {
        auto builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion);
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(builtinKernel);
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;

        auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
        ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        image->initialize(device, &zeDesc);
        auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);

        // Fresh parameter object for the image copies, kept separate from the one used above.
        CmdListMemoryCopyParams imageCopyParams = {};

        verifyFlag(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyFromMemoryExt(image->toHandle(), dstPtr, &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyToMemoryExt(dstPtr, image->toHandle(), &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, imageCopyParams), expectFlagEnabled);
    }

    size_t rangeSizes = 1;
    const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
    verifyFlag(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr), false);

    CmdListKernelLaunchParams cooperativeParams = {};
    cooperativeParams.isCooperative = true;

    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);

    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);

    driverHandle->releaseImportedPointer(dstPtr);
}
|
||||
|
||||
// Regular (non-immediate) command list variant: after every append call the test reads and
// clears the command list's own flag through getAndClearIsWalkerWithProfilingEnqueued().
// The event comes from a pool created with ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP.
// Kernel launches, memory copies/fills and image copies signalling that event are expected
// to report the flag whenever ProductHelper::shouldRegisterEnqueuedWalkerWithProfiling()
// is true; barriers, event reset/signal/wait, page-fault copy and global timestamp writes
// are expected to leave it false.
HWTEST2_F(CommandListCreateTests, givenCmdListWhenDispatchingWalkerWithProfilingThenSetCmdListFlagIsWalkerWithProfilingEnqueued, IsAtLeastXeCore) {
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false));
    ASSERT_NE(nullptr, commandList);
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
    std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));

    std::unique_ptr<L0::ult::Module> mockModule = std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin);
    Mock<::L0::KernelImp> kernel;
    kernel.module = mockModule.get();
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    uint8_t srcPtr[64] = {};
    uint8_t dstPtr[64] = {};
    const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};

    driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);

    // Checks the append result and the command list flag. Reading through the
    // get-and-clear accessor also resets the flag before the next append call.
    auto verifyFlag = [&whiteBoxCmdList](ze_result_t result, bool dispatchFlag) {
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);
        EXPECT_EQ(whiteBoxCmdList->getAndClearIsWalkerWithProfilingEnqueued(), dispatchFlag);
    };

    const bool expectFlagEnabled = device->getNEODevice()->getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling();

    // non-pipelined state
    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);

    // non-pipelined state already programmed
    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, launchParams), expectFlagEnabled);

    // No signal event: nothing to profile, so the flag must stay false.
    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams), false);

    verifyFlag(commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCount, event, 0, nullptr, false), expectFlagEnabled);

    verifyFlag(commandList->appendBarrier(event, 0, nullptr, false), false);

    CmdListMemoryCopyParams copyParams = {};
    verifyFlag(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, event, 0, nullptr, copyParams), expectFlagEnabled);

    verifyFlag(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, event, 0, nullptr, copyParams), expectFlagEnabled);

    verifyFlag(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, event, 0, nullptr, copyParams), expectFlagEnabled);

    verifyFlag(commandList->appendEventReset(event), false);

    verifyFlag(commandList->appendSignalEvent(event, false), false);

    verifyFlag(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false);

    verifyFlag(commandList->appendWaitOnEvents(1, &event, nullptr, false, true, false, false, false, false), false);

    verifyFlag(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), event, 0, nullptr), false);

    if constexpr (FamilyType::supportsSampler) {
        auto builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion);
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(builtinKernel);
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;

        auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
        ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        image->initialize(device, &zeDesc);
        auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);

        // Fresh parameter object for the image copies, kept separate from the one used above.
        CmdListMemoryCopyParams imageCopyParams = {};

        verifyFlag(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyFromMemoryExt(image->toHandle(), dstPtr, &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, imageCopyParams), expectFlagEnabled);

        verifyFlag(commandList->appendImageCopyToMemoryExt(dstPtr, image->toHandle(), &imgRegion, bytesPerPixel, bytesPerPixel, event, 0, nullptr, imageCopyParams), expectFlagEnabled);
    }

    size_t rangeSizes = 1;
    const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
    verifyFlag(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, event, 0, nullptr), false);

    CmdListKernelLaunchParams cooperativeParams = {};
    cooperativeParams.isCooperative = true;

    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);

    verifyFlag(commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event, 0, nullptr, cooperativeParams), expectFlagEnabled);

    driverHandle->releaseImportedPointer(dstPtr);
}
|
||||
|
||||
TEST_F(CommandListCreateTests, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
|
||||
Reference in New Issue
Block a user