fix: Regular Event waits for in-order counter if HW chaining is required
Related-To: NEO-8145
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
982ef34644
commit
9cc1a23ab6
|
@ -163,7 +163,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
||||||
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
|
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
|
||||||
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
|
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
|
||||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) = 0;
|
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest) = 0;
|
||||||
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||||
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
|
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
|
||||||
|
|
|
@ -170,7 +170,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||||
ze_result_t hostSynchronize(uint64_t timeout) override;
|
ze_result_t hostSynchronize(uint64_t timeout) override;
|
||||||
|
|
||||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest) override;
|
||||||
void appendWaitOnInOrderDependency(std::shared_ptr<InOrderExecInfo> &inOrderExecInfo, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency);
|
void appendWaitOnInOrderDependency(std::shared_ptr<InOrderExecInfo> &inOrderExecInfo, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency);
|
||||||
void appendSignalInOrderDependencyCounter(Event *signalEvent);
|
void appendSignalInOrderDependencyCounter(Event *signalEvent);
|
||||||
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining);
|
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining);
|
||||||
|
|
|
@ -153,6 +153,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining) {
|
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining) {
|
||||||
|
if (!isInOrderExecutionEnabled()) {
|
||||||
|
if (signalEvent && signalEvent->getInOrderExecInfo().get()) {
|
||||||
|
UNRECOVERABLE_IF(signalEvent->isCounterBased());
|
||||||
|
signalEvent->unsetInOrderExecInfo(); // unset temporary assignment from previous append calls
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isQwordInOrderCounter() && ((inOrderExecInfo->getCounterValue() + 1) == std::numeric_limits<uint32_t>::max())) {
|
if (!isQwordInOrderCounter() && ((inOrderExecInfo->getCounterValue() + 1) == std::numeric_limits<uint32_t>::max())) {
|
||||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, inOrderExecInfo->getCounterValue() + 1, inOrderAllocationOffset, false, true);
|
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, inOrderExecInfo->getCounterValue() + 1, inOrderAllocationOffset, false, true);
|
||||||
|
|
||||||
|
@ -172,8 +181,12 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
|
||||||
|
|
||||||
this->commandContainer.addToResidencyContainer(&inOrderExecInfo->getDeviceCounterAllocation());
|
this->commandContainer.addToResidencyContainer(&inOrderExecInfo->getDeviceCounterAllocation());
|
||||||
|
|
||||||
if (signalEvent && signalEvent->isCounterBased()) {
|
if (signalEvent) {
|
||||||
|
if (signalEvent->isCounterBased() || nonWalkerInOrderCmdsChaining) {
|
||||||
signalEvent->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->getCounterValue(), this->inOrderAllocationOffset);
|
signalEvent->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->getCounterValue(), this->inOrderAllocationOffset);
|
||||||
|
} else {
|
||||||
|
signalEvent->unsetInOrderExecInfo();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->latestOperationRequiredNonWalkerInOrderCmdsChaining = nonWalkerInOrderCmdsChaining;
|
this->latestOperationRequiredNonWalkerInOrderCmdsChaining = nonWalkerInOrderCmdsChaining;
|
||||||
|
@ -367,7 +380,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
||||||
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
|
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
|
||||||
event, launchParams);
|
event, launchParams);
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled() && !launchParams.skipInOrderNonWalkerSignaling) {
|
if (!launchParams.skipInOrderNonWalkerSignaling) {
|
||||||
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -412,9 +425,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
||||||
event, launchParams);
|
event, launchParams);
|
||||||
addToMappedEventList(event);
|
addToMappedEventList(event);
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
|
||||||
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -454,9 +466,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||||
addToMappedEventList(event);
|
addToMappedEventList(event);
|
||||||
appendSignalEventPostWalker(event, false);
|
appendSignalEventPostWalker(event, false);
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
|
||||||
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -553,8 +563,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(event);
|
appendSignalInOrderDependencyCounter(event);
|
||||||
handleInOrderDependencyCounter(event, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(event, false);
|
||||||
|
|
||||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||||
|
@ -596,8 +606,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
handleInOrderDependencyCounter(signalEvent, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -1528,6 +1538,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||||
bool nonWalkerInOrderCmdChaining = !isCopyOnly() && isInOrderNonWalkerSignalingRequired(signalEvent) && !emitPipeControl;
|
bool nonWalkerInOrderCmdChaining = !isCopyOnly() && isInOrderNonWalkerSignalingRequired(signalEvent) && !emitPipeControl;
|
||||||
handleInOrderDependencyCounter(signalEvent, nonWalkerInOrderCmdChaining);
|
handleInOrderDependencyCounter(signalEvent, nonWalkerInOrderCmdChaining);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||||
|
@ -1626,6 +1638,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||||
bool nonWalkerInOrderCmdChaining = !isCopyOnly() && isInOrderNonWalkerSignalingRequired(signalEvent);
|
bool nonWalkerInOrderCmdChaining = !isCopyOnly() && isInOrderNonWalkerSignalingRequired(signalEvent);
|
||||||
handleInOrderDependencyCounter(signalEvent, nonWalkerInOrderCmdChaining);
|
handleInOrderDependencyCounter(signalEvent, nonWalkerInOrderCmdChaining);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||||
|
@ -2068,8 +2082,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||||
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
||||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
|
||||||
bool nonWalkerInOrderCmdChaining = false;
|
bool nonWalkerInOrderCmdChaining = false;
|
||||||
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
if (launchParams.isKernelSplitOperation) {
|
if (launchParams.isKernelSplitOperation) {
|
||||||
if (!signalEvent) {
|
if (!signalEvent) {
|
||||||
NEO::PipeControlArgs args;
|
NEO::PipeControlArgs args;
|
||||||
|
@ -2079,9 +2093,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||||
} else {
|
} else {
|
||||||
nonWalkerInOrderCmdChaining = isInOrderNonWalkerSignalingRequired(signalEvent);
|
nonWalkerInOrderCmdChaining = isInOrderNonWalkerSignalingRequired(signalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
handleInOrderDependencyCounter(signalEvent, nonWalkerInOrderCmdChaining);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(signalEvent, nonWalkerInOrderCmdChaining);
|
||||||
|
|
||||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||||
|
@ -2147,8 +2160,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
if (isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
handleInOrderDependencyCounter(signalEvent, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
}
|
}
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -2379,8 +2392,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(event);
|
appendSignalInOrderDependencyCounter(event);
|
||||||
handleInOrderDependencyCounter(event, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(event, false);
|
||||||
|
|
||||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||||
|
@ -2443,16 +2456,14 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
bool CommandListCoreFamily<gfxCoreFamily>::canSkipInOrderEventWait(const Event &event) const {
|
bool CommandListCoreFamily<gfxCoreFamily>::canSkipInOrderEventWait(const Event &event) const {
|
||||||
if (isInOrderExecutionEnabled()) {
|
if (isInOrderExecutionEnabled()) {
|
||||||
return ((this->cmdListType == TYPE_IMMEDIATE && event.getLatestUsedCmdQueue() == this->cmdQImmediate) || // 1. Immediate CmdList can skip "regular Events" from the same CmdList
|
return ((this->cmdListType == TYPE_IMMEDIATE && event.getLatestUsedCmdQueue() == this->cmdQImmediate) || // 1. Immediate CmdList can skip "regular Events" from the same CmdList
|
||||||
(event.getInOrderExecDataAllocation() == &inOrderExecInfo->getDeviceCounterAllocation())); // 2. Both Immediate and Regular CmdLists can skip "in-order Events" from the same CmdList
|
(event.isCounterBased() && event.getInOrderExecDataAllocation() == &inOrderExecInfo->getDeviceCounterAllocation())); // 2. Both Immediate and Regular CmdLists can skip "CounterBased Events" from the same CmdList
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest) {
|
||||||
signalInOrderCompletion &= this->isInOrderExecutionEnabled();
|
|
||||||
|
|
||||||
NEO::Device *neoDevice = device->getNEODevice();
|
NEO::Device *neoDevice = device->getNEODevice();
|
||||||
uint32_t callId = 0;
|
uint32_t callId = 0;
|
||||||
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
if (NEO::debugManager.flags.EnableSWTags.get()) {
|
||||||
|
@ -2464,7 +2475,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (signalInOrderCompletion) {
|
if (this->isInOrderExecutionEnabled() && apiRequest) {
|
||||||
handleInOrderImplicitDependencies(false);
|
handleInOrderImplicitDependencies(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2521,8 +2532,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (signalInOrderCompletion) {
|
if (apiRequest) {
|
||||||
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(nullptr);
|
appendSignalInOrderDependencyCounter(nullptr);
|
||||||
|
}
|
||||||
handleInOrderDependencyCounter(nullptr, false);
|
handleInOrderDependencyCounter(nullptr, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2697,8 +2710,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
handleInOrderDependencyCounter(signalEvent, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
|
|
||||||
addToMappedEventList(signalEvent);
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
|
@ -3210,8 +3223,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
if (isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
handleInOrderDependencyCounter(signalEvent, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -3375,8 +3388,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
handleInOrderDependencyCounter(signalEvent, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(signalEvent, false);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -3423,8 +3436,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(nullptr);
|
appendSignalInOrderDependencyCounter(nullptr);
|
||||||
handleInOrderDependencyCounter(nullptr, false);
|
|
||||||
}
|
}
|
||||||
|
handleInOrderDependencyCounter(nullptr, false);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,7 +99,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||||
NEO::GraphicsAllocation *srcAllocation,
|
NEO::GraphicsAllocation *srcAllocation,
|
||||||
size_t size, bool flushHost) override;
|
size_t size, bool flushHost) override;
|
||||||
|
|
||||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest) override;
|
||||||
|
|
||||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||||
|
|
|
@ -444,11 +444,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
||||||
hSignalEvent, numWaitEvents, phWaitEvents,
|
hSignalEvent, numWaitEvents, phWaitEvents,
|
||||||
launchParams, relaxedOrderingDispatch);
|
launchParams, relaxedOrderingDispatch);
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled() && launchParams.skipInOrderNonWalkerSignaling) {
|
if (launchParams.skipInOrderNonWalkerSignaling) {
|
||||||
// skip only in base appendLaunchKernel()
|
|
||||||
auto event = Event::fromHandle(hSignalEvent);
|
auto event = Event::fromHandle(hSignalEvent);
|
||||||
|
|
||||||
|
if (isInOrderExecutionEnabled()) {
|
||||||
|
// Skip only in base appendLaunchKernel(). Handle remaining operations here.
|
||||||
handleInOrderNonWalkerSignaling(event, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, ret);
|
handleInOrderNonWalkerSignaling(event, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, ret);
|
||||||
|
}
|
||||||
CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(event, true);
|
CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(event, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -706,7 +708,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest) {
|
||||||
bool allSignaled = true;
|
bool allSignaled = true;
|
||||||
for (auto i = 0u; i < numEvents; i++) {
|
for (auto i = 0u; i < numEvents; i++) {
|
||||||
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
|
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
|
||||||
|
@ -716,7 +718,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
||||||
}
|
}
|
||||||
checkAvailableSpace(numEvents, false, commonImmediateCommandSize);
|
checkAvailableSpace(numEvents, false, commonImmediateCommandSize);
|
||||||
|
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, signalInOrderCompletion);
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, apiRequest);
|
||||||
this->dependenciesPresent = true;
|
this->dependenciesPresent = true;
|
||||||
return flushImmediate(ret, true, true, false, false, nullptr);
|
return flushImmediate(ret, true, true, false, false, nullptr);
|
||||||
}
|
}
|
||||||
|
@ -1043,7 +1045,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
||||||
if (hSignalEvent) {
|
if (hSignalEvent) {
|
||||||
signalEvent = Event::fromHandle(hSignalEvent);
|
signalEvent = Event::fromHandle(hSignalEvent);
|
||||||
}
|
}
|
||||||
this->handleCounterBasedEventOperations(signalEvent);
|
|
||||||
|
if (!this->handleCounterBasedEventOperations(signalEvent)) {
|
||||||
|
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
|
||||||
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
|
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
|
||||||
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
|
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
|
||||||
|
|
|
@ -137,8 +137,8 @@ struct BcsSplit {
|
||||||
|
|
||||||
if (cmdList->isInOrderExecutionEnabled()) {
|
if (cmdList->isInOrderExecutionEnabled()) {
|
||||||
cmdList->appendSignalInOrderDependencyCounter(signalEvent);
|
cmdList->appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
cmdList->handleInOrderDependencyCounter(signalEvent, false);
|
|
||||||
}
|
}
|
||||||
|
cmdList->handleInOrderDependencyCounter(signalEvent, false);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -367,8 +367,7 @@ void Event::disableImplicitCounterBasedMode() {
|
||||||
|
|
||||||
if (counterBasedMode == CounterBasedMode::ImplicitlyEnabled || counterBasedMode == CounterBasedMode::InitiallyDisabled) {
|
if (counterBasedMode == CounterBasedMode::ImplicitlyEnabled || counterBasedMode == CounterBasedMode::InitiallyDisabled) {
|
||||||
counterBasedMode = CounterBasedMode::ImplicitlyDisabled;
|
counterBasedMode = CounterBasedMode::ImplicitlyDisabled;
|
||||||
inOrderExecInfo.reset();
|
unsetInOrderExecInfo();
|
||||||
inOrderExecSignalValue = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,4 +460,10 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
|
||||||
|
|
||||||
NEO::GraphicsAllocation *Event::getInOrderExecDataAllocation() const { return inOrderExecInfo.get() ? &inOrderExecInfo->getDeviceCounterAllocation() : nullptr; }
|
NEO::GraphicsAllocation *Event::getInOrderExecDataAllocation() const { return inOrderExecInfo.get() ? &inOrderExecInfo->getDeviceCounterAllocation() : nullptr; }
|
||||||
|
|
||||||
|
void Event::unsetInOrderExecInfo() {
|
||||||
|
inOrderExecInfo.reset();
|
||||||
|
inOrderAllocationOffset = 0;
|
||||||
|
inOrderExecSignalValue = 0;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -257,6 +257,7 @@ struct Event : _ze_event_handle_t {
|
||||||
std::shared_ptr<InOrderExecInfo> &getInOrderExecInfo() { return inOrderExecInfo; }
|
std::shared_ptr<InOrderExecInfo> &getInOrderExecInfo() { return inOrderExecInfo; }
|
||||||
void enableKmdWaitMode() { kmdWaitMode = true; }
|
void enableKmdWaitMode() { kmdWaitMode = true; }
|
||||||
bool isKmdWaitModeEnabled() const { return kmdWaitMode; }
|
bool isKmdWaitModeEnabled() const { return kmdWaitMode; }
|
||||||
|
void unsetInOrderExecInfo();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
|
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
|
||||||
|
|
|
@ -194,6 +194,12 @@ void EventImp<TagSizeT>::handleSuccessfulHostSynchronization() {
|
||||||
}
|
}
|
||||||
this->setIsCompleted();
|
this->setIsCompleted();
|
||||||
unsetCmdQueue();
|
unsetCmdQueue();
|
||||||
|
|
||||||
|
if (!isCounterBased()) {
|
||||||
|
// Temporary assignment. If in-order CmdList required to use Event allocation for HW commands chaining, we need to wait for the counter.
|
||||||
|
// After successful host synchronization, we can unset CL counter.
|
||||||
|
unsetInOrderExecInfo();
|
||||||
|
}
|
||||||
for (auto &csr : csrs) {
|
for (auto &csr : csrs) {
|
||||||
csr->getInternalAllocationStorage()->cleanAllocationList(csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
csr->getInternalAllocationStorage()->cleanAllocationList(csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||||
}
|
}
|
||||||
|
@ -289,7 +295,7 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isCounterBased()) {
|
if (isCounterBased() || this->inOrderExecInfo.get()) {
|
||||||
return queryCounterBasedEventStatus();
|
return queryCounterBasedEventStatus();
|
||||||
} else {
|
} else {
|
||||||
return queryStatusEventPackets();
|
return queryStatusEventPackets();
|
||||||
|
@ -517,11 +523,6 @@ ze_result_t EventImp<TagSizeT>::reset() {
|
||||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->counterBasedMode == CounterBasedMode::ImplicitlyEnabled) {
|
|
||||||
inOrderExecInfo.reset();
|
|
||||||
inOrderExecSignalValue = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (NEO::debugManager.flags.SynchronizeEventBeforeReset.get() != -1) {
|
if (NEO::debugManager.flags.SynchronizeEventBeforeReset.get() != -1) {
|
||||||
if (NEO::debugManager.flags.SynchronizeEventBeforeReset.get() == 2 && queryStatus() != ZE_RESULT_SUCCESS) {
|
if (NEO::debugManager.flags.SynchronizeEventBeforeReset.get() == 2 && queryStatus() != ZE_RESULT_SUCCESS) {
|
||||||
printf("\nzeEventHostReset: Event %p not ready. Calling zeEventHostSynchronize.", this);
|
printf("\nzeEventHostReset: Event %p not ready. Calling zeEventHostSynchronize.", this);
|
||||||
|
@ -530,6 +531,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
|
||||||
hostSynchronize(std::numeric_limits<uint64_t>::max());
|
hostSynchronize(std::numeric_limits<uint64_t>::max());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsetInOrderExecInfo();
|
||||||
unsetCmdQueue();
|
unsetCmdQueue();
|
||||||
this->resetCompletionStatus();
|
this->resetCompletionStatus();
|
||||||
this->resetDeviceCompletionData(false);
|
this->resetDeviceCompletionData(false);
|
||||||
|
|
|
@ -422,7 +422,7 @@ struct MockCommandList : public CommandList {
|
||||||
|
|
||||||
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
|
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
|
||||||
(uint32_t numEvents,
|
(uint32_t numEvents,
|
||||||
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion));
|
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest));
|
||||||
|
|
||||||
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
|
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
|
||||||
(uint64_t * dstptr,
|
(uint64_t * dstptr,
|
||||||
|
|
|
@ -1109,18 +1109,99 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventWhenAppendEventResetCalledThenRe
|
||||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendEventReset(events[0]->toHandle()));
|
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendEventReset(events[0]->toHandle()));
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenDontSetInOrderParams, IsAtLeastSkl) {
|
HWTEST2_F(InOrderCmdListTests, givenRegularEventWithTemporaryInOrderDataAssignmentWhenCallingSynchronizeOrResetThenUnset, IsAtLeastSkl) {
|
||||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||||
|
|
||||||
|
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation().getUnderlyingBuffer());
|
||||||
|
|
||||||
|
auto eventPool = createEvents<FamilyType>(1, false);
|
||||||
|
events[0]->makeCounterBasedInitiallyDisabled();
|
||||||
|
|
||||||
|
auto nonWalkerSignallingSupported = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get());
|
||||||
|
|
||||||
|
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||||
|
|
||||||
|
EXPECT_EQ(nonWalkerSignallingSupported, events[0]->inOrderExecInfo.get() != nullptr);
|
||||||
|
|
||||||
|
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1));
|
||||||
|
EXPECT_EQ(nonWalkerSignallingSupported, events[0]->inOrderExecInfo.get() != nullptr);
|
||||||
|
|
||||||
|
if (nonWalkerSignallingSupported) {
|
||||||
|
*hostAddress = 1;
|
||||||
|
} else {
|
||||||
|
*reinterpret_cast<uint64_t *>(events[0]->getCompletionFieldHostAddress()) = Event::STATE_SIGNALED;
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
|
||||||
|
EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
|
||||||
|
|
||||||
|
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||||
|
EXPECT_EQ(nonWalkerSignallingSupported, events[0]->inOrderExecInfo.get() != nullptr);
|
||||||
|
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->reset());
|
||||||
|
EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks which in-order parameters (signal value, exec info, allocation offset) a regular
// event receives when it is used as the signal event on in-order immediate command lists:
//  - after a kernel launch they are set only if isInOrderNonWalkerSignalingRequired() is true,
//  - after a memory copy on a copy-only command list they are all expected to be cleared.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenSetInOrderParamsOnlyWhenChainingIsRequired, IsAtLeastSkl) {
    uint32_t counterOffset = 64;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    // Non-zero offset so that propagation to the event is distinguishable from the default 0.
    immCmdList->inOrderAllocationOffset = counterOffset;

    // The returned pool is kept in a local for the duration of the test; events[] is used below.
    auto eventPool = createEvents<FamilyType>(1, false);
    events[0]->makeCounterBasedInitiallyDisabled();

    // Launched exactly once: the signal value expected below is 1.
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
    // Using the event on an in-order command list must not turn it into a counter-based event.
    EXPECT_FALSE(events[0]->isCounterBased());

    if (immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get())) {
        EXPECT_EQ(events[0]->inOrderExecSignalValue, 1u);
        EXPECT_NE(events[0]->inOrderExecInfo.get(), nullptr);
        EXPECT_EQ(events[0]->inOrderAllocationOffset, counterOffset);
    } else {
        EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
        EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
        EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
    }

    auto copyImmCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();

    uint32_t copyData = 0;
    void *deviceAlloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);

    // Source is the host stack variable above; the same event is reused as the signal event.
    copyImmCmdList->appendMemoryCopy(deviceAlloc, &copyData, 1, events[0]->toHandle(), 0, nullptr, false, false);

    // Unconditionally cleared on this path.
    // NOTE(review): presumably because no chaining is needed for the copy path - confirm.
    EXPECT_FALSE(events[0]->isCounterBased());
    EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
    EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
    EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);

    context->freeMem(deviceAlloc);
}
|
||||||
|
|
||||||
|
// Checks that a regular event which may have picked up in-order exec info from an in-order
// command list loses that data when it is used again after in-order execution has been
// turned off on the command list.
HWTEST2_F(InOrderCmdListTests, givenRegularEventWithInOrderExecInfoWhenReusedOnRegularCmdListThenUnsetInOrderData, IsAtLeastSkl) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    // The returned pool is kept in a local for the duration of the test; events[] is used below.
    auto eventPool = createEvents<FamilyType>(1, false);
    events[0]->makeCounterBasedInitiallyDisabled();

    // The first expectation ties "event holds in-order exec info" to this query result.
    auto nonWalkerSignallingSupported = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get());

    EXPECT_TRUE(immCmdList->isInOrderExecutionEnabled());

    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);

    EXPECT_EQ(nonWalkerSignallingSupported, events[0]->inOrderExecInfo.get() != nullptr);

    // Dropping the exec info makes the same command list report in-order execution as disabled.
    immCmdList->inOrderExecInfo.reset();
    EXPECT_FALSE(immCmdList->isInOrderExecutionEnabled());

    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);

    // After the second launch the event must no longer reference in-order data.
    EXPECT_EQ(nullptr, events[0]->inOrderExecInfo.get());
}
|
||||||
|
|
||||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) {
|
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) {
|
||||||
|
@ -1735,6 +1816,9 @@ HWTEST2_F(InOrderCmdListTests, givenNonInOrderCmdListWhenPassingCounterBasedEven
|
||||||
desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL;
|
desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL;
|
||||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), copyData, 1, eventHandle, false));
|
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), copyData, 1, eventHandle, false));
|
||||||
|
|
||||||
|
immCmdList->copyThroughLockedPtrEnabled = true;
|
||||||
|
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendMemoryCopy(alloc, ©Data, 1, eventHandle, 0, nullptr, false, false));
|
||||||
|
|
||||||
context->freeMem(alloc);
|
context->freeMem(alloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue