feature: enable in-order sync allocation for regular cmd lists 2
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
3eb98163a8
commit
efddaa1251
|
@ -327,8 +327,8 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
|
||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
bool isInOrderEventWaitRequired(const Event &event) const;
|
||||
virtual bool useCounterAllocationForInOrderMode() const { return false; }
|
||||
void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
|
||||
virtual void handleInOrderDependencyCounter();
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
|
|
|
@ -141,9 +141,35 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
|||
inOrderDependencyCounter = 0;
|
||||
inOrderAllocationOffset = 0;
|
||||
|
||||
if (inOrderDependencyCounterAllocation) {
|
||||
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter() {
|
||||
if ((inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max()) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false);
|
||||
|
||||
inOrderDependencyCounter = 0;
|
||||
|
||||
// multitile immediate writes are uint64_t aligned
|
||||
uint32_t offset = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
|
||||
|
||||
inOrderAllocationOffset += offset;
|
||||
|
||||
UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(); // write 1 on new offset
|
||||
}
|
||||
|
||||
inOrderDependencyCounter++;
|
||||
|
||||
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::handlePostSubmissionState() {
|
||||
this->commandContainer.getResidencyContainer().clear();
|
||||
|
@ -327,6 +353,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
|||
|
||||
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
|
||||
event, launchParams);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
addToMappedEventList(event);
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||
|
@ -363,6 +394,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
|||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
|
||||
event, launchParams);
|
||||
addToMappedEventList(event);
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -395,6 +430,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
|||
addToMappedEventList(event);
|
||||
appendSignalEventPostWalker(event);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -479,8 +518,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
|||
}
|
||||
}
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
@ -517,8 +557,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
|||
appendSignalEventPostWalker(signalEvent);
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
@ -672,6 +713,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
|||
event, numWaitEvents, phWaitEvents,
|
||||
launchParams, relaxedOrderingDispatch);
|
||||
addToMappedEventList(Event::fromHandle(hEvent));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -974,6 +1016,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
|||
event, numWaitEvents, phWaitEvents,
|
||||
launchParams, relaxedOrderingDispatch);
|
||||
addToMappedEventList(event);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -1416,12 +1459,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
|
||||
if (!signalEvent && !isCopyOnly()) {
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) {
|
||||
if (!signalEvent && !isCopyOnly()) {
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
appendSignalInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
appendSignalInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
@ -1449,7 +1498,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
|||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
|
||||
bool forceDisableCopyOnlyInOrderSignaling) {
|
||||
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling;
|
||||
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
|
@ -1509,8 +1558,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
|||
addToMappedEventList(signalEvent);
|
||||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
if (inOrderCopyOnlySignalingAllowed) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
@ -1932,12 +1987,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) {
|
||||
if (!signalEvent) {
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
if (launchParams.isKernelSplitOperation) {
|
||||
if (!signalEvent) {
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
appendSignalInOrderDependencyCounter();
|
||||
}
|
||||
appendSignalInOrderDependencyCounter();
|
||||
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
@ -1998,8 +2057,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
|||
|
||||
appendSignalEventPostWalker(signalEvent);
|
||||
|
||||
if (isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) {
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
@ -2166,16 +2226,12 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
|
|||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
|
||||
}
|
||||
|
||||
if (useCounterAllocationForInOrderMode()) {
|
||||
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed);
|
||||
} else if (!isCopyOnly()) {
|
||||
appendComputeBarrierCommand();
|
||||
}
|
||||
}
|
||||
|
||||
if (cmdListType == TYPE_REGULAR && this->inOrderExecutionEnabled && !hasInOrderDependencies) {
|
||||
inOrderDependencyCounter++; // First append is without dependencies. Increment counter to program barrier on next calls.
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -2224,8 +2280,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
|||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
@ -2244,7 +2301,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
|
|||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
UNRECOVERABLE_IF(waitValue > std::numeric_limits<uint32_t>::max());
|
||||
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
|
||||
UNRECOVERABLE_IF(this->cmdListType == CommandListType::TYPE_REGULAR);
|
||||
|
||||
commandContainer.addToResidencyContainer(dependencyCounterAllocation);
|
||||
|
||||
|
@ -2338,8 +2395,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
|||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
}
|
||||
|
||||
if (signalInOrderCompletion && useCounterAllocationForInOrderMode()) {
|
||||
if (signalInOrderCompletion) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
makeResidentDummyAllocation();
|
||||
|
@ -2357,8 +2415,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
|
||||
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
|
||||
|
||||
uint32_t signalValue = this->inOrderDependencyCounter + 1;
|
||||
|
||||
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
|
||||
|
@ -2494,8 +2550,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
|||
|
||||
appendSignalEventPostWalker(signalEvent);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false);
|
||||
|
@ -2988,7 +3045,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
|||
|
||||
appendEventForProfiling(signalEvent, true);
|
||||
|
||||
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
} else if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
|
@ -3009,6 +3066,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
|||
|
||||
addToMappedEventList(signalEvent);
|
||||
appendSignalEventPostWalker(signalEvent);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -3130,12 +3192,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
|||
}
|
||||
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
|
||||
|
||||
appendEventForProfiling(signalEvent, true);
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
handleInOrderImplicitDependencies(false);
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, true);
|
||||
|
||||
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
|
||||
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, data, comparator, false, false, false);
|
||||
|
@ -3151,6 +3213,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
|||
|
||||
appendSignalEventPostWalker(signalEvent);
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -3194,6 +3261,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
|||
args);
|
||||
}
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
@ -189,9 +189,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
|
||||
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
|
||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||
void handleInOrderDependencyCounter();
|
||||
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
|
||||
bool useCounterAllocationForInOrderMode() const override { return true; }
|
||||
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
|
|
|
@ -876,10 +876,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
|||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
handleInOrderDependencyCounter();
|
||||
}
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
|
||||
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
|
||||
|
@ -904,28 +900,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
|||
return inputRet;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderDependencyCounter() {
|
||||
if ((inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max()) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false);
|
||||
|
||||
inOrderDependencyCounter = 0;
|
||||
|
||||
// multitile immediate writes are uint64_t aligned
|
||||
uint32_t offset = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
|
||||
|
||||
inOrderAllocationOffset += offset;
|
||||
|
||||
UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(); // write 1 on new offset
|
||||
}
|
||||
|
||||
inOrderDependencyCounter++;
|
||||
|
||||
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) {
|
||||
|
|
|
@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
this->dcFlushSupport // dcFlushEnable
|
||||
};
|
||||
|
||||
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode());
|
||||
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation);
|
||||
bool inOrderNonWalkerSignalling = event && (isTimestampEvent || !isInOrderExecEvent);
|
||||
|
||||
if (inOrderExecSignalRequired) {
|
||||
|
|
|
@ -228,17 +228,15 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
|
|||
void CommandListImp::enableInOrderExecution() {
|
||||
UNRECOVERABLE_IF(inOrderExecutionEnabled);
|
||||
|
||||
if (this->cmdListType == TYPE_IMMEDIATE) {
|
||||
auto device = this->device->getNEODevice();
|
||||
auto device = this->device->getNEODevice();
|
||||
|
||||
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
|
||||
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
|
||||
|
||||
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
|
||||
|
||||
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
|
||||
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
|
||||
|
||||
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
|
||||
|
||||
inOrderExecutionEnabled = true;
|
||||
}
|
||||
|
|
|
@ -1807,6 +1807,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
|
||||
uint8_t ptr[64] = {};
|
||||
|
||||
uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
auto verifyPc = [](const GenCmdList::iterator &iterator) {
|
||||
|
@ -1815,9 +1817,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
ASSERT_NE(nullptr, pcCmd);
|
||||
};
|
||||
|
||||
auto verifySdi = [](GenCmdList::reverse_iterator rIterator) {
|
||||
auto verifySdi = [&inOrderSyncVa](GenCmdList::reverse_iterator rIterator, GenCmdList::reverse_iterator rEnd, uint32_t signalValue) {
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rIterator);
|
||||
EXPECT_EQ(nullptr, sdiCmd);
|
||||
while (sdiCmd == nullptr) {
|
||||
sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*(++rIterator));
|
||||
if (rIterator == rEnd) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(signalValue, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
};
|
||||
|
||||
{
|
||||
|
@ -1831,7 +1845,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
verifyPc(cmdList.begin());
|
||||
verifySdi(cmdList.rbegin());
|
||||
verifySdi(cmdList.rbegin(), cmdList.rend(), 2);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1847,7 +1861,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
verifyPc(cmdList.begin());
|
||||
verifySdi(cmdList.rbegin());
|
||||
verifySdi(cmdList.rbegin(), cmdList.rend(), 3);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1861,7 +1875,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
verifyPc(cmdList.begin());
|
||||
verifySdi(cmdList.rbegin());
|
||||
verifySdi(cmdList.rbegin(), cmdList.rend(), 4);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1877,7 +1891,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
verifyPc(cmdList.begin());
|
||||
verifySdi(cmdList.rbegin());
|
||||
verifySdi(cmdList.rbegin(), cmdList.rend(), 5);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1893,7 +1907,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
|
|||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
verifyPc(cmdList.begin());
|
||||
verifySdi(cmdList.rbegin());
|
||||
verifySdi(cmdList.rbegin(), cmdList.rend(), 6);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2275,7 +2289,17 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa
|
|||
ASSERT_NE(nullptr, pcCmd);
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
EXPECT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(syncVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(3u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) {
|
||||
|
@ -3171,7 +3195,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
|
|||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||
auto &postSync = walkerCmd->getPostSync();
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
EXPECT_EQ(1u, postSync.getImmediateData());
|
||||
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
|
@ -3180,7 +3206,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
|
|||
offset = cmdStream->getUsed();
|
||||
|
||||
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter);
|
||||
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
|
||||
|
||||
{
|
||||
GenCmdList cmdList;
|
||||
|
@ -3195,20 +3221,25 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
|
|||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||
auto &postSync = walkerCmd->getPostSync();
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
EXPECT_EQ(2u, postSync.getImmediateData());
|
||||
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
}
|
||||
|
||||
regularCmdList->inOrderAllocationOffset = 123;
|
||||
auto hostAddr = static_cast<uint32_t *>(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddr = 0x1234;
|
||||
|
||||
regularCmdList->reset();
|
||||
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
|
||||
EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset);
|
||||
EXPECT_EQ(0u, *hostAddr);
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenUpdateCounterAllocation, IsAtLeastXeHpCore) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
|
@ -3224,7 +3255,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
|
|||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
|
||||
EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
|
||||
EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
|
||||
|
||||
constexpr size_t size = 128 * sizeof(uint32_t);
|
||||
auto data = allocHostMem(size);
|
||||
|
@ -3245,8 +3276,8 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
|
|||
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
auto sdiCmds = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(2u, sdiCmds.size());
|
||||
}
|
||||
|
||||
offset = copyOnlyCmdStream->getUsed();
|
||||
|
@ -3259,7 +3290,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
|
|||
(copyOnlyCmdStream->getUsed() - offset)));
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
EXPECT_NE(cmdList.end(), sdiItor);
|
||||
}
|
||||
|
||||
context->freeMem(data);
|
||||
|
@ -3288,7 +3319,18 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
|
|||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
EXPECT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
|
@ -3301,12 +3343,23 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
|
|||
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||
|
||||
auto copyCmd = genCmdCast<XY_COPY_BLT *>(*cmdList.begin());
|
||||
|
||||
EXPECT_NE(nullptr, copyCmd);
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
alignedFree(alignedPtr);
|
||||
|
|
Loading…
Reference in New Issue