feature: Experimental support of immediate cmd list in-order execution [2/n]

- appendWaitOnEvents for previous dispatch
- update RelaxedOrdering logic
- update Event::setIsCompleted logic to reset already completed Event

Related-To: LOCI-4332

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-04-26 16:43:07 +00:00
committed by Compute-Runtime-Automation
parent 32d8a3bc6d
commit 75827b66c6
7 changed files with 155 additions and 36 deletions

View File

@@ -2024,6 +2024,9 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
if (latestSentInOrderEvent) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &latestSentInOrderEvent, relaxedOrderingAllowed, trackDependencies);
}
if (numWaitEvents > 0) {
if (phWaitEvents) {

View File

@@ -167,6 +167,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
size_t getTransferThreshold(TransferType transferType);
bool isBarrierRequired();
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
protected:
using BaseClass::latestInOrderOperationCompleted;

View File

@@ -327,7 +327,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -351,7 +351,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect(
ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -388,7 +388,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -409,7 +409,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
NEO::TransferDirection direction;
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size, direction);
if (isSplitNeeded) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, relaxedOrderingDispatch);
});
@@ -433,7 +433,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -445,7 +445,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
NEO::TransferDirection direction;
auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch), direction);
if (isSplitNeeded) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
ze_copy_region_t dstRegionLocal = {};
ze_copy_region_t srcRegionLocal = {};
@@ -474,7 +474,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -527,7 +527,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
bool relaxedOrdering = false;
if (isSplitNeeded) {
relaxedOrdering = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event
relaxedOrdering = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, 0u, nullptr, false, relaxedOrdering, direction, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
@@ -601,7 +601,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -621,7 +621,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -642,7 +642,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -676,7 +676,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
@@ -1020,4 +1020,11 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
}
}
// Decides whether a dispatch on this immediate command list may use relaxed ordering.
//
// numWaitEvents - number of wait events supplied by the caller for the dispatch.
//
// When latestSentInOrderEvent is set, it is counted as one additional event
// before the decision is delegated to NEO::RelaxedOrderingHelper together
// with this command list's csr.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
    uint32_t totalEventCount = numWaitEvents;
    if (latestSentInOrderEvent) {
        // Account for the pending in-order event on top of the caller's events.
        totalEventCount += 1;
    }

    return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, totalEventCount);
}
} // namespace L0

View File

@@ -387,4 +387,15 @@ void Event::resetPackets(bool resetAllPackets) {
this->csr = this->device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
}
// Marks the event as completed, but only when it is currently in STATE_CLEARED;
// any other state is left untouched and the call is a no-op.
//
// On the CLEARED -> SIGNALED transition, if an in-order command list is still
// recorded in latestUsedInOrderCmdList, that list is asked to unset this event
// (passed by handle) as its last in-order out event, and the recorded pointer
// is cleared.
void Event::setIsCompleted() {
    if (this->isCompleted.load() != STATE_CLEARED) {
        return;
    }

    this->isCompleted = STATE_SIGNALED;

    if (!latestUsedInOrderCmdList) {
        return;
    }

    latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle());
    latestUsedInOrderCmdList = nullptr;
}
} // namespace L0

View File

@@ -162,11 +162,7 @@ struct Event : _ze_event_handle_t {
this->isCompleted.store(disableFromRegularList ? HOST_CACHING_DISABLED_PERMANENT : HOST_CACHING_DISABLED);
}
void setIsCompleted() {
if (this->isCompleted.load() == STATE_CLEARED) {
this->isCompleted = STATE_SIGNALED;
}
}
void setIsCompleted();
bool isAlreadyCompleted() {
return this->isCompleted == STATE_SIGNALED;