mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
feature: Experimental support of immediate cmd list in-order execution [2/n]
- appendWaitOnEvents for previous dispatch
- update RelaxedOrdering logic
- update Event::setIsCompleted logic to reset already completed Event

Related-To: LOCI-4332
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
32d8a3bc6d
commit
75827b66c6
@@ -2024,6 +2024,9 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
|
||||
if (latestSentInOrderEvent) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &latestSentInOrderEvent, relaxedOrderingAllowed, trackDependencies);
|
||||
}
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
|
||||
@@ -167,6 +167,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
|
||||
size_t getTransferThreshold(TransferType transferType);
|
||||
bool isBarrierRequired();
|
||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
|
||||
|
||||
protected:
|
||||
using BaseClass::latestInOrderOperationCompleted;
|
||||
|
||||
@@ -327,7 +327,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) {
|
||||
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -351,7 +351,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect(
|
||||
ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -388,7 +388,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -409,7 +409,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
NEO::TransferDirection direction;
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size, direction);
|
||||
if (isSplitNeeded) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, relaxedOrderingDispatch);
|
||||
});
|
||||
@@ -433,7 +433,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -445,7 +445,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
NEO::TransferDirection direction;
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch), direction);
|
||||
if (isSplitNeeded) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
ze_copy_region_t dstRegionLocal = {};
|
||||
ze_copy_region_t srcRegionLocal = {};
|
||||
@@ -474,7 +474,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -527,7 +527,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
bool relaxedOrdering = false;
|
||||
|
||||
if (isSplitNeeded) {
|
||||
relaxedOrdering = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event
|
||||
relaxedOrdering = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
|
||||
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
|
||||
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, 0u, nullptr, false, relaxedOrdering, direction, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
@@ -601,7 +601,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -621,7 +621,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -642,7 +642,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -676,7 +676,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents);
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch);
|
||||
@@ -1020,4 +1020,11 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether relaxed-ordering dispatch may be used for an append call on
// this immediate command list.
//
// numWaitEvents: number of wait events the caller passed to the append call.
// Returns: the result of NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed
//          evaluated for this list's csr and the adjusted event count.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
|
||||
// Count one extra event when latestSentInOrderEvent is set.
// NOTE(review): presumably this mirrors addEventsToCmdList, which appends an
// additional wait on latestSentInOrderEvent when it is set - confirm.
auto numEvents = numWaitEvents + (latestSentInOrderEvent ? 1 : 0);
|
||||
|
||||
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -387,4 +387,15 @@ void Event::resetPackets(bool resetAllPackets) {
|
||||
this->csr = this->device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
|
||||
}
|
||||
|
||||
// Marks the event as completed, but only when its state is currently
// STATE_CLEARED; in any other state this function does nothing.
// On that transition it also detaches the event from latestUsedInOrderCmdList,
// if one is set.
void Event::setIsCompleted() {
|
||||
// NOTE(review): the state is read with load() and then written in a separate
// statement, so the check and the update are two distinct operations -
// confirm this is acceptable for concurrent callers.
if (this->isCompleted.load() == STATE_CLEARED) {
|
||||
this->isCompleted = STATE_SIGNALED;
|
||||
|
||||
// Ask the tracked command list to unset this event as its last in-order
// out event, then clear the pointer so this is done only once.
if (latestUsedInOrderCmdList) {
|
||||
latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle());
|
||||
latestUsedInOrderCmdList = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -162,11 +162,7 @@ struct Event : _ze_event_handle_t {
|
||||
this->isCompleted.store(disableFromRegularList ? HOST_CACHING_DISABLED_PERMANENT : HOST_CACHING_DISABLED);
|
||||
}
|
||||
|
||||
// Inline form: switches isCompleted to STATE_SIGNALED only when it currently
// reads STATE_CLEARED; any other state is left unchanged.
void setIsCompleted() {
|
||||
if (this->isCompleted.load() == STATE_CLEARED) {
|
||||
this->isCompleted = STATE_SIGNALED;
|
||||
}
|
||||
}
|
||||
void setIsCompleted();
|
||||
|
||||
bool isAlreadyCompleted() {
|
||||
return this->isCompleted == STATE_SIGNALED;
|
||||
|
||||
Reference in New Issue
Block a user