mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: Experimental support of immediate cmd list in-order execution [6/n]
Related-To: LOCI-4332 - Signal appendWaitOnEvents API call - Signal appendBarrier call - Handle sync allocation residency Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5c988e8a76
commit
41478c5972
@@ -139,7 +139,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
||||
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
|
||||
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
|
||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) = 0;
|
||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) = 0;
|
||||
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
|
||||
|
||||
@@ -167,7 +167,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed);
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
@@ -2033,7 +2033,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, false);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -2095,9 +2095,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) {
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
signalInOrderCompletion &= this->inOrderExecutionEnabled;
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2166,6 +2168,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
}
|
||||
|
||||
if (signalInOrderCompletion) {
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(*commandContainer.getCommandStream(), this->inOrderDependencyCounterAllocation->getGpuAddress(),
|
||||
this->inOrderDependencyCounter + 1, 0, false, false);
|
||||
}
|
||||
|
||||
makeResidentDummyAllocation();
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2266,7 +2273,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true, false);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
NEO::GraphicsAllocation *srcAllocation,
|
||||
size_t size, bool flushHost) override;
|
||||
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
@@ -543,7 +543,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
|
||||
bool allSignaled = true;
|
||||
for (auto i = 0u; i < numEvents; i++) {
|
||||
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
|
||||
@@ -555,7 +555,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
||||
checkAvailableSpace(numEvents, false);
|
||||
checkWaitEventsState(numEvents, phWaitEvents);
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, signalInOrderCompletion);
|
||||
this->dependenciesPresent = true;
|
||||
return flushImmediate(ret, true, true, false, nullptr);
|
||||
}
|
||||
@@ -691,6 +691,8 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||
} else {
|
||||
|
||||
@@ -385,7 +385,17 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
} else {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
NEO::PostSyncMode postSyncMode = NEO::PostSyncMode::NoWrite;
|
||||
uint64_t gpuWriteAddress = 0;
|
||||
uint64_t writeValue = 0;
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
postSyncMode = NEO::PostSyncMode::ImmediateData;
|
||||
gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
writeValue = this->inOrderDependencyCounter + 1;
|
||||
}
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user