Optimize UMD to skip the DC flush in PIPE_CONTROL commands when DC flush is not allowed

Related-To: NEO-5927

Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
Vinod Tipparaju 2021-06-08 12:45:34 +00:00 committed by Compute-Runtime-Automation
parent 2715f03ee2
commit 823dee432a
5 changed files with 73 additions and 39 deletions

View File

@ -269,7 +269,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
Event::STATE_CLEARED, args);
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = (!event->signalScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
args.dcFlushEnable = (!event->signalScope) ? false : true;
}
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
size_t estimatedSizeRequired =
@ -966,9 +968,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
isStateless);
}
if (flushHost) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
if (flushHost) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
return ret;
@ -1067,14 +1071,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
appendEventForProfilingAllWalkers(hSignalEvent, false);
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
}
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
}
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
return ret;
@ -1140,14 +1146,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
return result;
}
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
}
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
}
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
return ZE_RESULT_SUCCESS;
@ -1458,14 +1466,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
appendEventForProfilingAllWalkers(hSignalEvent, false);
auto event = Event::fromHandle(hSignalEvent);
if (event) {
hostPointerNeedsFlush &= !event->signalScope;
}
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
auto event = Event::fromHandle(hSignalEvent);
if (event) {
hostPointerNeedsFlush &= !event->signalScope;
}
if (hostPointerNeedsFlush) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
if (hostPointerNeedsFlush) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
return res;
@ -1669,7 +1679,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
} else {
NEO::PipeControlArgs args;
applyScope = (!event->signalScope) ? false : true;
args.dcFlushEnable = applyScope;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
args.dcFlushEnable = applyScope;
}
if (applyScope || event->isEventTimestampFlagSet()) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
@ -1702,9 +1714,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
constexpr uint32_t eventStateClear = static_cast<uint32_t>(-1);
bool dcFlushRequired = false;
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phEvent[i]);
dcFlushRequired |= (!event->waitScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phEvent[i]);
dcFlushRequired |= (!event->waitScope) ? false : true;
}
}
if (dcFlushRequired) {
@ -1805,9 +1819,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
if (beforeWalker) {
appendWriteKernelTimestamp(hEvent, beforeWalker, true);
} else {
NEO::PipeControlArgs args = {};
args.dcFlushEnable = (!event->signalScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
args.dcFlushEnable = (!event->signalScope) ? false : true;
}
NEO::MemorySynchronizationCommands<GfxFamily>::setPostSyncExtraProperties(args,
commandContainer.getDevice()->getHardwareInfo());

View File

@ -250,6 +250,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
ze_result_t ret = ZE_RESULT_SUCCESS;
auto event = Event::fromHandle(hSignalEvent);
bool isTimestampEvent = event->isEventTimestampFlagSet();
@ -268,7 +269,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
}
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = (!event->signalScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
args.dcFlushEnable = (!event->signalScope) ? false : true;
}
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false);
if (this->isSyncModeQueue) {
this->csr->flushTagUpdate();
@ -281,6 +284,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
ze_result_t ret = ZE_RESULT_SUCCESS;
auto event = Event::fromHandle(hSignalEvent);
bool isTimestampEvent = event->isEventTimestampFlagSet();
@ -299,7 +303,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
}
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = (!event->signalScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
args.dcFlushEnable = (!event->signalScope) ? false : true;
}
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false);
if (this->isSyncModeQueue) {
this->csr->flushTagUpdate();
@ -322,6 +328,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
ze_result_t ret = ZE_RESULT_SUCCESS;
bool isTimestampEvent = false;
@ -344,9 +351,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
}
} else {
bool dcFlushRequired = false;
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phWaitEvents[i]);
dcFlushRequired |= (!event->waitScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phWaitEvents[i]);
dcFlushRequired |= (!event->waitScope) ? false : true;
}
}
NEO::PipeControlArgs args;

View File

@ -138,7 +138,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
auto event = Event::fromHandle(hEvent);
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
L3FlushEnable = (!event->signalScope) ? false : true;
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
L3FlushEnable = (!event->signalScope) ? false : true;
}
isTimestampEvent = event->isEventTimestampFlagSet();
eventAddress = event->getPacketAddress(this->device);
}

View File

@ -377,7 +377,11 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
if (MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed()) {
EXPECT_NE(cmdList.end(), itor);
} else {
EXPECT_EQ(cmdList.end(), itor);
}
}
using platformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;

View File

@ -625,7 +625,11 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
if (MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed()) {
EXPECT_NE(cmdList.end(), itor);
} else {
EXPECT_EQ(cmdList.end(), itor);
}
}
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammed) {