Do not set dcFlush in Immediate dispatch mode.

Some devices do not need dcFlush.
Setting it unconditionally prevents further optimization, because pipe
controls that are not needed can then no longer be skipped.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2022-03-02 12:43:02 +00:00
committed by Compute-Runtime-Automation
parent bfacd14b61
commit 8e94d568a8
2 changed files with 50 additions and 3 deletions

View File

@@ -80,6 +80,50 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFl
buffer->release();
}
// Enqueues a non-blocking buffer write with the UpdateTaskCountFromWait debug flag set to 3
// and inspects the last flushed command stream:
//  - when MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo) reports that a DC flush
//    is required, a PIPE_CONTROL carrying the same DC-flush setting must be present;
//  - otherwise the stream must not contain any PIPE_CONTROL at all.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnableUpdateTaskFromWaitWhenNonBlockingCallIsMadeThenNoPipeControlInsertedOnDevicesWithoutDCFlushRequirements) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UpdateTaskCountFromWait.set(3u);

    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockContext ctx(pClDevice);
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.timestampPacketWriteEnabled = false;
    CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);

    size_t tempBuffer[] = {0, 1, 2};
    size_t dstBuffer[] = {0, 1, 2};
    cl_int retVal = 0;

    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
    // Fail cleanly instead of dereferencing a null buffer below; nothing to release at this point.
    ASSERT_NE(nullptr, buffer);

    commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0u, nullptr, 0);

    auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream;

    cmdList.clear();
    // Parse command list
    parseCommands<FamilyType>(commandStreamTask, 0);

    auto pipeControlExpected = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    if (pipeControlExpected) {
        EXPECT_NE(cmdList.end(), itorPC);

        // EXPECT_* is non-fatal, so every iterator access below is guarded explicitly.
        // Guards (rather than ASSERT_*) keep the buffer release at the end reachable on failure.
        if (itorPC != cmdList.end() && UnitTestHelper<FamilyType>::isPipeControlWArequired(pDevice->getHardwareInfo())) {
            // Step past the first PIPE_CONTROL and look for the next one when the WA is required.
            ++itorPC;
            itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
            EXPECT_NE(cmdList.end(), itorPC);
        }

        if (itorPC != cmdList.end()) {
            // Verify that the dcFlushEnabled bit is set in PC
            auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
            EXPECT_EQ(pipeControlExpected, pipeControl->getDcFlushEnable());
        }
    } else {
        EXPECT_EQ(cmdList.end(), itorPC);
    }

    buffer->release();
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenTaskCsPassedAsCommandStreamParamWhenFlushingTaskThenCompletionStampIsCorrect) {
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@@ -187,6 +187,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
const auto &hwInfo = peekHwInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
bool updateTag = false;
if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) {
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
@@ -195,7 +197,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
levelClosed = true;
//if we guard with ppc, flush dc as well to speed up completion latency
if (dispatchFlags.guardCommandBufferWithPipeControl) {
dispatchFlags.dcFlush = true;
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
if (hwInfoConfig.isDcFlushAllowed()) {
dispatchFlags.dcFlush = true;
}
}
}
@@ -255,8 +260,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode,
hwInfo);
auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily);
if (dispatchFlags.threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) {
if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.value != -1) {
// Reuse previous programming