Flush small task adjustments

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2021-10-12 08:27:57 +00:00 committed by Compute-Runtime-Automation
parent f1b6b733f0
commit 32ae9555f1
15 changed files with 215 additions and 32 deletions

View File

@ -176,7 +176,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
NEO::PipeControlArgs args;
this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false);
if (this->isSyncModeQueue) {
this->csr->flushTagUpdate();
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
}
@ -288,7 +287,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
}
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false);
if (this->isSyncModeQueue) {
this->csr->flushTagUpdate();
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
}
@ -322,7 +320,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
}
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false);
if (this->isSyncModeQueue) {
this->csr->flushTagUpdate();
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
}

View File

@ -28,7 +28,6 @@ CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};
ze_result_t CommandListImp::destroy() {
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
this->csr->flushTagUpdate();
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
}

View File

@ -558,6 +558,10 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
UNRECOVERABLE_IF(csr == nullptr);
if (csr->isUpdateTagFromWaitEnabled()) {
return;
}
auto taskCountToWrite = csr->peekTaskCount() + 1;
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());

View File

@ -243,6 +243,34 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
commandQueue->destroy();
}
// Checks that, with the UpdateTaskCountFromWait debug flag set to 1, calling
// dispatchTaskCountWrite() on a freshly created command queue leaves no
// PIPE_CONTROL command in the queue's command stream.
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitWhenDispatchTaskCountWriteThenNoPipeControlFlushed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore restorer;
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    // Report a test failure instead of dereferencing a null queue below if creation failed.
    ASSERT_NE(nullptr, commandQueue);

    commandQueue->dispatchTaskCountWrite(*commandQueue->commandStream, false);

    // Parse everything written to the command stream so far and look for a PIPE_CONTROL.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), commandQueue->commandStream->getUsed()));

    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), itor);

    commandQueue->destroy();
}
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr->setupContext(*neoDevice->getDefaultEngine().osContext);

View File

@ -125,6 +125,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) {
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(1);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@ -530,6 +530,38 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
}
// With UpdateTaskCountFromWait set to 1 and the CSR switched to batched dispatch,
// the test expects the latest sent task count to be 1 and the latest flushed task
// count to stay at 0, both after flushTask() and after flushBatchedSubmissions().
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitWhenFlushBatchedIsCalledThenFlushedTaskCountIsNotModifed) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    auto batchingCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(batchingCsr);

    // Disable implicit flushes and force batched dispatch on the mock.
    batchingCsr->useNewResourceImplicitFlush = false;
    batchingCsr->useGpuIdleImplicitFlush = false;
    batchingCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    DispatchFlags batchedFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    batchedFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
    batchedFlags.guardCommandBufferWithPipeControl = true;

    batchingCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, batchedFlags, *pDevice);

    // State right after recording the task.
    EXPECT_EQ(1u, batchingCsr->peekLatestSentTaskCount());
    EXPECT_EQ(0u, batchingCsr->peekLatestFlushedTaskCount());

    batchingCsr->flushBatchedSubmissions();

    // Flushing the batch must not change either counter.
    EXPECT_EQ(1u, batchingCsr->peekLatestSentTaskCount());
    EXPECT_EQ(0u, batchingCsr->peekLatestFlushedTaskCount());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) {
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
auto &commandStream = commandQueue.getCS(4096u);
@ -1024,12 +1056,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
DebugManager.flags.UpdateTaskCountFromWait.set(1);
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
commandQueue.taskCount = 10;
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->taskCount.store(10);
mockCsr->latestFlushedTaskCount.store(5);
commandQueue.waitForAllEngines(false, nullptr);
@ -1052,12 +1087,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
};
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
commandQueue.taskCount = 10;
auto mockCsr = new MockCsrHwDirectSubmission(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->taskCount.store(10);
mockCsr->latestFlushedTaskCount.store(5);
commandQueue.waitForAllEngines(false, nullptr);

View File

@ -917,6 +917,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, true);
}
// Static-partitioning variant: with UpdateTaskCountFromWait set to 1 and two active
// partitions, runs flushTask() followed by flushTagUpdate() and then checks that
// activePartitionsConfig became 2 and that verifyPipeControl() passes for (4, true).
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
givenMultipleStaticActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) {
// Restores the debug flags when the test scope ends.
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(1);
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// A preemption allocation is created only for mid-thread preemption or an active debugger.
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
commandStreamReceiver.createPreemptionAllocation();
}
// Precondition: the programmed partition config still reports a single partition.
EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig);
commandStreamReceiver.activePartitions = 2;
commandStreamReceiver.taskCount = 3;
// This variant relies on static work partitioning already being enabled on the CSR.
EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled);
flushTask(commandStreamReceiver, true);
commandStreamReceiver.flushTagUpdate();
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
prepareLinearStream<FamilyType>(commandStream, 0);
// NOTE(review): 4 is presumably the task count written by flushTask() (3 + 1) and `true`
// the expected partition-offset flag; confirm against the verifyPipeControl fixture helper.
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, true);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
@ -936,6 +957,29 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
}
// Dynamic-partitioning variant: with UpdateTaskCountFromWait set to 1, two active
// partitions and staticWorkPartitioningEnabled forced to false, runs flushTask()
// followed by flushTagUpdate() and then checks that activePartitionsConfig is 2,
// that verifyPipeControl() passes for (4, false) and that
// verifyActivePartitionConfig() passes for `false`.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
givenMultipleDynamicActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
// Restores the debug flags when the test scope ends.
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(1);
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// A preemption allocation is created only for mid-thread preemption or an active debugger.
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
commandStreamReceiver.createPreemptionAllocation();
}
commandStreamReceiver.activePartitions = 2;
commandStreamReceiver.taskCount = 3;
// Switch the CSR away from static work partitioning for this variant.
commandStreamReceiver.staticWorkPartitioningEnabled = false;
flushTask(commandStreamReceiver, true);
commandStreamReceiver.flushTagUpdate();
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
// First inspect the fixture's task command stream for the tag-update pipe control.
prepareLinearStream<FamilyType>(commandStream, 0);
// NOTE(review): 4 is presumably the task count written by flushTask() (3 + 1) and `false`
// the expected partition-offset flag; confirm against the verifyPipeControl fixture helper.
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, false);
// Then inspect the CSR's own command stream for partition-config programming.
prepareLinearStream<FamilyType>(commandStreamReceiver.commandStream, 0);
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@ -7,6 +7,7 @@
#include "shared/source/command_stream/scratch_space_controller_base.h"
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
@ -348,6 +349,37 @@ HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) {
EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount());
}
// With UpdateTaskCountFromWait set to 1, a non-blocking blitBuffer() call is
// expected to advance the CSR task count by one while the latest flushed task
// count stays at its pre-blit value.
HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTaskCounts) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Destination: a one-byte buffer created in the fixture's context.
    cl_int retVal = CL_SUCCESS;
    auto dstBuffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));

    // Source: one page of page-aligned host memory.
    constexpr size_t hostAllocationSize = MemoryConstants::pageSize;
    auto hostMemory = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize);
    void *hostPtr = reinterpret_cast<void *>(hostMemory.get());

    auto dstAllocation = dstBuffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());

    BlitPropertiesContainer blitRequests;
    blitRequests.push_back(BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
                                                                           csr, dstAllocation, nullptr, hostPtr,
                                                                           dstAllocation->getGpuAddress(), 0,
                                                                           0, 0, {1, 1, 1}, 0, 0, 0, 0));

    // Snapshot taken before the blit so both counters can be compared against it.
    auto taskCountBeforeBlit = csr.peekTaskCount();

    csr.blitBuffer(blitRequests, false, false, *pDevice);

    EXPECT_EQ(csr.peekTaskCount(), taskCountBeforeBlit + 1);
    EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCountBeforeBlit);
}
HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield())));

View File

@ -318,6 +318,30 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskC
executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
}
// With UpdateTaskCountFromWait set to 1, submitBatchBuffer() is expected to bump
// the task count and the latest sent task count by one, while the latest flushed
// task count keeps its previous value.
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.UpdateTaskCountFromWait.set(1);

    MockCsrHw<FamilyType> csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    // One page of graphics memory backs the command stream that is submitted.
    GraphicsAllocation *streamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, streamAllocation);
    LinearStream cs(streamAllocation);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    ResidencyContainer residencyList;

    // Counters captured before submission; the "after" value is derived from them.
    auto taskCountBefore = csr.peekTaskCount();
    auto taskCountAfter = taskCountBefore + 1;

    csr.submitBatchBuffer(batchBuffer, residencyList);

    EXPECT_EQ(taskCountAfter, csr.peekTaskCount());
    EXPECT_EQ(taskCountBefore, csr.peekLatestFlushedTaskCount());
    EXPECT_EQ(taskCountAfter, csr.peekLatestSentTaskCount());

    memoryManager->freeGraphicsMemoryImpl(streamAllocation);
}
HWTEST_F(CommandStreamReceiverTest, givenOverrideCsrAllocationSizeWhenCreatingCommandStreamCsrGraphicsAllocationThenAllocationHasCorrectSize) {
DebugManagerStateRestore restore;

View File

@ -472,6 +472,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
void flushTagUpdate() override{};
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
void updateTagFromWait() override{};
bool isUpdateTagFromWaitEnabled() override { return false; };
bool isMultiOsContextCapable() const override { return false; }

View File

@ -89,7 +89,9 @@ int CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuffer, Residency
this->latestSentTaskCount = taskCount + 1;
auto flushed = this->flush(batchBuffer, allocationsForResidency);
this->latestFlushedTaskCount = taskCount + 1;
if (!isUpdateTagFromWaitEnabled()) {
this->latestFlushedTaskCount = taskCount + 1;
}
taskCount++;
return !flushed;
@ -257,10 +259,6 @@ void CommandStreamReceiver::cleanupResources() {
}
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
if (this->latestSentTaskCount < taskCountToWait) {
this->flushTagUpdate();
}
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) {
if (!this->flushBatchedSubmissions()) {
@ -275,7 +273,13 @@ bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, boo
std::chrono::high_resolution_clock::time_point time1, time2;
int64_t timeDiff = 0;
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) {
updateTagFromWait();
}
volatile uint32_t *partitionAddress = pollAddress;
time1 = std::chrono::high_resolution_clock::now();
for (uint32_t i = 0; i < activePartitions; i++) {
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {

View File

@ -219,6 +219,7 @@ class CommandStreamReceiver {
virtual void flushTagUpdate() = 0;
virtual void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool isStartOfDispatch, bool isEndOfDispatch) = 0;
virtual void updateTagFromWait() = 0;
virtual bool isUpdateTagFromWaitEnabled() = 0;
ScratchSpaceController *getScratchSpaceController() const {
return scratchSpaceController.get();

View File

@ -108,7 +108,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t commandStreamStartTask);
void flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
bool isUpdateTagFromWaitEnabled();
bool isUpdateTagFromWaitEnabled() override;
void updateTagFromWait() override;
bool isMultiOsContextCapable() const override;

View File

@ -789,7 +789,10 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
flushStampUpdateHelper.updateAll(flushStamp->peekStamp());
this->latestFlushedTaskCount = lastTaskCount;
if (!isUpdateTagFromWaitEnabled()) {
this->latestFlushedTaskCount = lastTaskCount;
}
this->makeSurfacePackNonResident(surfacesForSubmit);
resourcePackage.clear();
}
@ -882,8 +885,6 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
updateTagFromWait();
int64_t waitTimeout = 0;
bool enableTimeout = false;
@ -1088,14 +1089,18 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
auto updateTag = !isUpdateTagFromWaitEnabled();
updateTag |= blocking;
if (updateTag) {
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
MiFlushArgs args;
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args);
MiFlushArgs args;
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args);
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
}
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
@ -1129,7 +1134,10 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
flush(batchBuffer, getResidencyAllocations());
makeSurfacePackNonResident(getResidencyAllocations());
latestFlushedTaskCount = newTaskCount;
if (!isUpdateTagFromWaitEnabled()) {
latestFlushedTaskCount = newTaskCount;
}
taskCount = newTaskCount;
auto flushStampToWait = flushStamp->peekStamp();
@ -1144,12 +1152,10 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::flushTagUpdate() {
if (this->osContext != nullptr) {
if (this->osContext->getEngineType() == aub_stream::ENGINE_BCS) {
this->flushMiFlushDW();
} else {
this->flushPipeControl();
}
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
this->flushMiFlushDW();
} else {
this->flushPipeControl();
}
}
@ -1176,11 +1182,12 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushMiFlushDW() {
MiFlushArgs args;
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount, args);
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount + 1, args);
makeResident(*tagAllocation);
this->flushSmallTask(commandStream, commandStreamStart);
this->latestFlushedTaskCount = taskCount.load();
}
template <typename GfxFamily>
@ -1215,8 +1222,9 @@ void CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
PipeControlArgs args(true);
args.notifyEnable = isUsedNotifyEnableForPostSync();
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(commandStream,
PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
getTagAllocation()->getGpuAddress(),
taskCount + 1,
peekHwInfo(),
@ -1225,10 +1233,7 @@ void CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
makeResident(*tagAllocation);
this->flushSmallTask(commandStream, commandStreamStart);
this->latestFlushedTaskCount = taskCount + 1;
this->latestSentTaskCount = taskCount + 1;
taskCount++;
this->latestFlushedTaskCount = taskCount.load();
}
template <typename GfxFamily>
@ -1321,7 +1326,9 @@ void CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream &commandStr
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false};
this->latestSentTaskCount = taskCount + 1;
flushHandler(batchBuffer, getResidencyAllocations());
taskCount++;
}
template <typename GfxFamily>

View File

@ -59,6 +59,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
void flushTagUpdate() override{};
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
void updateTagFromWait() override{};
bool isUpdateTagFromWaitEnabled() override { return false; };
bool isMultiOsContextCapable() const override { return multiOsContextCapable; }
@ -164,6 +165,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::isPreambleSent;
using CommandStreamReceiver::lastSentCoherencyRequest;
using CommandStreamReceiver::latestFlushedTaskCount;
using CommandStreamReceiver::mediaVfeStateDirty;
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
using CommandStreamReceiver::pageTableManagerInitialized;