Flush small task adjustments
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
f1b6b733f0
commit
32ae9555f1
|
@ -176,7 +176,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
|
|||
NEO::PipeControlArgs args;
|
||||
this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false);
|
||||
if (this->isSyncModeQueue) {
|
||||
this->csr->flushTagUpdate();
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||
}
|
||||
|
@ -288,7 +287,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
|
|||
}
|
||||
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false);
|
||||
if (this->isSyncModeQueue) {
|
||||
this->csr->flushTagUpdate();
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||
}
|
||||
|
@ -322,7 +320,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
|
|||
}
|
||||
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false);
|
||||
if (this->isSyncModeQueue) {
|
||||
this->csr->flushTagUpdate();
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||
}
|
||||
|
|
|
@ -28,7 +28,6 @@ CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};
|
|||
|
||||
ze_result_t CommandListImp::destroy() {
|
||||
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
||||
this->csr->flushTagUpdate();
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||
}
|
||||
|
|
|
@ -558,6 +558,10 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
|
|||
|
||||
UNRECOVERABLE_IF(csr == nullptr);
|
||||
|
||||
if (csr->isUpdateTagFromWaitEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto taskCountToWrite = csr->peekTaskCount() + 1;
|
||||
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
|
||||
|
||||
|
|
|
@ -243,6 +243,34 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
|
|||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
// Checks that, with the UpdateTaskCountFromWait debug flag set to 1, calling
// dispatchTaskCountWrite() on a freshly created command queue leaves no
// PIPE_CONTROL command in the queue's command stream.
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitWhenDispatchTaskCountWriteThenNoPipeControlFlushed) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
// Declared before the flag is changed; presumably restores debug flags when
// it goes out of scope - confirm against DebugManagerStateRestore.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
const ze_command_queue_desc_t desc = {};
|
||||
// NOTE(review): returnValue is never inspected and commandQueue is
// dereferenced below without a null check.
ze_result_t returnValue;
|
||||
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
|
||||
// Call under test: writes (or, as expected here, skips writing) into the
// queue's own command stream.
commandQueue->dispatchTaskCountWrite(*commandQueue->commandStream, false);
|
||||
|
||||
// Parse everything recorded so far in the command stream, from offset 0 up
// to getUsed().
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), commandQueue->commandStream->getUsed()));
|
||||
|
||||
// No PIPE_CONTROL may be present: the search must reach the end iterator.
auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
|
||||
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
|
||||
csr->setupContext(*neoDevice->getDefaultEngine().osContext);
|
||||
|
|
|
@ -125,6 +125,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa
|
|||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) {
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
|
|
|
@ -530,6 +530,38 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
|
|||
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
|
||||
}
|
||||
|
||||
// Checks that, with the UpdateTaskCountFromWait debug flag set to 1 and the
// CSR in batched dispatch mode, neither flushTask() nor a following
// flushBatchedSubmissions() changes the latest flushed task count (it stays
// at 0), while the latest sent task count is 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitWhenFlushBatchedIsCalledThenFlushedTaskCountIsNotModifed) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
// Raw allocation handed to the device; presumably the device takes
// ownership via resetCommandStreamReceiver - confirm.
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
// Both implicit-flush switches are turned off before the batched flushTask.
mockCsr->useNewResourceImplicitFlush = false;
|
||||
mockCsr->useGpuIdleImplicitFlush = false;
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
|
||||
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
|
||||
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
||||
|
||||
// commandStream, dsh, ioh, ssh and taskLevel are not declared in this test;
// presumably fixture members - confirm.
mockCsr->flushTask(commandStream,
|
||||
0,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
taskLevel,
|
||||
dispatchFlags,
|
||||
*pDevice);
|
||||
|
||||
// After flushTask: one task sent, nothing recorded as flushed.
EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount());
|
||||
EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount());
|
||||
|
||||
mockCsr->flushBatchedSubmissions();
|
||||
|
||||
// After flushing the batch both counters are expected to be unchanged.
EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount());
|
||||
EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) {
|
||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||
auto &commandStream = commandQueue.getCS(4096u);
|
||||
|
@ -1024,12 +1056,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
|
|||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||
commandQueue.taskCount = 10;
|
||||
|
||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
mockCsr->useNewResourceImplicitFlush = false;
|
||||
mockCsr->useGpuIdleImplicitFlush = false;
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
mockCsr->taskCount.store(10);
|
||||
mockCsr->latestFlushedTaskCount.store(5);
|
||||
|
||||
commandQueue.waitForAllEngines(false, nullptr);
|
||||
|
||||
|
@ -1052,12 +1087,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
|
|||
};
|
||||
|
||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||
commandQueue.taskCount = 10;
|
||||
|
||||
auto mockCsr = new MockCsrHwDirectSubmission(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
mockCsr->useNewResourceImplicitFlush = false;
|
||||
mockCsr->useGpuIdleImplicitFlush = false;
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
mockCsr->taskCount.store(10);
|
||||
mockCsr->latestFlushedTaskCount.store(5);
|
||||
|
||||
commandQueue.waitForAllEngines(false, nullptr);
|
||||
|
||||
|
|
|
@ -917,6 +917,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
|
|||
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, true);
|
||||
}
|
||||
|
||||
// Static-partitioning variant: with UpdateTaskCountFromWait set to 1 and two
// active partitions, runs flushTask() followed by flushTagUpdate() and then
// checks activePartitionsConfig and the pipe control via the fixture helpers.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||
givenMultipleStaticActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
// A preemption allocation is created only for mid-thread preemption or when
// a debugger is active.
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
|
||||
commandStreamReceiver.createPreemptionAllocation();
|
||||
}
|
||||
// Starting state: the programmed partition config is still 1.
EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig);
|
||||
commandStreamReceiver.activePartitions = 2;
|
||||
commandStreamReceiver.taskCount = 3;
|
||||
// This variant relies on static work partitioning already being enabled.
EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled);
|
||||
flushTask(commandStreamReceiver, true);
|
||||
commandStreamReceiver.flushTagUpdate();
|
||||
// After both flushes the config is expected to match activePartitions (2).
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
|
||||
|
||||
prepareLinearStream<FamilyType>(commandStream, 0);
|
||||
// NOTE(review): 4 is presumably the expected tag value (taskCount 3 plus one
// submission) and `true` the expected partition flag - confirm against
// verifyPipeControl.
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, true);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||
givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
@ -936,6 +957,29 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
|
|||
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
|
||||
}
|
||||
|
||||
// Dynamic-partitioning variant: with UpdateTaskCountFromWait set to 1, two
// active partitions and static work partitioning disabled, runs flushTask()
// followed by flushTagUpdate() and then checks the pipe control and the
// active-partition config via the fixture helpers.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||
givenMultipleDynamicActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
// A preemption allocation is created only for mid-thread preemption or when
// a debugger is active.
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
|
||||
commandStreamReceiver.createPreemptionAllocation();
|
||||
}
|
||||
commandStreamReceiver.activePartitions = 2;
|
||||
commandStreamReceiver.taskCount = 3;
|
||||
// Unlike the static variant, static work partitioning is switched off here.
commandStreamReceiver.staticWorkPartitioningEnabled = false;
|
||||
flushTask(commandStreamReceiver, true);
|
||||
commandStreamReceiver.flushTagUpdate();
|
||||
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
|
||||
|
||||
prepareLinearStream<FamilyType>(commandStream, 0);
|
||||
// NOTE(review): 4 is presumably the expected tag value (taskCount 3 plus one
// submission) and `false` the expected partition flag - confirm against
// verifyPipeControl.
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, false);
|
||||
|
||||
// Second check runs on the CSR's own command stream rather than the task
// stream used above.
prepareLinearStream<FamilyType>(commandStreamReceiver.commandStream, 0);
|
||||
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||
givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "shared/source/command_stream/scratch_space_controller_base.h"
|
||||
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
|
@ -348,6 +349,37 @@ HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) {
|
|||
EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount());
|
||||
}
|
||||
|
||||
// Checks that, with the UpdateTaskCountFromWait debug flag set to 1, a
// blitBuffer() call increments the CSR task count by one but leaves the
// latest flushed task count at its previous value.
HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTaskCounts) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
// NOTE(review): retVal is not checked after Buffer::create.
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||
|
||||
// Page-sized, page-aligned host memory used as the blit source pointer.
constexpr size_t hostAllocationSize = MemoryConstants::pageSize;
|
||||
auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize);
|
||||
void *hostPtr = reinterpret_cast<void *>(hostAllocationPtr.get());
|
||||
|
||||
auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||
|
||||
// Host-pointer-to-buffer blit description with a {1, 1, 1} size argument.
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
|
||||
csr, graphicsAllocation, nullptr, hostPtr,
|
||||
graphicsAllocation->getGpuAddress(), 0,
|
||||
0, 0, {1, 1, 1}, 0, 0, 0, 0);
|
||||
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties);
|
||||
|
||||
// Snapshot of the task count before the blit, used as the baseline below.
auto taskCount = csr.peekTaskCount();
|
||||
|
||||
// NOTE(review): the first `false` is presumably the blocking argument -
// confirm against the blitBuffer signature.
csr.blitBuffer(blitPropertiesContainer, false, false, *pDevice);
|
||||
|
||||
// Task count advanced by one; latest flushed task count equals the baseline.
EXPECT_EQ(csr.peekTaskCount(), taskCount + 1);
|
||||
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCount);
|
||||
}
|
||||
|
||||
HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
|
||||
auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield())));
|
||||
|
|
|
@ -318,6 +318,30 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskC
|
|||
executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
|
||||
}
|
||||
|
||||
// Checks that, with the UpdateTaskCountFromWait debug flag set to 1,
// submitBatchBuffer() increments the task count and the latest sent task
// count, while the latest flushed task count keeps its previous value.
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
MockCsrHw<FamilyType> csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
|
||||
// One page of graphics memory backs the command stream; released manually at
// the end of the test.
GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
ASSERT_NE(nullptr, commandBuffer);
|
||||
LinearStream cs(commandBuffer);
|
||||
|
||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
|
||||
ResidencyContainer residencyList;
|
||||
|
||||
// Baseline and expected post-submission task counts.
auto previousTaskCount = csr.peekTaskCount();
|
||||
auto currentTaskCount = previousTaskCount + 1;
|
||||
csr.submitBatchBuffer(batchBuffer, residencyList);
|
||||
|
||||
// Task count and latest sent advance; latest flushed stays at the baseline.
EXPECT_EQ(currentTaskCount, csr.peekTaskCount());
|
||||
EXPECT_EQ(previousTaskCount, csr.peekLatestFlushedTaskCount());
|
||||
EXPECT_EQ(currentTaskCount, csr.peekLatestSentTaskCount());
|
||||
|
||||
memoryManager->freeGraphicsMemoryImpl(commandBuffer);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverTest, givenOverrideCsrAllocationSizeWhenCreatingCommandStreamCsrGraphicsAllocationThenAllocationHasCorrectSize) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
|
|
|
@ -472,6 +472,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
|
|||
void flushTagUpdate() override{};
|
||||
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
|
||||
void updateTagFromWait() override{};
|
||||
bool isUpdateTagFromWaitEnabled() override { return false; };
|
||||
|
||||
bool isMultiOsContextCapable() const override { return false; }
|
||||
|
||||
|
|
|
@ -89,7 +89,9 @@ int CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuffer, Residency
|
|||
this->latestSentTaskCount = taskCount + 1;
|
||||
|
||||
auto flushed = this->flush(batchBuffer, allocationsForResidency);
|
||||
if (!isUpdateTagFromWaitEnabled()) {
|
||||
this->latestFlushedTaskCount = taskCount + 1;
|
||||
}
|
||||
taskCount++;
|
||||
|
||||
return !flushed;
|
||||
|
@ -257,10 +259,6 @@ void CommandStreamReceiver::cleanupResources() {
|
|||
}
|
||||
|
||||
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
if (this->latestSentTaskCount < taskCountToWait) {
|
||||
this->flushTagUpdate();
|
||||
}
|
||||
|
||||
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
||||
if (latestSentTaskCount < taskCountToWait) {
|
||||
if (!this->flushBatchedSubmissions()) {
|
||||
|
@ -275,7 +273,13 @@ bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, boo
|
|||
std::chrono::high_resolution_clock::time_point time1, time2;
|
||||
int64_t timeDiff = 0;
|
||||
|
||||
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
||||
if (latestSentTaskCount < taskCountToWait) {
|
||||
updateTagFromWait();
|
||||
}
|
||||
|
||||
volatile uint32_t *partitionAddress = pollAddress;
|
||||
|
||||
time1 = std::chrono::high_resolution_clock::now();
|
||||
for (uint32_t i = 0; i < activePartitions; i++) {
|
||||
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
|
||||
|
|
|
@ -219,6 +219,7 @@ class CommandStreamReceiver {
|
|||
virtual void flushTagUpdate() = 0;
|
||||
virtual void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool isStartOfDispatch, bool isEndOfDispatch) = 0;
|
||||
virtual void updateTagFromWait() = 0;
|
||||
virtual bool isUpdateTagFromWaitEnabled() = 0;
|
||||
|
||||
ScratchSpaceController *getScratchSpaceController() const {
|
||||
return scratchSpaceController.get();
|
||||
|
|
|
@ -108,7 +108,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
size_t commandStreamStartTask);
|
||||
void flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
|
||||
|
||||
bool isUpdateTagFromWaitEnabled();
|
||||
bool isUpdateTagFromWaitEnabled() override;
|
||||
void updateTagFromWait() override;
|
||||
|
||||
bool isMultiOsContextCapable() const override;
|
||||
|
|
|
@ -789,7 +789,10 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
|||
|
||||
flushStampUpdateHelper.updateAll(flushStamp->peekStamp());
|
||||
|
||||
if (!isUpdateTagFromWaitEnabled()) {
|
||||
this->latestFlushedTaskCount = lastTaskCount;
|
||||
}
|
||||
|
||||
this->makeSurfacePackNonResident(surfacesForSubmit);
|
||||
resourcePackage.clear();
|
||||
}
|
||||
|
@ -882,8 +885,6 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
|
|||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
updateTagFromWait();
|
||||
|
||||
int64_t waitTimeout = 0;
|
||||
bool enableTimeout = false;
|
||||
|
||||
|
@ -1088,6 +1089,9 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||
|
||||
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
|
||||
|
||||
auto updateTag = !isUpdateTagFromWaitEnabled();
|
||||
updateTag |= blocking;
|
||||
if (updateTag) {
|
||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
||||
|
||||
MiFlushArgs args;
|
||||
|
@ -1096,6 +1100,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
|
||||
|
@ -1129,7 +1134,10 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||
flush(batchBuffer, getResidencyAllocations());
|
||||
makeSurfacePackNonResident(getResidencyAllocations());
|
||||
|
||||
if (!isUpdateTagFromWaitEnabled()) {
|
||||
latestFlushedTaskCount = newTaskCount;
|
||||
}
|
||||
|
||||
taskCount = newTaskCount;
|
||||
auto flushStampToWait = flushStamp->peekStamp();
|
||||
|
||||
|
@ -1144,13 +1152,11 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::flushTagUpdate() {
|
||||
if (this->osContext != nullptr) {
|
||||
if (this->osContext->getEngineType() == aub_stream::ENGINE_BCS) {
|
||||
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
|
||||
this->flushMiFlushDW();
|
||||
} else {
|
||||
this->flushPipeControl();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -1176,11 +1182,12 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushMiFlushDW() {
|
|||
MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount, args);
|
||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount + 1, args);
|
||||
|
||||
makeResident(*tagAllocation);
|
||||
|
||||
this->flushSmallTask(commandStream, commandStreamStart);
|
||||
this->latestFlushedTaskCount = taskCount.load();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -1215,8 +1222,9 @@ void CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
|
|||
|
||||
PipeControlArgs args(true);
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(commandStream,
|
||||
PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
getTagAllocation()->getGpuAddress(),
|
||||
taskCount + 1,
|
||||
peekHwInfo(),
|
||||
|
@ -1225,10 +1233,7 @@ void CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
|
|||
makeResident(*tagAllocation);
|
||||
|
||||
this->flushSmallTask(commandStream, commandStreamStart);
|
||||
|
||||
this->latestFlushedTaskCount = taskCount + 1;
|
||||
this->latestSentTaskCount = taskCount + 1;
|
||||
taskCount++;
|
||||
this->latestFlushedTaskCount = taskCount.load();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -1321,7 +1326,9 @@ void CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream &commandStr
|
|||
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false};
|
||||
|
||||
this->latestSentTaskCount = taskCount + 1;
|
||||
flushHandler(batchBuffer, getResidencyAllocations());
|
||||
taskCount++;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -59,6 +59,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
|||
void flushTagUpdate() override{};
|
||||
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
|
||||
void updateTagFromWait() override{};
|
||||
bool isUpdateTagFromWaitEnabled() override { return false; };
|
||||
|
||||
bool isMultiOsContextCapable() const override { return multiOsContextCapable; }
|
||||
|
||||
|
@ -164,6 +165,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
|
|||
using CommandStreamReceiver::globalFenceAllocation;
|
||||
using CommandStreamReceiver::isPreambleSent;
|
||||
using CommandStreamReceiver::lastSentCoherencyRequest;
|
||||
using CommandStreamReceiver::latestFlushedTaskCount;
|
||||
using CommandStreamReceiver::mediaVfeStateDirty;
|
||||
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
|
||||
using CommandStreamReceiver::pageTableManagerInitialized;
|
||||
|
|
Loading…
Reference in New Issue