Flush small task adjustments
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
f1b6b733f0
commit
32ae9555f1
|
@ -176,7 +176,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
|
||||||
NEO::PipeControlArgs args;
|
NEO::PipeControlArgs args;
|
||||||
this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false);
|
this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false);
|
||||||
if (this->isSyncModeQueue) {
|
if (this->isSyncModeQueue) {
|
||||||
this->csr->flushTagUpdate();
|
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||||
}
|
}
|
||||||
|
@ -288,7 +287,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
|
||||||
}
|
}
|
||||||
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false);
|
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false);
|
||||||
if (this->isSyncModeQueue) {
|
if (this->isSyncModeQueue) {
|
||||||
this->csr->flushTagUpdate();
|
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||||
}
|
}
|
||||||
|
@ -322,7 +320,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
|
||||||
}
|
}
|
||||||
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false);
|
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false);
|
||||||
if (this->isSyncModeQueue) {
|
if (this->isSyncModeQueue) {
|
||||||
this->csr->flushTagUpdate();
|
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,6 @@ CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};
|
||||||
|
|
||||||
ze_result_t CommandListImp::destroy() {
|
ze_result_t CommandListImp::destroy() {
|
||||||
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
||||||
this->csr->flushTagUpdate();
|
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||||
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
|
||||||
}
|
}
|
||||||
|
|
|
@ -558,6 +558,10 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
|
||||||
|
|
||||||
UNRECOVERABLE_IF(csr == nullptr);
|
UNRECOVERABLE_IF(csr == nullptr);
|
||||||
|
|
||||||
|
if (csr->isUpdateTagFromWaitEnabled()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
auto taskCountToWrite = csr->peekTaskCount() + 1;
|
auto taskCountToWrite = csr->peekTaskCount() + 1;
|
||||||
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
|
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
|
||||||
|
|
||||||
|
|
|
@ -243,6 +243,34 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
|
||||||
commandQueue->destroy();
|
commandQueue->destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitWhenDispatchTaskCountWriteThenNoPipeControlFlushed) {
|
||||||
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
|
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
const ze_command_queue_desc_t desc = {};
|
||||||
|
ze_result_t returnValue;
|
||||||
|
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
|
||||||
|
device,
|
||||||
|
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||||
|
&desc,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
returnValue));
|
||||||
|
|
||||||
|
commandQueue->dispatchTaskCountWrite(*commandQueue->commandStream, false);
|
||||||
|
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||||
|
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), commandQueue->commandStream->getUsed()));
|
||||||
|
|
||||||
|
auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(cmdList.end(), itor);
|
||||||
|
|
||||||
|
commandQueue->destroy();
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
|
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
|
||||||
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
|
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
|
||||||
csr->setupContext(*neoDevice->getDefaultEngine().osContext);
|
csr->setupContext(*neoDevice->getDefaultEngine().osContext);
|
||||||
|
|
|
@ -125,6 +125,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa
|
||||||
|
|
||||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) {
|
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) {
|
||||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
|
|
@ -530,6 +530,38 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
|
||||||
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
|
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitWhenFlushBatchedIsCalledThenFlushedTaskCountIsNotModifed) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
|
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||||
|
mockCsr->useNewResourceImplicitFlush = false;
|
||||||
|
mockCsr->useGpuIdleImplicitFlush = false;
|
||||||
|
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||||
|
|
||||||
|
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||||
|
dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
|
||||||
|
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
||||||
|
|
||||||
|
mockCsr->flushTask(commandStream,
|
||||||
|
0,
|
||||||
|
dsh,
|
||||||
|
ioh,
|
||||||
|
ssh,
|
||||||
|
taskLevel,
|
||||||
|
dispatchFlags,
|
||||||
|
*pDevice);
|
||||||
|
|
||||||
|
EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount());
|
||||||
|
EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount());
|
||||||
|
|
||||||
|
mockCsr->flushBatchedSubmissions();
|
||||||
|
|
||||||
|
EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount());
|
||||||
|
EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount());
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) {
|
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) {
|
||||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||||
auto &commandStream = commandQueue.getCS(4096u);
|
auto &commandStream = commandQueue.getCS(4096u);
|
||||||
|
@ -1024,12 +1056,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
|
||||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||||
|
commandQueue.taskCount = 10;
|
||||||
|
|
||||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||||
mockCsr->useNewResourceImplicitFlush = false;
|
mockCsr->useNewResourceImplicitFlush = false;
|
||||||
mockCsr->useGpuIdleImplicitFlush = false;
|
mockCsr->useGpuIdleImplicitFlush = false;
|
||||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||||
|
mockCsr->taskCount.store(10);
|
||||||
|
mockCsr->latestFlushedTaskCount.store(5);
|
||||||
|
|
||||||
commandQueue.waitForAllEngines(false, nullptr);
|
commandQueue.waitForAllEngines(false, nullptr);
|
||||||
|
|
||||||
|
@ -1052,12 +1087,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
|
||||||
};
|
};
|
||||||
|
|
||||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||||
|
commandQueue.taskCount = 10;
|
||||||
|
|
||||||
auto mockCsr = new MockCsrHwDirectSubmission(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
auto mockCsr = new MockCsrHwDirectSubmission(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||||
mockCsr->useNewResourceImplicitFlush = false;
|
mockCsr->useNewResourceImplicitFlush = false;
|
||||||
mockCsr->useGpuIdleImplicitFlush = false;
|
mockCsr->useGpuIdleImplicitFlush = false;
|
||||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||||
|
mockCsr->taskCount.store(10);
|
||||||
|
mockCsr->latestFlushedTaskCount.store(5);
|
||||||
|
|
||||||
commandQueue.waitForAllEngines(false, nullptr);
|
commandQueue.waitForAllEngines(false, nullptr);
|
||||||
|
|
||||||
|
|
|
@ -917,6 +917,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
|
||||||
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, true);
|
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||||
|
givenMultipleStaticActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
|
||||||
|
commandStreamReceiver.createPreemptionAllocation();
|
||||||
|
}
|
||||||
|
EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig);
|
||||||
|
commandStreamReceiver.activePartitions = 2;
|
||||||
|
commandStreamReceiver.taskCount = 3;
|
||||||
|
EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled);
|
||||||
|
flushTask(commandStreamReceiver, true);
|
||||||
|
commandStreamReceiver.flushTagUpdate();
|
||||||
|
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
|
||||||
|
|
||||||
|
prepareLinearStream<FamilyType>(commandStream, 0);
|
||||||
|
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, true);
|
||||||
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||||
givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
||||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
@ -936,6 +957,29 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
|
||||||
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
|
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||||
|
givenMultipleDynamicActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
|
||||||
|
commandStreamReceiver.createPreemptionAllocation();
|
||||||
|
}
|
||||||
|
commandStreamReceiver.activePartitions = 2;
|
||||||
|
commandStreamReceiver.taskCount = 3;
|
||||||
|
commandStreamReceiver.staticWorkPartitioningEnabled = false;
|
||||||
|
flushTask(commandStreamReceiver, true);
|
||||||
|
commandStreamReceiver.flushTagUpdate();
|
||||||
|
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
|
||||||
|
|
||||||
|
prepareLinearStream<FamilyType>(commandStream, 0);
|
||||||
|
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, false);
|
||||||
|
|
||||||
|
prepareLinearStream<FamilyType>(commandStreamReceiver.commandStream, 0);
|
||||||
|
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
|
||||||
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
|
||||||
givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
|
||||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
#include "shared/source/command_stream/scratch_space_controller_base.h"
|
#include "shared/source/command_stream/scratch_space_controller_base.h"
|
||||||
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
|
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
|
||||||
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||||
|
@ -348,6 +349,37 @@ HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) {
|
||||||
EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount());
|
EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTaskCounts) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
cl_int retVal = CL_SUCCESS;
|
||||||
|
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||||
|
|
||||||
|
constexpr size_t hostAllocationSize = MemoryConstants::pageSize;
|
||||||
|
auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize);
|
||||||
|
void *hostPtr = reinterpret_cast<void *>(hostAllocationPtr.get());
|
||||||
|
|
||||||
|
auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||||
|
|
||||||
|
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
|
||||||
|
csr, graphicsAllocation, nullptr, hostPtr,
|
||||||
|
graphicsAllocation->getGpuAddress(), 0,
|
||||||
|
0, 0, {1, 1, 1}, 0, 0, 0, 0);
|
||||||
|
|
||||||
|
BlitPropertiesContainer blitPropertiesContainer;
|
||||||
|
blitPropertiesContainer.push_back(blitProperties);
|
||||||
|
|
||||||
|
auto taskCount = csr.peekTaskCount();
|
||||||
|
|
||||||
|
csr.blitBuffer(blitPropertiesContainer, false, false, *pDevice);
|
||||||
|
|
||||||
|
EXPECT_EQ(csr.peekTaskCount(), taskCount + 1);
|
||||||
|
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCount);
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
|
HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
|
||||||
auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0,
|
auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0,
|
||||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield())));
|
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield())));
|
||||||
|
|
|
@ -318,6 +318,30 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskC
|
||||||
executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
|
executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||||
|
|
||||||
|
MockCsrHw<FamilyType> csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||||
|
|
||||||
|
GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||||
|
ASSERT_NE(nullptr, commandBuffer);
|
||||||
|
LinearStream cs(commandBuffer);
|
||||||
|
|
||||||
|
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
|
||||||
|
ResidencyContainer residencyList;
|
||||||
|
|
||||||
|
auto previousTaskCount = csr.peekTaskCount();
|
||||||
|
auto currentTaskCount = previousTaskCount + 1;
|
||||||
|
csr.submitBatchBuffer(batchBuffer, residencyList);
|
||||||
|
|
||||||
|
EXPECT_EQ(currentTaskCount, csr.peekTaskCount());
|
||||||
|
EXPECT_EQ(previousTaskCount, csr.peekLatestFlushedTaskCount());
|
||||||
|
EXPECT_EQ(currentTaskCount, csr.peekLatestSentTaskCount());
|
||||||
|
|
||||||
|
memoryManager->freeGraphicsMemoryImpl(commandBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandStreamReceiverTest, givenOverrideCsrAllocationSizeWhenCreatingCommandStreamCsrGraphicsAllocationThenAllocationHasCorrectSize) {
|
HWTEST_F(CommandStreamReceiverTest, givenOverrideCsrAllocationSizeWhenCreatingCommandStreamCsrGraphicsAllocationThenAllocationHasCorrectSize) {
|
||||||
DebugManagerStateRestore restore;
|
DebugManagerStateRestore restore;
|
||||||
|
|
||||||
|
|
|
@ -472,6 +472,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
|
||||||
void flushTagUpdate() override{};
|
void flushTagUpdate() override{};
|
||||||
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
|
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
|
||||||
void updateTagFromWait() override{};
|
void updateTagFromWait() override{};
|
||||||
|
bool isUpdateTagFromWaitEnabled() override { return false; };
|
||||||
|
|
||||||
bool isMultiOsContextCapable() const override { return false; }
|
bool isMultiOsContextCapable() const override { return false; }
|
||||||
|
|
||||||
|
|
|
@ -89,7 +89,9 @@ int CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuffer, Residency
|
||||||
this->latestSentTaskCount = taskCount + 1;
|
this->latestSentTaskCount = taskCount + 1;
|
||||||
|
|
||||||
auto flushed = this->flush(batchBuffer, allocationsForResidency);
|
auto flushed = this->flush(batchBuffer, allocationsForResidency);
|
||||||
this->latestFlushedTaskCount = taskCount + 1;
|
if (!isUpdateTagFromWaitEnabled()) {
|
||||||
|
this->latestFlushedTaskCount = taskCount + 1;
|
||||||
|
}
|
||||||
taskCount++;
|
taskCount++;
|
||||||
|
|
||||||
return !flushed;
|
return !flushed;
|
||||||
|
@ -257,10 +259,6 @@ void CommandStreamReceiver::cleanupResources() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||||
if (this->latestSentTaskCount < taskCountToWait) {
|
|
||||||
this->flushTagUpdate();
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
||||||
if (latestSentTaskCount < taskCountToWait) {
|
if (latestSentTaskCount < taskCountToWait) {
|
||||||
if (!this->flushBatchedSubmissions()) {
|
if (!this->flushBatchedSubmissions()) {
|
||||||
|
@ -275,7 +273,13 @@ bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, boo
|
||||||
std::chrono::high_resolution_clock::time_point time1, time2;
|
std::chrono::high_resolution_clock::time_point time1, time2;
|
||||||
int64_t timeDiff = 0;
|
int64_t timeDiff = 0;
|
||||||
|
|
||||||
|
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
||||||
|
if (latestSentTaskCount < taskCountToWait) {
|
||||||
|
updateTagFromWait();
|
||||||
|
}
|
||||||
|
|
||||||
volatile uint32_t *partitionAddress = pollAddress;
|
volatile uint32_t *partitionAddress = pollAddress;
|
||||||
|
|
||||||
time1 = std::chrono::high_resolution_clock::now();
|
time1 = std::chrono::high_resolution_clock::now();
|
||||||
for (uint32_t i = 0; i < activePartitions; i++) {
|
for (uint32_t i = 0; i < activePartitions; i++) {
|
||||||
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
|
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
|
||||||
|
|
|
@ -219,6 +219,7 @@ class CommandStreamReceiver {
|
||||||
virtual void flushTagUpdate() = 0;
|
virtual void flushTagUpdate() = 0;
|
||||||
virtual void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool isStartOfDispatch, bool isEndOfDispatch) = 0;
|
virtual void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool isStartOfDispatch, bool isEndOfDispatch) = 0;
|
||||||
virtual void updateTagFromWait() = 0;
|
virtual void updateTagFromWait() = 0;
|
||||||
|
virtual bool isUpdateTagFromWaitEnabled() = 0;
|
||||||
|
|
||||||
ScratchSpaceController *getScratchSpaceController() const {
|
ScratchSpaceController *getScratchSpaceController() const {
|
||||||
return scratchSpaceController.get();
|
return scratchSpaceController.get();
|
||||||
|
|
|
@ -108,7 +108,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||||
size_t commandStreamStartTask);
|
size_t commandStreamStartTask);
|
||||||
void flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
|
void flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
|
||||||
|
|
||||||
bool isUpdateTagFromWaitEnabled();
|
bool isUpdateTagFromWaitEnabled() override;
|
||||||
void updateTagFromWait() override;
|
void updateTagFromWait() override;
|
||||||
|
|
||||||
bool isMultiOsContextCapable() const override;
|
bool isMultiOsContextCapable() const override;
|
||||||
|
|
|
@ -789,7 +789,10 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
||||||
|
|
||||||
flushStampUpdateHelper.updateAll(flushStamp->peekStamp());
|
flushStampUpdateHelper.updateAll(flushStamp->peekStamp());
|
||||||
|
|
||||||
this->latestFlushedTaskCount = lastTaskCount;
|
if (!isUpdateTagFromWaitEnabled()) {
|
||||||
|
this->latestFlushedTaskCount = lastTaskCount;
|
||||||
|
}
|
||||||
|
|
||||||
this->makeSurfacePackNonResident(surfacesForSubmit);
|
this->makeSurfacePackNonResident(surfacesForSubmit);
|
||||||
resourcePackage.clear();
|
resourcePackage.clear();
|
||||||
}
|
}
|
||||||
|
@ -882,8 +885,6 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||||
updateTagFromWait();
|
|
||||||
|
|
||||||
int64_t waitTimeout = 0;
|
int64_t waitTimeout = 0;
|
||||||
bool enableTimeout = false;
|
bool enableTimeout = false;
|
||||||
|
|
||||||
|
@ -1088,14 +1089,18 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||||
|
|
||||||
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
|
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
|
||||||
|
|
||||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
auto updateTag = !isUpdateTagFromWaitEnabled();
|
||||||
|
updateTag |= blocking;
|
||||||
|
if (updateTag) {
|
||||||
|
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
||||||
|
|
||||||
MiFlushArgs args;
|
MiFlushArgs args;
|
||||||
args.commandWithPostSync = true;
|
args.commandWithPostSync = true;
|
||||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args);
|
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args);
|
||||||
|
|
||||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
||||||
|
}
|
||||||
|
|
||||||
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||||
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
|
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
|
||||||
|
@ -1129,7 +1134,10 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||||
flush(batchBuffer, getResidencyAllocations());
|
flush(batchBuffer, getResidencyAllocations());
|
||||||
makeSurfacePackNonResident(getResidencyAllocations());
|
makeSurfacePackNonResident(getResidencyAllocations());
|
||||||
|
|
||||||
latestFlushedTaskCount = newTaskCount;
|
if (!isUpdateTagFromWaitEnabled()) {
|
||||||
|
latestFlushedTaskCount = newTaskCount;
|
||||||
|
}
|
||||||
|
|
||||||
taskCount = newTaskCount;
|
taskCount = newTaskCount;
|
||||||
auto flushStampToWait = flushStamp->peekStamp();
|
auto flushStampToWait = flushStamp->peekStamp();
|
||||||
|
|
||||||
|
@ -1144,12 +1152,10 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline void CommandStreamReceiverHw<GfxFamily>::flushTagUpdate() {
|
inline void CommandStreamReceiverHw<GfxFamily>::flushTagUpdate() {
|
||||||
if (this->osContext != nullptr) {
|
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
|
||||||
if (this->osContext->getEngineType() == aub_stream::ENGINE_BCS) {
|
this->flushMiFlushDW();
|
||||||
this->flushMiFlushDW();
|
} else {
|
||||||
} else {
|
this->flushPipeControl();
|
||||||
this->flushPipeControl();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1176,11 +1182,12 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushMiFlushDW() {
|
||||||
MiFlushArgs args;
|
MiFlushArgs args;
|
||||||
args.commandWithPostSync = true;
|
args.commandWithPostSync = true;
|
||||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount, args);
|
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount + 1, args);
|
||||||
|
|
||||||
makeResident(*tagAllocation);
|
makeResident(*tagAllocation);
|
||||||
|
|
||||||
this->flushSmallTask(commandStream, commandStreamStart);
|
this->flushSmallTask(commandStream, commandStreamStart);
|
||||||
|
this->latestFlushedTaskCount = taskCount.load();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
@ -1215,8 +1222,9 @@ void CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
|
||||||
|
|
||||||
PipeControlArgs args(true);
|
PipeControlArgs args(true);
|
||||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||||
|
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
||||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(commandStream,
|
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(commandStream,
|
||||||
PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||||
getTagAllocation()->getGpuAddress(),
|
getTagAllocation()->getGpuAddress(),
|
||||||
taskCount + 1,
|
taskCount + 1,
|
||||||
peekHwInfo(),
|
peekHwInfo(),
|
||||||
|
@ -1225,10 +1233,7 @@ void CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
|
||||||
makeResident(*tagAllocation);
|
makeResident(*tagAllocation);
|
||||||
|
|
||||||
this->flushSmallTask(commandStream, commandStreamStart);
|
this->flushSmallTask(commandStream, commandStreamStart);
|
||||||
|
this->latestFlushedTaskCount = taskCount.load();
|
||||||
this->latestFlushedTaskCount = taskCount + 1;
|
|
||||||
this->latestSentTaskCount = taskCount + 1;
|
|
||||||
taskCount++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
@ -1321,7 +1326,9 @@ void CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream &commandStr
|
||||||
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||||
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false};
|
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false};
|
||||||
|
|
||||||
|
this->latestSentTaskCount = taskCount + 1;
|
||||||
flushHandler(batchBuffer, getResidencyAllocations());
|
flushHandler(batchBuffer, getResidencyAllocations());
|
||||||
|
taskCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|
|
@ -59,6 +59,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||||
void flushTagUpdate() override{};
|
void flushTagUpdate() override{};
|
||||||
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
|
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
|
||||||
void updateTagFromWait() override{};
|
void updateTagFromWait() override{};
|
||||||
|
bool isUpdateTagFromWaitEnabled() override { return false; };
|
||||||
|
|
||||||
bool isMultiOsContextCapable() const override { return multiOsContextCapable; }
|
bool isMultiOsContextCapable() const override { return multiOsContextCapable; }
|
||||||
|
|
||||||
|
@ -164,6 +165,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
|
||||||
using CommandStreamReceiver::globalFenceAllocation;
|
using CommandStreamReceiver::globalFenceAllocation;
|
||||||
using CommandStreamReceiver::isPreambleSent;
|
using CommandStreamReceiver::isPreambleSent;
|
||||||
using CommandStreamReceiver::lastSentCoherencyRequest;
|
using CommandStreamReceiver::lastSentCoherencyRequest;
|
||||||
|
using CommandStreamReceiver::latestFlushedTaskCount;
|
||||||
using CommandStreamReceiver::mediaVfeStateDirty;
|
using CommandStreamReceiver::mediaVfeStateDirty;
|
||||||
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
|
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
|
||||||
using CommandStreamReceiver::pageTableManagerInitialized;
|
using CommandStreamReceiver::pageTableManagerInitialized;
|
||||||
|
|
Loading…
Reference in New Issue