Add implicit flush for new resources and idling gpu

Related-To: NEO-5100

Change-Id: I57fdb8eecd88124c4c9171014950554c35dbecd1
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-09-22 16:29:34 +02:00
committed by sys_ocldev
parent e9ea2dc182
commit 3b6f9b7cb6
30 changed files with 592 additions and 174 deletions

View File

@@ -558,6 +558,8 @@ HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedI
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueKernelIsCalledThenKernelIsRecorded) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
pDevice->resetCommandStreamReceiver(mockCsr);
@@ -674,6 +676,8 @@ HWTEST_F(EnqueueKernelTest, givenDefaultCommandStreamReceiverWhenClFlushIsCalled
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledThenKernelIsSubmitted) {
auto mockCsrmockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsrmockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsrmockCsr->useNewResourceImplicitFlush = false;
mockCsrmockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsrmockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -715,6 +719,8 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKe
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIsEnqueuedTwiceThenTwoSubmissionsAreRecorded) {
auto &mockCsrmockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
mockCsrmockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsrmockCsr.useNewResourceImplicitFlush = false;
mockCsrmockCsr.useGpuIdleImplicitFlush = false;
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsrmockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);
@@ -742,6 +748,8 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIs
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenFlushIsCalledOnTwoBatchedKernelsThenTheyAreExecutedInOrder) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -760,6 +768,8 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenFlushIsC
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -779,6 +789,8 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesS
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenThressEnqueueKernelsAreCalledThenBatchesSubmissionsAreFlushed) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -799,6 +811,8 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenThressEnqueueKernelsAreCal
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledThenBatchedSubmissionsAreFlushed) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -823,6 +837,8 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledThenB
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandIsFlushedThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -873,6 +889,8 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenNonBlockingMapFollowsNdrCa
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandWithEventIsFollowedByCommandWithoutEventThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -905,6 +923,8 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandWithEventIsFollowed
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenClFlushIsCalledThenQueueFlushStampIsUpdated) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
MockKernelWithInternals mockKernel(*pClDevice);
@@ -973,6 +993,8 @@ HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelIsMadeTh
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -995,6 +1017,8 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelIsMadeThenP
auto &mockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);
@@ -1015,6 +1039,8 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelThatHasShar
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1052,6 +1078,8 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEv
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->timestampPacketWriteEnabled = false;
pDevice->resetCommandStreamReceiver(mockCsr);
@@ -1079,6 +1107,8 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEv
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->enableNTo1SubmissionModel();
@@ -1103,6 +1133,8 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEv
HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
pDevice->resetCommandStreamReceiver(mockCsr);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();

View File

@@ -364,6 +364,10 @@ HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEve
HWTEST_F(EnqueueReadBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
UltCommandStreamReceiver<FamilyType> &mockCsr =
reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(pCmdOOQ->getGpgpuCommandStreamReceiver());
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
uint32_t taskLevelCmdQ = 17;
pCmdOOQ->taskLevel = taskLevelCmdQ;

View File

@@ -361,6 +361,11 @@ HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEv
HWTEST_F(EnqueueWriteBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
UltCommandStreamReceiver<FamilyType> &mockCsr =
reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(pCmdOOQ->getGpgpuCommandStreamReceiver());
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
uint32_t taskLevelCmdQ = 17;
pCmdOOQ->taskLevel = taskLevelCmdQ;

View File

@@ -280,6 +280,11 @@ HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstP
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);
cl_int retVal = CL_SUCCESS;
std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
UltCommandStreamReceiver<FamilyType> &mockCsr =
reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(pCmdOOQ->getGpgpuCommandStreamReceiver());
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
EXPECT_EQ(retVal, CL_SUCCESS);
retVal = pCmdOOQ->enqueueWriteBuffer(zeroCopyBuffer.get(),
@@ -293,7 +298,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstP
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(pCmdOOQ->taskLevel, 0u);
EXPECT_EQ(0u, pCmdOOQ->taskLevel);
pCmdOOQ->flush();
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {

View File

@@ -5,6 +5,8 @@
*
*/
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/mocks/mock_csr.h"
@@ -17,6 +19,12 @@ struct OOQFixtureFactory : public HelloWorldFixtureFactory {
// Typed-test fixture for the out-of-order queue task tests.
// SetUp() sets both implicit-flush debug flags to 0 before delegating to the
// base fixture's SetUp().
template <typename TypeParam>
struct OOQTaskTypedTests : public HelloWorldTest<OOQFixtureFactory> {
    // NOTE(review): presumably snapshots the debug flags when the fixture is
    // constructed and restores them when it is destroyed - confirm against
    // DebugManagerStateRestore.
    DebugManagerStateRestore stateRestore;

    void SetUp() override {
        auto &debugFlags = DebugManager.flags;
        debugFlags.PerformImplicitFlushForNewResource.set(0);
        debugFlags.PerformImplicitFlushForIdleGpu.set(0);

        // The flags are written before the base fixture runs its own setup.
        HelloWorldTest<OOQFixtureFactory>::SetUp();
    }
};
TYPED_TEST_CASE_P(OOQTaskTypedTests);
@@ -115,6 +123,10 @@ TEST_F(OOQTaskTests, enqueueKernel_changesTaskCount) {
HWTEST_F(OOQTaskTests, givenCommandQueueWithLowerTaskLevelThenCsrWhenItIsSubmittedThenCommandQueueObtainsTaskLevelFromCsrWithoutSendingPipeControl) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
UltCommandStreamReceiver<FamilyType> &mockCsr =
reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(commandStreamReceiver);
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
commandStreamReceiver.taskLevel = 100;
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel);
EXPECT_EQ(100u, this->pCmdQ->taskLevel);
@@ -124,6 +136,8 @@ HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenMultipleEnqueueAreDone
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->taskLevel = 100;
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel);
@@ -139,6 +153,8 @@ HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowed
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->taskLevel = 100;
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel);
@@ -158,6 +174,8 @@ HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowed
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->taskLevel = 100;
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel);
@@ -178,6 +196,8 @@ HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowed
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->taskLevel = 100;
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel);
@@ -198,6 +218,9 @@ HWTEST_F(OOQTaskTests, givenTwoEnqueueCommandSynchronizedByEventsWhenTheyAreEnqu
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
auto currentTaskLevel = this->pCmdQ->taskLevel;
cl_event retEvent;
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel, EnqueueKernelTraits::workDim,
@@ -220,15 +243,24 @@ HWTEST_F(OOQTaskTests, givenTwoEnqueueCommandSynchronizedByEventsWhenTheyAreEnqu
clReleaseEvent(retEvent);
}
TEST_F(OOQTaskTests, WhenEnqueingKernelThenTaskLevelIsNotIncremented) {
// Checks that enqueuing a kernel leaves the command queue's task level
// unchanged when both implicit-flush options are switched off on the CSR.
HWTEST_F(OOQTaskTests, WhenEnqueingKernelThenTaskLevelIsNotIncremented) {
    const auto taskLevelBeforeEnqueue = this->pCmdQ->taskLevel;

    // Turn off both implicit-flush options on the queue's command stream receiver.
    auto &ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(pCmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.useNewResourceImplicitFlush = false;
    ultCsr.useGpuIdleImplicitFlush = false;

    EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel);

    EXPECT_EQ(taskLevelBeforeEnqueue, this->pCmdQ->taskLevel);
}
TEST_F(OOQTaskTests, GivenBlockingAndNonBlockedOnUserEventWhenReadingBufferThenTaskCountIsIncrementedAndTaskLevelIsUnchanged) {
HWTEST_F(OOQTaskTests, GivenBlockingAndNonBlockedOnUserEventWhenReadingBufferThenTaskCountIsIncrementedAndTaskLevelIsUnchanged) {
UltCommandStreamReceiver<FamilyType> &mockCsr =
reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(pCmdQ->getGpgpuCommandStreamReceiver());
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
auto buffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize);
@@ -269,7 +301,6 @@ TEST_F(OOQTaskTests, GivenBlockingAndNonBlockedOnUserEventWhenReadingBufferThenT
}
TEST_F(OOQTaskTests, givenOutOfOrderCommandQueueWhenBarrierIsCalledThenTaskLevelIsUpdated) {
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ,
pKernel);
auto currentTaskLevel = this->pCmdQ->taskLevel;

View File

@@ -180,6 +180,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenflushTaskThenDshAndIoh
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) {
auto &mockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr.useNewResourceImplicitFlush = false;
mockCsr.useGpuIdleImplicitFlush = false;
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);

View File

@@ -41,6 +41,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
@@ -97,7 +99,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndTwoRecord
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -148,7 +151,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeReco
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -201,7 +205,6 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeReco
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersThatUsesAllResourceWhenFlushTaskIsCalledThenBatchBuffersAreNotCombined) {
typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
@@ -210,7 +213,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeReco
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -278,7 +282,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -351,7 +356,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrInBatch
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -429,7 +435,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenBlocking
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -487,7 +494,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
@@ -545,8 +553,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
dispatchFlags.guardCommandBufferWithPipeControl = true;
auto &csr = commandQueue.getGpgpuCommandStreamReceiver();
auto &csr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(commandQueue.getGpgpuCommandStreamReceiver());
csr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
csr.useNewResourceImplicitFlush = false;
csr.useGpuIdleImplicitFlush = false;
dispatchFlags.implicitFlush = false;
csr.flushTask(commandStream,
@@ -595,7 +605,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForT
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -635,7 +646,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenEnqueueI
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -675,7 +687,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenSusbsequ
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -739,6 +752,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalRes
}
mockCsr->initializeTagAllocation();
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -793,7 +808,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests,
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -874,7 +890,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushI
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
@@ -902,7 +919,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenCommandA
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
@@ -933,7 +951,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrd
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
@@ -964,7 +983,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushI
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -997,7 +1017,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlThenDcFlus
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1036,7 +1057,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlWhendDcFlu
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1072,7 +1094,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests,
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->timestampPacketWriteEnabled = false;
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1130,7 +1153,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndOoqFlagSe
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1169,7 +1193,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenPipeCont
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1231,7 +1256,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests,
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1372,7 +1398,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetT
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1404,7 +1431,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetT
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1453,7 +1481,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetT
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1534,7 +1563,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithNewSliceCoun
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
@@ -1770,3 +1800,27 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenInit
EXPECT_FALSE(csr->pageTableManagerInitialized);
memoryManager->freeGraphicsMemory(graphicsAllocation);
}
// In batched dispatch mode, a CSR whose newResources flag is set is expected
// to have flushBatchedSubmissionsCalled set after a flushTask.
HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenCsrIsMarkedWithNewResourceThenCallBatchedSubmission) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.dispatchMode = DispatchMode::BatchedDispatch;
    csr.newResources = true;

    flushTask(csr);

    EXPECT_TRUE(csr.flushBatchedSubmissionsCalled);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenGpuIsIdleWhenCsrIsEnabledToFlushOnGpuIdleThenCallBatchedSubmission) {
    // With the idle-GPU implicit flush enabled and the tag value equal to the task count,
    // flushTask in batched mode is expected to call flushBatchedSubmissions.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.useGpuIdleImplicitFlush = true;
    csr.dispatchMode = DispatchMode::BatchedDispatch;

    // Tag value == task count is the state this test treats as "GPU is idle".
    csr.taskCount = 1u;
    *csr.getTagAddress() = 1u;

    flushTask(csr);
    EXPECT_TRUE(csr.flushBatchedSubmissionsCalled);

    // NOTE(review): the tag is bumped after the check, presumably so that fixture
    // teardown does not wait on the task submitted above - confirm.
    *csr.getTagAddress() = 2u;
}

View File

@@ -70,7 +70,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenCsrInBatchingModeThreeRe
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

View File

@@ -470,142 +470,54 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenGshAddres
EXPECT_EQ(0u, scratchController->calculateNewGSH());
}
// Checks estimateBlitCommandsSize for single-row copies: a width of exactly 3 * max2DBlitSize
// is expected to cost 3 blits, one byte more is expected to cost 4, and copy-region mode is
// expected not to be preferred for either size.
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
// Per-blit cost used by the expectations: one XY_COPY_BLT plus one MI_ARB_CHECK.
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
size_t notAlignedBltSize = (3 * max2DBlitSize) + 1;
size_t alignedBltSize = (3 * max2DBlitSize);
uint32_t alignedNumberOfBlts = 3;
uint32_t notAlignedNumberOfBlts = 4;
auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;
// Copy sizes are {width, 1, 1}.
auto alignedCopySize = Vec3<size_t>{alignedBltSize, 1, 1};
auto notAlignedCopySize = Vec3<size_t>{notAlignedBltSize, 1, 1};
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
alignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
notAlignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedCopySize, pClDevice->getRootDeviceEnvironment()));
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(notAlignedCopySize, pClDevice->getRootDeviceEnvironment()));
}
// Checks the container overload of estimateBlitCommandsSize, called with (false, true) flags:
// the expected total is the per-blit commands plus the extra sizes summed below, aligned up
// to the cache line size.
HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
// Two (MI_FLUSH_DW data write + MI_SEMAPHORE_WAIT) pairs are budgeted by this test.
const size_t debugCommandsSize = (EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore<FamilyType>::getSizeMiSemaphoreWait()) * 2;
constexpr uint32_t numberOfBlts = 3;
constexpr size_t bltSize = (numberOfBlts * max2DBlitSize);
auto expectedSize = (cmdsSizePerBlit * numberOfBlts) + debugCommandsSize + MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()) +
EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);
BlitProperties blitProperties;
blitProperties.copySize = {bltSize, 1, 1};
// A container holding a single blit description is passed to the estimator.
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
blitPropertiesContainer, false, true, pClDevice->getRootDeviceEnvironment());
EXPECT_EQ(expectedSize, estimatedSize);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(blitProperties.copySize, pClDevice->getRootDeviceEnvironment()));
}
// Checks estimateBlitCommandsSize for 3D copy sizes {width, 4, 2}: the expected blit count is
// the per-row count (3 for the aligned width, 4 for the width with one extra byte) multiplied
// by y and z; copy-region mode is expected not to be preferred.
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
Vec3<size_t> notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2};
Vec3<size_t> alignedBltSize = {(3 * max2DBlitSize), 4, 2};
// Blits per row times number of rows (y) times number of slices (z).
size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z;
size_t notAlignedNumberOfBlts = 4 * notAlignedBltSize.y * notAlignedBltSize.z;
auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
alignedBltSize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
notAlignedBltSize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(notAlignedBltSize, pClDevice->getRootDeviceEnvironment()));
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedBltSize, pClDevice->getRootDeviceEnvironment()));
}
// Checks estimateBlitCommandsSize for copy sizes that are multiples of the maximum blit
// width/height: copy-region mode is expected to be preferred, and the expected blit count is
// (width chunks) * (height chunks) * z.
HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
auto maxWidthToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));
auto maxHeightToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitHeight(rootDeviceEnvironment));
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
Vec3<size_t> alignedBltSize = {(3 * maxWidthToCopy), (4 * maxHeightToCopy), 2};
Vec3<size_t> notAlignedBltSize = {(3 * maxWidthToCopy + 1), (4 * maxHeightToCopy), 2};
EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment));
// 3 (or 4, with the extra byte of width) chunks across, 4 chunks down, times z.
size_t alignedNumberOfBlts = (3 * 4 * alignedBltSize.z);
size_t notAlignedNumberOfBlts = (4 * 4 * notAlignedBltSize.z);
auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
alignedBltSize, csrDependencies, false, false, rootDeviceEnvironment);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
notAlignedBltSize, csrDependencies, false, false, rootDeviceEnvironment);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(notAlignedBltSize, rootDeviceEnvironment));
EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment));
}
HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) {
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
auto maxWidthToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));
auto maxHeightToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitHeight(rootDeviceEnvironment));
{
Vec3<size_t> copySize = {maxWidthToCopy * maxHeightToCopy, 1, 3};
size_t expectednBlitsCopyRegion = maxHeightToCopy * 3;
size_t expectednBlitsCopyPerRow = 3;
auto nBlitsCopyRegion = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment);
auto nBlitsCopyPerRow = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow);
EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(copySize, rootDeviceEnvironment));
HWTEST_F(CommandStreamReceiverHwTest, givenDefaultPlatformCapabilityWhenNoDebugKeysSetThenExpectDefaultPlatformSettings) {
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
if (commandStreamReceiver->checkPlatformSupportsNewResourceImplicitFlush()) {
EXPECT_TRUE(commandStreamReceiver->useNewResourceImplicitFlush);
} else {
EXPECT_FALSE(commandStreamReceiver->useNewResourceImplicitFlush);
}
{
Vec3<size_t> copySize = {2 * maxWidthToCopy, 16, 3};
size_t expectednBlitsCopyRegion = 2 * 3;
size_t expectednBlitsCopyPerRow = 16 * 3;
auto nBlitsCopyRegion = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment);
auto nBlitsCopyPerRow = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
}
EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow);
EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion);
EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(copySize, rootDeviceEnvironment));
HWTEST_F(CommandStreamReceiverHwTest, givenDefaultGpuIdleImplicitFlushWhenNoDebugKeysSetThenExpectDefaultPlatformSettings) {
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
if (commandStreamReceiver->checkPlatformSupportsGpuIdleImplicitFlush()) {
EXPECT_TRUE(commandStreamReceiver->useGpuIdleImplicitFlush);
} else {
EXPECT_FALSE(commandStreamReceiver->useGpuIdleImplicitFlush);
}
{
Vec3<size_t> copySize = {2 * maxWidthToCopy, 3 * maxHeightToCopy, 4};
size_t expectednBlitsCopyRegion = 2 * 3 * 4;
size_t expectednBlitsCopyPerRow = 3 * maxHeightToCopy * 4;
auto nBlitsCopyRegion = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment);
auto nBlitsCopyPerRow = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
}
EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow);
EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion);
EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(copySize, rootDeviceEnvironment));
}
HWTEST_F(CommandStreamReceiverHwTest, WhenForceDisableNewResourceImplicitFlushThenExpectFlagSetFalse) {
    // PerformImplicitFlushForNewResource is set to 0 before the CSR is constructed; the CSR is
    // then expected to have useNewResourceImplicitFlush == false.
    DebugManagerStateRestore debugStateGuard;
    DebugManager.flags.PerformImplicitFlushForNewResource.set(0);

    auto csr = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment,
                                                       pDevice->getRootDeviceIndex());

    EXPECT_FALSE(csr->useNewResourceImplicitFlush);
}
HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableNewResourceImplicitFlushThenExpectFlagSetTrue) {
    // PerformImplicitFlushForNewResource is set to 1 before the CSR is constructed; the CSR is
    // then expected to have useNewResourceImplicitFlush == true.
    DebugManagerStateRestore debugStateGuard;
    DebugManager.flags.PerformImplicitFlushForNewResource.set(1);

    auto csr = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment,
                                                       pDevice->getRootDeviceIndex());

    EXPECT_TRUE(csr->useNewResourceImplicitFlush);
}
HWTEST_F(CommandStreamReceiverHwTest, WhenForceDisableGpuIdleImplicitFlushThenExpectFlagSetFalse) {
    // PerformImplicitFlushForIdleGpu is set to 0 before the CSR is constructed; the CSR is
    // then expected to have useGpuIdleImplicitFlush == false.
    DebugManagerStateRestore debugStateGuard;
    DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);

    auto csr = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment,
                                                       pDevice->getRootDeviceIndex());

    EXPECT_FALSE(csr->useGpuIdleImplicitFlush);
}
HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableGpuIdleImplicitFlushThenExpectFlagSetTrue) {
    // PerformImplicitFlushForIdleGpu is set to 1 before the CSR is constructed; the CSR is
    // then expected to have useGpuIdleImplicitFlush == true.
    DebugManagerStateRestore debugStateGuard;
    DebugManager.flags.PerformImplicitFlushForIdleGpu.set(1);

    auto csr = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment,
                                                       pDevice->getRootDeviceIndex());

    EXPECT_TRUE(csr->useGpuIdleImplicitFlush);
}
HWTEST_F(BcsTests, WhenGetNumberOfBlitsForCopyPerRowIsCalledThenCorrectValuesAreReturned) {

View File

@@ -24,6 +24,144 @@
using namespace NEO;
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
    // Single-row copies: exactly 3 * max2DBlitSize bytes are expected to be estimated as
    // 3 blits, one byte more as 4. Copy-region mode is expected not to be preferred.
    using BlitHelper = BlitCommandsHelper<FamilyType>;
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();

    constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
    // Per-blit cost used by the expectations: one XY_COPY_BLT plus one MI_ARB_CHECK.
    constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);

    size_t alignedBltSize = (3 * max2DBlitSize);
    size_t notAlignedBltSize = (3 * max2DBlitSize) + 1;
    Vec3<size_t> alignedCopySize{alignedBltSize, 1, 1};
    Vec3<size_t> notAlignedCopySize{notAlignedBltSize, 1, 1};

    uint32_t alignedNumberOfBlts = 3;
    uint32_t notAlignedNumberOfBlts = 4;
    auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
    auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;

    auto alignedEstimatedSize = BlitHelper::estimateBlitCommandsSize(
        alignedCopySize, csrDependencies, false, false, rootDeviceEnvironment);
    auto notAlignedEstimatedSize = BlitHelper::estimateBlitCommandsSize(
        notAlignedCopySize, csrDependencies, false, false, rootDeviceEnvironment);

    EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
    EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
    EXPECT_FALSE(BlitHelper::isCopyRegionPreferred(alignedCopySize, rootDeviceEnvironment));
    EXPECT_FALSE(BlitHelper::isCopyRegionPreferred(notAlignedCopySize, rootDeviceEnvironment));
}
HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
    // Container overload of estimateBlitCommandsSize, called with (false, true) flags: the
    // expected total is the sum of the parts below, aligned up to the cache line size.
    using BlitHelper = BlitCommandsHelper<FamilyType>;
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();

    constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
    constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
    constexpr uint32_t numberOfBlts = 3;
    constexpr size_t bltSize = (numberOfBlts * max2DBlitSize);

    // Two (MI_FLUSH_DW data write + MI_SEMAPHORE_WAIT) pairs are budgeted by this test.
    const size_t debugCommandsSize = (EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore<FamilyType>::getSizeMiSemaphoreWait()) * 2;
    const auto blitCommandsSize = cmdsSizePerBlit * numberOfBlts;
    const auto additionalSynchronizationSize = MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo());
    const auto flushDwSize = EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite();
    const auto batchBufferEndSize = sizeof(typename FamilyType::MI_BATCH_BUFFER_END);

    auto expectedSize = blitCommandsSize + debugCommandsSize + additionalSynchronizationSize + flushDwSize + batchBufferEndSize;
    expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);

    // A container holding a single blit description is passed to the estimator.
    BlitProperties blitProperties;
    blitProperties.copySize = {bltSize, 1, 1};
    BlitPropertiesContainer blitPropertiesContainer;
    blitPropertiesContainer.push_back(blitProperties);

    auto estimatedSize = BlitHelper::estimateBlitCommandsSize(
        blitPropertiesContainer, false, true, rootDeviceEnvironment);

    EXPECT_EQ(expectedSize, estimatedSize);
    EXPECT_FALSE(BlitHelper::isCopyRegionPreferred(blitProperties.copySize, rootDeviceEnvironment));
}
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
    // 3D copy sizes {width, 4, 2}: the expected blit count is the per-row count (3 aligned,
    // 4 with one extra byte of width) times y times z. Copy-region mode is not expected.
    using BlitHelper = BlitCommandsHelper<FamilyType>;
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();

    constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
    constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);

    Vec3<size_t> alignedBltSize{(3 * max2DBlitSize), 4, 2};
    Vec3<size_t> notAlignedBltSize{(3 * max2DBlitSize) + 1, 4, 2};

    // Blits per row times number of rows (y) times number of slices (z).
    size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z;
    size_t notAlignedNumberOfBlts = 4 * notAlignedBltSize.y * notAlignedBltSize.z;
    auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
    auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;

    auto alignedEstimatedSize = BlitHelper::estimateBlitCommandsSize(
        alignedBltSize, csrDependencies, false, false, rootDeviceEnvironment);
    auto notAlignedEstimatedSize = BlitHelper::estimateBlitCommandsSize(
        notAlignedBltSize, csrDependencies, false, false, rootDeviceEnvironment);

    EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
    EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
    EXPECT_FALSE(BlitHelper::isCopyRegionPreferred(notAlignedBltSize, rootDeviceEnvironment));
    EXPECT_FALSE(BlitHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment));
}
HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
    // Copy sizes that are multiples of the maximum blit width/height: copy-region mode is
    // expected to be preferred, and the expected blit count is
    // (width chunks) * (height chunks) * z.
    using BlitHelper = BlitCommandsHelper<FamilyType>;
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();

    constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
    auto maxWidthToCopy = static_cast<size_t>(BlitHelper::getMaxBlitWidth(rootDeviceEnvironment));
    auto maxHeightToCopy = static_cast<size_t>(BlitHelper::getMaxBlitHeight(rootDeviceEnvironment));

    Vec3<size_t> alignedBltSize{(3 * maxWidthToCopy), (4 * maxHeightToCopy), 2};
    Vec3<size_t> notAlignedBltSize{(3 * maxWidthToCopy + 1), (4 * maxHeightToCopy), 2};

    EXPECT_TRUE(BlitHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment));

    // 3 (or 4, with the extra byte of width) chunks across, 4 chunks down, times z.
    size_t alignedNumberOfBlts = (3 * 4 * alignedBltSize.z);
    size_t notAlignedNumberOfBlts = (4 * 4 * notAlignedBltSize.z);
    auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
    auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;

    auto alignedEstimatedSize = BlitHelper::estimateBlitCommandsSize(
        alignedBltSize, csrDependencies, false, false, rootDeviceEnvironment);
    auto notAlignedEstimatedSize = BlitHelper::estimateBlitCommandsSize(
        notAlignedBltSize, csrDependencies, false, false, rootDeviceEnvironment);

    EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
    EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
    EXPECT_TRUE(BlitHelper::isCopyRegionPreferred(notAlignedBltSize, rootDeviceEnvironment));
    EXPECT_TRUE(BlitHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment));
}
HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) {
    // Runs three copy sizes through both blit-count helpers and through isCopyRegionPreferred,
    // comparing each result against the values spelled out at the call sites below.
    using BlitHelper = BlitCommandsHelper<FamilyType>;
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
    auto maxWidthToCopy = static_cast<size_t>(BlitHelper::getMaxBlitWidth(rootDeviceEnvironment));
    auto maxHeightToCopy = static_cast<size_t>(BlitHelper::getMaxBlitHeight(rootDeviceEnvironment));

    // Shared check for one copy size; argument order is (size, region count, per-row count, region preferred).
    auto verifyBlitCounts = [&](Vec3<size_t> copySize, size_t expectednBlitsCopyRegion, size_t expectednBlitsCopyPerRow, bool expectCopyRegionPreferred) {
        auto nBlitsCopyRegion = BlitHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment);
        auto nBlitsCopyPerRow = BlitHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);

        EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow);
        EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion);
        EXPECT_EQ(expectCopyRegionPreferred, BlitHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment));
    };

    // One very wide row per slice: per-row copy wins.
    verifyBlitCounts({maxWidthToCopy * maxHeightToCopy, 1, 3}, maxHeightToCopy * 3, 3, false);

    // Two max-widths across, 16 rows, 3 slices: region copy wins.
    verifyBlitCounts({2 * maxWidthToCopy, 16, 3}, 2 * 3, 16 * 3, true);

    // Two max-widths across, three max-heights down, 4 slices: region copy wins.
    verifyBlitCounts({2 * maxWidthToCopy, 3 * maxHeightToCopy, 4}, 2 * 3 * 4, 3 * maxHeightToCopy * 4, true);
}
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
@@ -1041,4 +1179,4 @@ TEST_F(ScratchSpaceControllerTest, whenScratchSpaceControllerIsDestroyedThenItRe
TEST(BcsConstantsTests, givenBlitConstantsThenTheyHaveDesiredValues) {
    // Both blit dimensions are pinned to the same limit.
    constexpr auto expectedMaxBlitDimension = 0x3FC0u;

    EXPECT_EQ(BlitterConstants::maxBlitWidth, expectedMaxBlitDimension);
    EXPECT_EQ(BlitterConstants::maxBlitHeight, expectedMaxBlitDimension);
}
}

View File

@@ -708,6 +708,107 @@ TEST(CommandStreamReceiverSimpleTest, givenVariousDataSetsWhenVerifyingMemoryThe
EXPECT_TRUE(csr.expectMemory(setA2, setB2, setSize, compareNotEqual));
}
TEST(CommandStreamReceiverSimpleTest, givenNewResourceFlushDisabledWhenProvidingNeverUsedAllocationTaskCountThenDoNotMarkNewResourceTrue) {
    // With the new-resource implicit flush disabled, a never-used allocation task count is
    // expected to leave newResources unset.
    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    MockGraphicsAllocation allocation;
    receiver.newResources = false;
    receiver.useNewResourceImplicitFlush = false;

    receiver.checkForNewResources(10u, GraphicsAllocation::objectNotUsed, allocation);

    EXPECT_FALSE(receiver.newResources);
}
TEST(CommandStreamReceiverSimpleTest, givenNewResourceFlushEnabledWhenProvidingNeverUsedAllocationTaskCountThenMarkNewResourceTrue) {
    // With the new-resource implicit flush enabled, a never-used allocation task count is
    // expected to set newResources.
    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    MockGraphicsAllocation allocation;
    receiver.newResources = false;
    receiver.useNewResourceImplicitFlush = true;

    receiver.checkForNewResources(10u, GraphicsAllocation::objectNotUsed, allocation);

    EXPECT_TRUE(receiver.newResources);
}
TEST(CommandStreamReceiverSimpleTest, givenNewResourceFlushEnabledWhenProvidingAlreadyUsedAllocationTaskCountThenDoNotMarkNewResource) {
    // With the new-resource implicit flush enabled, an allocation task count other than
    // objectNotUsed (here 10) is expected to leave newResources unset.
    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    MockGraphicsAllocation allocation;
    receiver.newResources = false;
    receiver.useNewResourceImplicitFlush = true;

    receiver.checkForNewResources(10u, 10u, allocation);

    EXPECT_FALSE(receiver.newResources);
}
TEST(CommandStreamReceiverSimpleTest, givenNewResourceFlushEnabledWhenProvidingNewAllocationAndVerbosityEnabledThenProvidePrintOfNewAllocationType) {
    // With ProvideVerboseImplicitFlush enabled, detecting a new resource is expected to print
    // one line naming the allocation type to stdout.
    DebugManagerStateRestore debugStateGuard;
    DebugManager.flags.ProvideVerboseImplicitFlush.set(true);

    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    MockGraphicsAllocation allocation;
    receiver.newResources = false;
    receiver.useNewResourceImplicitFlush = true;

    // Capture only what the call under test prints.
    testing::internal::CaptureStdout();
    receiver.checkForNewResources(10u, GraphicsAllocation::objectNotUsed, allocation);
    EXPECT_TRUE(receiver.newResources);
    std::string printed = testing::internal::GetCapturedStdout();

    EXPECT_NE(0u, printed.size());
    EXPECT_STREQ("New resource detected of type 0\n", printed.c_str());
}
TEST(CommandStreamReceiverSimpleTest, givenGpuIdleImplicitFlushCheckDisabledWhenGpuIsIdleThenReturnFalse) {
    // Tag value equals the task count, but the idle-GPU implicit flush is disabled, so the
    // check is expected to return false.
    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    receiver.useGpuIdleImplicitFlush = false;
    receiver.callParentGetTagAddress = false;
    receiver.taskCount = 1u;
    receiver.mockTagAddress = 1u;

    EXPECT_FALSE(receiver.checkImplicitFlushForGpuIdle());
}
TEST(CommandStreamReceiverSimpleTest, givenGpuIdleImplicitFlushCheckEnabledWhenGpuIsIdleThenReturnTrue) {
    // Tag value equals the task count and the idle-GPU implicit flush is enabled, so the
    // check is expected to return true.
    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    receiver.useGpuIdleImplicitFlush = true;
    receiver.callParentGetTagAddress = false;
    receiver.taskCount = 1u;
    receiver.mockTagAddress = 1u;

    EXPECT_TRUE(receiver.checkImplicitFlushForGpuIdle());
}
TEST(CommandStreamReceiverSimpleTest, givenGpuNotIdleImplicitFlushCheckEnabledWhenGpuIsIdleThenReturnFalse) {
    // Task count (2) differs from the tag value (1); with the idle-GPU implicit flush enabled
    // the check is still expected to return false.
    MockExecutionEnvironment mockEnvironment;
    mockEnvironment.prepareRootDeviceEnvironments(1);
    mockEnvironment.initializeMemoryManager();

    MockCommandStreamReceiver receiver(mockEnvironment, 0);
    receiver.useGpuIdleImplicitFlush = true;
    receiver.callParentGetTagAddress = false;
    receiver.taskCount = 2u;
    receiver.mockTagAddress = 1u;

    EXPECT_FALSE(receiver.checkImplicitFlushForGpuIdle());

    // NOTE(review): the tag is raised to the task count after the check, presumably so the
    // mock's destruction does not wait on an incomplete task - confirm.
    receiver.mockTagAddress = 2u;
}
TEST(CommandStreamReceiverMultiContextTests, givenMultipleCsrsWhenSameResourcesAreUsedThenResidencyIsProperlyHandled) {
auto executionEnvironment = platform()->peekExecutionEnvironment();

View File

@@ -594,6 +594,8 @@ HWTEST_F(SubmissionsAggregatorTests, givenMultipleQueuesWhenCmdBuffersAreRecorde
CommandQueueHw<FamilyType> cmdQ1(context.get(), device.get(), 0, false);
CommandQueueHw<FamilyType> cmdQ2(context.get(), device.get(), 0, false);
auto mockCsr = new MockCsrHw2<FamilyType>(*device->executionEnvironment, device->getRootDeviceIndex());
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
size_t GWS = 1;
overrideCsr(mockCsr);
@@ -626,6 +628,8 @@ HWTEST_F(SubmissionsAggregatorTests, givenCmdQueueWhenCmdBufferWithEventIsRecord
MockKernelWithInternals kernel(*device.get());
CommandQueueHw<FamilyType> cmdQ1(context.get(), device.get(), 0, false);
auto mockCsr = new MockCsrHw2<FamilyType>(*device->executionEnvironment, device->getRootDeviceIndex());
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
size_t GWS = 1;
overrideCsr(mockCsr);
@@ -652,6 +656,8 @@ HWTEST_F(SubmissionsAggregatorTests, givenMultipleCmdBuffersWhenFlushThenUpdateA
CommandQueueHw<FamilyType> cmdQ1(context.get(), device.get(), 0, false);
CommandQueueHw<FamilyType> cmdQ2(context.get(), device.get(), 0, false);
auto mockCsr = new MockCsrHw2<FamilyType>(*device->executionEnvironment, device->getRootDeviceIndex());
mockCsr->useNewResourceImplicitFlush = false;
mockCsr->useGpuIdleImplicitFlush = false;
size_t GWS = 1;
overrideCsr(mockCsr);

View File

@@ -29,6 +29,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass = CommandStreamReceiverHw<GfxFamily>;
public:
using BaseClass::checkPlatformSupportsGpuIdleImplicitFlush;
using BaseClass::checkPlatformSupportsNewResourceImplicitFlush;
using BaseClass::dshState;
using BaseClass::getCmdSizeForPrologue;
using BaseClass::getScratchPatchAddress;
@@ -45,6 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::rootDeviceIndex;
using BaseClass::sshState;
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
using BaseClass::CommandStreamReceiver::checkForNewResources;
using BaseClass::CommandStreamReceiver::checkImplicitFlushForGpuIdle;
using BaseClass::CommandStreamReceiver::cleanupResources;
using BaseClass::CommandStreamReceiver::commandStream;
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
@@ -72,6 +76,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
using BaseClass::CommandStreamReceiver::latestSentTaskCount;
using BaseClass::CommandStreamReceiver::mediaVfeStateDirty;
using BaseClass::CommandStreamReceiver::newResources;
using BaseClass::CommandStreamReceiver::osContext;
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
@@ -86,6 +91,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::taskLevel;
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;
using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled;
using BaseClass::CommandStreamReceiver::useGpuIdleImplicitFlush;
using BaseClass::CommandStreamReceiver::useNewResourceImplicitFlush;
using BaseClass::CommandStreamReceiver::userPauseConfirmation;
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;

View File

@@ -16,6 +16,8 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver<GfxFamily
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::makeResident;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
using DrmCommandStreamReceiver<GfxFamily>::residency;
using CommandStreamReceiverHw<GfxFamily>::directSubmission;
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiver::lastSentSliceCount;

View File

@@ -746,6 +746,8 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
auto testedCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
testedCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
testedCsr->useNewResourceImplicitFlush = false;
testedCsr->useGpuIdleImplicitFlush = false;
auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
auto dummyAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
@@ -805,6 +807,8 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
auto testedCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
testedCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
testedCsr->useNewResourceImplicitFlush = false;
testedCsr->useGpuIdleImplicitFlush = false;
auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
IndirectHeap cs(commandBuffer);

View File

@@ -27,4 +27,12 @@ TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenDeviceHandleQueriedthenZeroIsRetu
EXPECT_EQ(0u, osInterface.getDeviceHandle());
}
TEST(OsInterfaceTest, GivenLinuxOsWhenCheckForNewResourceImplicitFlushSupportThenReturnTrue) {
    // The Linux build is expected to report the new-resource implicit flush as enabled.
    const bool newResourceFlushSupported = OSInterface::newResourceImplicitFlush;
    EXPECT_TRUE(newResourceFlushSupported);
}
// Renamed from "...ThenReturnFalse": the body asserts that the flag is TRUE, so the old name
// contradicted the behavior being checked.
TEST(OsInterfaceTest, GivenLinuxOsWhenCheckForGpuIdleImplicitFlushSupportThenReturnTrue) {
    // The Linux build is expected to report the idle-GPU implicit flush as enabled.
    EXPECT_TRUE(OSInterface::gpuIdleImplicitFlush);
}
} // namespace NEO

View File

@@ -93,6 +93,8 @@ struct MockWddmCsr : public WddmCommandStreamReceiver<GfxFamily> {
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::getCS;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
using CommandStreamReceiverHw<GfxFamily>::blitterDirectSubmission;
using CommandStreamReceiverHw<GfxFamily>::directSubmission;
using WddmCommandStreamReceiver<GfxFamily>::commandBufferHeader;
@@ -810,6 +812,8 @@ HWTEST_F(WddmCommandStreamMockGdiTest, givenRecordedCommandBufferWhenItIsSubmitt
csrSurfaceCount += csr->globalFenceAllocation ? 1 : 0;
csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
csr->useNewResourceImplicitFlush = false;
csr->useGpuIdleImplicitFlush = false;
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
csr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

View File

@@ -38,3 +38,11 @@ TEST(OsContextTest, givenWddmWhenCreateOsContextAfterInitWddmThenOsContextIsInit
EXPECT_EQ(osContext->getWddm(), wddm);
EXPECT_EQ(1u, wddm->registerTrimCallbackResult.called);
}
// Checks that the static flag OSInterface::newResourceImplicitFlush evaluates to false
// in the build this (Windows-named) test is compiled into.
TEST_F(OsInterfaceTest, GivenWindowsOsWhenCheckForNewResourceImplicitFlushSupportThenReturnFalse) {
EXPECT_FALSE(OSInterface::newResourceImplicitFlush);
}
// Checks that the static flag OSInterface::gpuIdleImplicitFlush evaluates to false
// in the build this (Windows-named) test is compiled into.
TEST_F(OsInterfaceTest, GivenWindowsOsWhenCheckForGpuIdleImplicitFlushSupportThenReturnFalse) {
EXPECT_FALSE(OSInterface::gpuIdleImplicitFlush);
}

View File

@@ -191,4 +191,7 @@ DirectSubmissionOverrideBlitterSupport = -1
DirectSubmissionOverrideRenderSupport = -1
DirectSubmissionOverrideComputeSupport = -1
EnableUsmCompression = -1
PerformImplicitFlushEveryEnqueueCount = -1
PerformImplicitFlushEveryEnqueueCount = -1
PerformImplicitFlushForNewResource = -1
PerformImplicitFlushForIdleGpu = -1
ProvideVerboseImplicitFlush = false

View File

@@ -84,6 +84,7 @@ void CommandStreamReceiver::makeResident(GraphicsAllocation &gfxAllocation) {
auto submissionTaskCount = this->taskCount + 1;
if (gfxAllocation.isResidencyTaskCountBelow(submissionTaskCount, osContext->getContextId())) {
this->getResidencyAllocations().push_back(&gfxAllocation);
checkForNewResources(submissionTaskCount, gfxAllocation.getTaskCount(osContext->getContextId()), gfxAllocation);
gfxAllocation.updateTaskCount(submissionTaskCount, osContext->getContextId());
if (!gfxAllocation.isResident(osContext->getContextId())) {
this->totalMemoryUsed += gfxAllocation.getUnderlyingBufferSize();
@@ -560,4 +561,24 @@ void CommandStreamReceiver::printDeviceIndex() {
}
}
// Records that a not-yet-used allocation is about to be submitted.
//
// When useNewResourceImplicitFlush is enabled and allocationTaskCount equals
// GraphicsAllocation::objectNotUsed, the newResources flag is raised. If the
// ProvideVerboseImplicitFlush debug flag is set, the allocation type is also
// printed to stdout.
//
// submittedTaskCount  - currently not read by this function.
// allocationTaskCount - task count recorded on the allocation before this submission.
// gfxAllocation       - allocation being inspected; only its type is queried (for logging).
void CommandStreamReceiver::checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation) {
    if (!useNewResourceImplicitFlush) {
        return;
    }
    if (allocationTaskCount != GraphicsAllocation::objectNotUsed) {
        return;
    }

    newResources = true;

    if (DebugManager.flags.ProvideVerboseImplicitFlush.get()) {
        const auto allocationType = static_cast<unsigned long long>(gfxAllocation.getAllocationType());
        printf("New resource detected of type %llu\n", allocationType);
    }
}
// Returns true when useGpuIdleImplicitFlush is enabled and the value currently
// stored at the tag address equals this receiver's taskCount; false otherwise.
// The tag address is only dereferenced when the feature is enabled
// (short-circuit evaluation).
bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() {
    return useGpuIdleImplicitFlush && (this->taskCount == *getTagAddress());
}
} // namespace NEO

View File

@@ -219,6 +219,8 @@ class CommandStreamReceiver {
protected:
void cleanupResources();
void printDeviceIndex();
void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation);
bool checkImplicitFlushForGpuIdle();
std::unique_ptr<FlushStampTracker> flushStamp;
std::unique_ptr<SubmissionAggregator> submissionAggregator;
@@ -300,6 +302,10 @@ class CommandStreamReceiver {
bool localMemoryEnabled = false;
bool pageTableManagerInitialized = false;
bool useNewResourceImplicitFlush = false;
bool newResources = false;
bool useGpuIdleImplicitFlush = false;
};
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex);

View File

@@ -132,6 +132,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate);
bool detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const;
bool checkPlatformSupportsNewResourceImplicitFlush() const;
bool checkPlatformSupportsGpuIdleImplicitFlush() const;
HeapDirtyState dshState;
HeapDirtyState iohState;

View File

@@ -55,6 +55,17 @@ CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(ExecutionEnvironment
timestampPacketWriteEnabled = !!DebugManager.flags.EnableTimestampPacket.get();
}
createScratchSpaceController();
useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush();
int32_t overrideNewResourceImplicitFlush = DebugManager.flags.PerformImplicitFlushForNewResource.get();
if (overrideNewResourceImplicitFlush != -1) {
useNewResourceImplicitFlush = overrideNewResourceImplicitFlush == 0 ? false : true;
}
useGpuIdleImplicitFlush = checkPlatformSupportsGpuIdleImplicitFlush();
int32_t overrideGpuIdleImplicitFlush = DebugManager.flags.PerformImplicitFlushForIdleGpu.get();
if (overrideGpuIdleImplicitFlush != -1) {
useGpuIdleImplicitFlush = overrideGpuIdleImplicitFlush == 0 ? false : true;
}
}
template <typename GfxFamily>
@@ -554,6 +565,12 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
}
if (this->newResources) {
implicitFlush = true;
this->newResources = false;
}
implicitFlush |= checkImplicitFlushForGpuIdle();
if (this->dispatchMode == DispatchMode::BatchedDispatch && implicitFlush) {
this->flushBatchedSubmissions();
}

View File

@@ -94,4 +94,14 @@ inline void CommandStreamReceiverHw<GfxFamily>::addPipeControlBeforeStateBaseAdd
addPipeControlCmd(commandStream, args);
}
// Platform query for the "implicit flush on new resource" feature.
// This definition unconditionally returns false.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::checkPlatformSupportsNewResourceImplicitFlush() const {
return false;
}
// Platform query for the "implicit flush when GPU is idle" feature.
// This definition unconditionally returns false.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::checkPlatformSupportsGpuIdleImplicitFlush() const {
return false;
}
} // namespace NEO

View File

@@ -105,6 +105,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintTimestampPacketContents, false, "prints all ti
DECLARE_DEBUG_VARIABLE(bool, WddmResidencyLogger, false, "gather Wddm residency statistics to file")
DECLARE_DEBUG_VARIABLE(bool, PrintBOCreateDestroyResult, false, "tracks the result of creation and destruction of BOs")
DECLARE_DEBUG_VARIABLE(bool, PrintBOBindingResult, false, "tracks the result of binding and unbinding of BOs")
DECLARE_DEBUG_VARIABLE(bool, ProvideVerboseImplicitFlush, false, "provides verbose messages about implicit flush mechanism")
/*PERFORMANCE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForBuffers, false, "When active all buffer allocations will not share memory with CPU.")
@@ -124,6 +125,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr trac
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushEveryEnqueueCount, -1, "If greater then 0, driver performs implicit flush every N submissions.")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForNewResource, -1, "-1: platform specific, 0: force disable, 1: force enable")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForIdleGpu, -1, "-1: platform specific, 0: force disable, 1: force enable")
/*DIRECT SUBMISSION FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")

View File

@@ -17,6 +17,8 @@
namespace NEO {
bool OSInterface::osEnabled64kbPages = false;
bool OSInterface::newResourceImplicitFlush = true;
bool OSInterface::gpuIdleImplicitFlush = true;
OSInterface::OSInterfaceImpl::OSInterfaceImpl() = default;
OSInterface::OSInterfaceImpl::~OSInterfaceImpl() = default;

View File

@@ -29,6 +29,8 @@ class OSInterface {
static bool osEnabled64kbPages;
static bool osEnableLocalMemory;
static bool are64kbPagesEnabled();
static bool newResourceImplicitFlush;
static bool gpuIdleImplicitFlush;
uint32_t getDeviceHandle() const;
void setGmmInputArgs(void *args);
static std::vector<std::unique_ptr<HwDeviceId>> discoverDevices(ExecutionEnvironment &executionEnvironment);

View File

@@ -15,6 +15,8 @@
namespace NEO {
bool OSInterface::osEnabled64kbPages = true;
bool OSInterface::newResourceImplicitFlush = false;
bool OSInterface::gpuIdleImplicitFlush = false;
OSInterface::OSInterface() {
osInterfaceImpl = new OSInterfaceImpl();

View File

@@ -25,20 +25,19 @@ using namespace NEO;
class MockCommandStreamReceiver : public CommandStreamReceiver {
public:
using CommandStreamReceiver::checkForNewResources;
using CommandStreamReceiver::checkImplicitFlushForGpuIdle;
using CommandStreamReceiver::CommandStreamReceiver;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::internalAllocationStorage;
using CommandStreamReceiver::latestFlushedTaskCount;
using CommandStreamReceiver::latestSentTaskCount;
using CommandStreamReceiver::newResources;
using CommandStreamReceiver::requiredThreadArbitrationPolicy;
using CommandStreamReceiver::tagAddress;
std::vector<char> instructionHeapReserveredData;
int *flushBatchedSubmissionsCallCounter = nullptr;
uint32_t waitForCompletionWithTimeoutCalled = 0;
bool multiOsContextCapable = false;
bool downloadAllocationsCalled = false;
bool programHardwareContextCalled = false;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
waitForCompletionWithTimeoutCalled++;
@@ -84,6 +83,22 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
// Mock override: always reports a size of 0.
size_t getCmdsSizeForHardwareContext() const override {
return 0;
}
// Mock override of the tag-address accessor.
// When callParentGetTagAddress is false, the address of the mock-owned
// mockTagAddress member is returned instead of the base-class tag address,
// letting a test control the value read through the tag pointer.
volatile uint32_t *getTagAddress() const override {
    if (!callParentGetTagAddress) {
        // const_cast: this method is const, so &mockTagAddress is a pointer-to-const here.
        return const_cast<volatile uint32_t *>(&mockTagAddress);
    }
    return CommandStreamReceiver::getTagAddress();
}
std::vector<char> instructionHeapReserveredData;
int *flushBatchedSubmissionsCallCounter = nullptr;
uint32_t waitForCompletionWithTimeoutCalled = 0;
uint32_t mockTagAddress = 0;
bool multiOsContextCapable = false;
bool downloadAllocationsCalled = false;
bool programHardwareContextCalled = false;
bool callParentGetTagAddress = true;
};
template <typename GfxFamily>
@@ -107,6 +122,8 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::taskLevel;
using CommandStreamReceiver::timestampPacketWriteEnabled;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
// Forwards both arguments to the CommandStreamReceiverHw<GfxFamily> constructor; no additional setup.
MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex) {}
@@ -177,4 +194,4 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
ResidencyContainer copyOfAllocations;
DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
bool programHardwareContextCalled = false;
};
};

View File

@@ -1023,6 +1023,14 @@ struct IsAtMostGfxCore {
}
};
// Compile-time matcher parameterised on a lower-bound gfx core family.
// isMatched<productFamily>() is true when the core family that
// NEO::ToGfxCoreFamily maps productFamily to is greater than or equal to
// gfxCoreFamily.
template <GFXCORE_FAMILY gfxCoreFamily>
struct IsAtLeastGfxCore {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        // Lower bound on the left: "gfxCoreFamily <= product's core family".
        return gfxCoreFamily <= NEO::ToGfxCoreFamily<productFamily>::get();
    }
};
template <PRODUCT_FAMILY product>
struct IsProduct {
template <PRODUCT_FAMILY productFamily>