Files
compute-runtime/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp

3245 lines
133 KiB
C++
Raw Normal View History

/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/built_ins/built_ins.h"
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_stream/linear_stream.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include "runtime/event/user_event.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/cache_policy.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/gen_common/reg_configs.h"
#include "unit_tests/libult/ult_command_stream_receiver.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/memory_management_fixture.h"
#include "unit_tests/fixtures/built_in_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_event.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"
#include "test.h"
#include "gtest/gtest.h"
#include "runtime/utilities/linux/debug_env_reader.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/command_queue/dispatch_walker.h"
using namespace OCLRT;
// Common fixture for the command stream receiver hardware tests.
//
// Combines the device, built-in, memory-management and hardware-parse
// fixtures and owns one client command stream plus four heap streams
// (dsh, ih, ioh, ssh). SetUp() gives every stream its own aligned host buffer
// and a GraphicsAllocation; TearDown() releases both again.
struct UltCommandStreamReceiverTest
    : public DeviceFixture,
      public BuiltInFixture,
      public MemoryManagementFixture,
      public HardwareParse,
      ::testing::Test {
    // Sets up the base fixtures, then allocates the stream buffers and
    // attaches a buffer and a GraphicsAllocation to each LinearStream.
    void SetUp() override {
        MemoryManagementFixture::SetUp();
        DeviceFixture::SetUp();
        BuiltInFixture::SetUp(pDevice);
        HardwareParse::SetUp();

        size_t sizeStream = 256;
        size_t alignmentStream = 0x1000;

        cmdBuffer = alignedMalloc(sizeStream, alignmentStream);
        dshBuffer = alignedMalloc(sizeStream, alignmentStream);
        ihBuffer = alignedMalloc(sizeStream, alignmentStream);
        iohBuffer = alignedMalloc(sizeStream, alignmentStream);
        sshBuffer = alignedMalloc(sizeStream, alignmentStream);

        ASSERT_NE(nullptr, cmdBuffer);
        ASSERT_NE(nullptr, dshBuffer);
        ASSERT_NE(nullptr, ihBuffer);
        ASSERT_NE(nullptr, iohBuffer);
        ASSERT_NE(nullptr, sshBuffer);

        // The allocations created here are deleted explicitly in TearDown().
        commandStream.replaceBuffer(cmdBuffer, sizeStream);
        auto graphicsAllocation = new GraphicsAllocation(cmdBuffer, sizeStream);
        commandStream.replaceGraphicsAllocation(graphicsAllocation);

        dsh.replaceBuffer(dshBuffer, sizeStream);
        graphicsAllocation = new GraphicsAllocation(dshBuffer, sizeStream);
        dsh.replaceGraphicsAllocation(graphicsAllocation);

        ih.replaceBuffer(ihBuffer, sizeStream);
        graphicsAllocation = new GraphicsAllocation(ihBuffer, sizeStream);
        ih.replaceGraphicsAllocation(graphicsAllocation);

        ioh.replaceBuffer(iohBuffer, sizeStream);
        graphicsAllocation = new GraphicsAllocation(iohBuffer, sizeStream);
        ioh.replaceGraphicsAllocation(graphicsAllocation);

        ssh.replaceBuffer(sshBuffer, sizeStream);
        graphicsAllocation = new GraphicsAllocation(sshBuffer, sizeStream);
        ssh.replaceGraphicsAllocation(graphicsAllocation);
    }

    // Deletes the GraphicsAllocations, frees the stream buffers and tears the
    // base fixtures down in the reverse order of SetUp().
    void TearDown() override {
        delete dsh.getGraphicsAllocation();
        delete ih.getGraphicsAllocation();
        delete ioh.getGraphicsAllocation();
        delete ssh.getGraphicsAllocation();
        delete commandStream.getGraphicsAllocation();

        alignedFree(sshBuffer);
        alignedFree(iohBuffer);
        alignedFree(ihBuffer);
        alignedFree(dshBuffer);
        alignedFree(cmdBuffer);

        HardwareParse::TearDown();
        BuiltInFixture::TearDown();
        DeviceFixture::TearDown();
        MemoryManagementFixture::TearDown();
    }

    // Flushes the fixture's command stream and heaps through the given CSR.
    //   commandStreamReceiver - receiver whose flushTask() is invoked
    //   block                 - copied to DispatchFlags::blocking
    //   startOffset           - forwarded as flushTask()'s second argument
    //   requiresCoherency     - copied to DispatchFlags::requiresCoherency
    //   lowPriority           - copied to DispatchFlags::lowPriority
    // Returns the CompletionStamp returned by the receiver's flushTask().
    template <typename CommandStreamReceiverType>
    CompletionStamp flushTask(CommandStreamReceiverType &commandStreamReceiver,
                              bool block = false,
                              size_t startOffset = 0,
                              bool requiresCoherency = false,
                              bool lowPriority = false) {
        DispatchFlags dispatchFlags;
        dispatchFlags.blocking = block;
        dispatchFlags.requiresCoherency = requiresCoherency;
        dispatchFlags.lowPriority = lowPriority;

        return commandStreamReceiver.flushTask(
            commandStream,
            startOffset,
            dsh,
            ih,
            ioh,
            ssh,
            taskLevel,
            dispatchFlags);
    }

    // Runs updateAndCheck() on each of the CSR's heap states with the
    // fixture's current heaps (presumably so a following flush sees them as
    // unchanged - see the stateBaseAddressTracking test).
    template <typename GfxFamily>
    void configureCSRHeapStatesToNonDirty() {
        auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<GfxFamily>();
        commandStreamReceiver.dshState.updateAndCheck(&dsh);
        commandStreamReceiver.ihState.updateAndCheck(&ih);
        commandStreamReceiver.iohState.updateAndCheck(&ioh);
        commandStreamReceiver.sshState.updateAndCheck(&ssh);
    }

    // Presets the CSR's "last sent" tracking fields (preamble, preemption
    // mode, media VFE state, MOCS, L3 config, heaps, task level, thread
    // arbitration policy, coherency, media sampler config) to values that
    // match the fixture, so tests can start from a non-dirty CSR.
    template <typename GfxFamily>
    void configureCSRtoNonDirtyState() {
        bool slmUsed = false;
        if (DebugManager.flags.ForceSLML3Config.get()) {
            slmUsed = true;
        }

        uint32_t L3Config = PreambleHelper<GfxFamily>::getL3Config(*platformDevices[0], slmUsed);

        auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<GfxFamily>();
        commandStreamReceiver.isPreambleSent = true;
        commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode();
        commandStreamReceiver.overrideMediaVFEStateDirty(false);
        commandStreamReceiver.latestSentStatelessMocsConfig = CacheSettings::l3CacheOn;
        commandStreamReceiver.lastSentL3Config = L3Config;
        configureCSRHeapStatesToNonDirty<GfxFamily>();
        commandStreamReceiver.taskLevel = taskLevel;
        commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
        commandStreamReceiver.lastSentCoherencyRequest = 0;
        commandStreamReceiver.lastMediaSamplerConfig = 0;
    }

    // Returns the device's command stream receiver reinterpreted as the
    // UltCommandStreamReceiver specialization for GfxFamily.
    template <typename GfxFamily>
    UltCommandStreamReceiver<GfxFamily> &getUltCommandStreamReceiver() {
        return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(pDevice->getCommandStreamReceiver());
    }

    // Task level passed to flushTask() by the helper above.
    uint32_t taskLevel = 42;

    // Client command stream and the four heaps handed to flushTask().
    LinearStream commandStream;
    LinearStream dsh;
    LinearStream ih;
    LinearStream ioh;
    LinearStream ssh;

    // Host memory backing the streams above; allocated in SetUp().
    void *cmdBuffer = nullptr;
    void *dshBuffer = nullptr;
    void *ihBuffer = nullptr;
    void *iohBuffer = nullptr;
    void *sshBuffer = nullptr;

    // Zero-initialized so no test can read an indeterminate value.
    uint32_t latestSentDcFlushTaskCount = 0;
    uint32_t latestSentNonDcFlushTaskCount = 0;
    uint32_t dcFlushRequiredTaskCount = 0;
};
// getSizeRequiredPreambleCS() must equal the size of the three fixed commands
// below plus PreambleHelper's additional command size for the device.
HWTEST_F(UltCommandStreamReceiverTest, requiredCmdSizeForPreamble) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    auto fixedCommandsSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);
    auto expectedCmdSize = fixedCommandsSize + PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);

    EXPECT_EQ(expectedCmdSize, getSizeRequiredPreambleCS<FamilyType>(*pDevice));
}
// A fresh CSR reports task count and task level 0, and the first
// updateAndCheck() on each heap state returns true.
HWTEST_F(UltCommandStreamReceiverTest, testInitialState) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_EQ(0u, csr.peekTaskCount());
    EXPECT_EQ(0u, csr.peekTaskLevel());

    EXPECT_TRUE(csr.dshState.updateAndCheck(&dsh));
    EXPECT_TRUE(csr.ihState.updateAndCheck(&ih));
    EXPECT_TRUE(csr.iohState.updateAndCheck(&ioh));
    EXPECT_TRUE(csr.sshState.updateAndCheck(&ssh));
}
// Alias so the flush-helper tests below form their own test case.
using CommandStreamReceiverFlushTests = UltCommandStreamReceiverTest;
// addBatchBufferEnd() must grow the stream by exactly one MI_BATCH_BUFFER_END
// and the bytes written must decode as that command.
HWTEST_F(CommandStreamReceiverFlushTests, addsBatchBufferEnd) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    auto usedBefore = commandStream.getUsed();
    CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(commandStream, nullptr);
    EXPECT_EQ(commandStream.getUsed(), usedBefore + sizeof(MI_BATCH_BUFFER_END));

    auto emittedCmd = ptrOffset(commandStream.getBase(), usedBefore);
    auto batchBufferEnd = genCmdCast<MI_BATCH_BUFFER_END *>(emittedCmd);
    EXPECT_NE(nullptr, batchBufferEnd);
}
// After consuming a few bytes, alignToCacheLine() must leave the used size at
// a multiple of the cache line size.
HWTEST_F(CommandStreamReceiverFlushTests, shouldAlignToCacheLineSize) {
    commandStream.getSpace(sizeof(uint32_t));

    CommandStreamReceiverHw<FamilyType>::alignToCacheLine(commandStream);

    auto misalignment = commandStream.getUsed() % MemoryConstants::cacheLineSize;
    EXPECT_EQ(0u, misalignment);
}
// Alias so the flushTask() tests below form their own test case.
using CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest;
// The first flush on an untouched CSR must write something to the CSR's own
// command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldSeeCommandsOnFirstFlush) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    flushTask(csr);

    EXPECT_GT(csr.commandStream.getUsed(), 0u);
}
// A single flush must leave the CSR's task count at 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, taskCountShouldBeUpdated) {
    auto &csr = pDevice->getCommandStreamReceiver();

    flushTask(csr);

    EXPECT_EQ(1u, csr.peekTaskCount());
}
// Once the CSR has been put into the non-dirty state, a flush must not add
// anything to the CSR's command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) {
    configureCSRtoNonDirtyState<FamilyType>();
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    flushTask(csr);

    EXPECT_EQ(0u, csr.commandStream.getUsed());
}
// Batched dispatch, commands going through the CSR stream: after the flush
// the CSR stream is expected to occupy exactly two cache lines.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    auto batchingCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(batchingCsr);
    batchingCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    configureCSRtoNonDirtyState<FamilyType>();

    batchingCsr->getCS(1024u);
    auto &csrStream = batchingCsr->commandStream;

    // Raise the task level so the flush emits a pipe control, and pre-fill the
    // stream so that only the batch buffer end still fits in the cache line.
    taskLevel++;
    auto pipeControlSize = batchingCsr->getRequiredPipeControlSize();
    auto paddingSize = MemoryConstants::cacheLineSize - pipeControlSize - sizeof(MI_BATCH_BUFFER_END);
    csrStream.getSpace(paddingSize);

    auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize;

    flushTask(*batchingCsr);

    EXPECT_EQ(expectedUsedSize, batchingCsr->commandStream.getUsed());
}
// Batched dispatch, commands going through a command queue's stream: after
// the flush that stream is expected to occupy exactly two cache lines.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCommandStreamThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    auto batchingCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(batchingCsr);
    batchingCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    // This is the queue's own stream, not the fixture's commandStream member.
    auto &queueStream = commandQueue.getCS(4096u);

    configureCSRtoNonDirtyState<FamilyType>();

    // Leave room for exactly one MI_BATCH_BUFFER_END in the first cache line.
    auto paddingSize = MemoryConstants::cacheLineSize - sizeof(MI_BATCH_BUFFER_END);
    queueStream.getSpace(paddingSize);

    DispatchFlags dispatchFlags;
    batchingCsr->flushTask(queueStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags);

    auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize;
    EXPECT_EQ(expectedUsedSize, queueStream.getUsed());
}
// Flushing at the task level the CSR already has must keep that level and add
// nothing to the CSR's command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, sameTaskLevelShouldntSendAPipeControl) {
    WhitelistedRegisters whitelistedRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &whitelistedRegs);

    auto mockCsr = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);

    // Start from a CSR that has no state or commands left to submit.
    configureCSRtoNonDirtyState<FamilyType>();

    flushTask(*mockCsr);

    EXPECT_EQ(taskLevel, mockCsr->peekTaskLevel());

    auto sizeUsed = mockCsr->commandStream.getUsed();
    EXPECT_EQ(sizeUsed, 0u);
}
// With the device switched to ThreadGroup preemption after the CSR was made
// non-dirty, a flush must keep the task level and add nothing to the CSR's
// command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithPreemptionSupportThenDontSendMediaVfeStateIfNotDirty) {
    WhitelistedRegisters whitelistedRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &whitelistedRegs);

    auto mockCsr = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);

    // Start from a CSR that has no state or commands left to submit.
    configureCSRtoNonDirtyState<FamilyType>();

    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);

    flushTask(*mockCsr);

    EXPECT_EQ(taskLevel, mockCsr->peekTaskLevel());

    auto sizeUsed = mockCsr->commandStream.getUsed();
    EXPECT_EQ(0u, sizeUsed);
}
// When the flushed task level is above the CSR's, the CSR must adopt the new
// level and emit a PIPE_CONTROL into its command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;

    configureCSRtoNonDirtyState<FamilyType>();
    // Put the CSR behind the fixture's task level.
    csr.taskLevel = taskLevel / 2;

    flushTask(csr);

    EXPECT_EQ(taskLevel, csr.peekTaskLevel());
    EXPECT_GT(csr.commandStream.getUsed(), 0u);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), pipeControlIt);
}
// With no sampler cache flush pending, enabling the
// waSamplerCacheFlushBetweenRedescribedSurfaceReads workaround must not make
// the flush emit a PIPE_CONTROL, and the pending state must stay NotRequired.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // The device hands out a const table; the test toggles one flag in it and
    // restores the previous value before returning.
    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());

    csr.isPreambleSent = true;
    csr.lastPreemptionMode = pDevice->getPreemptionMode();
    csr.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired);
    configureCSRtoNonDirtyState<FamilyType>();
    csr.taskLevel = taskLevel;

    bool savedWaValue = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(csr);

    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, csr.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = savedWaValue;
}
// With the CSR in the samplerCacheFlushBefore state and the
// waSamplerCacheFlushBetweenRedescribedSurfaceReads workaround enabled, the
// flush is expected to emit a PIPE_CONTROL with texture cache invalidation
// and to move the CSR to samplerCacheFlushAfter.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>();
    commandStreamReceiver.taskLevel = taskLevel;

    // The device hands out a const table; the test toggles one flag in it and
    // restores the previous value before returning.
    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter, commandStreamReceiver.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);

    // Inspect the command only when one was found: dereferencing the end
    // iterator is undefined behaviour. The check above stays non-fatal so the
    // workaround flag below is restored even when the test fails.
    if (itorPC != cmdList.end()) {
        auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
        EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    }

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;
}
// With the waSamplerCacheFlushBetweenRedescribedSurfaceReads workaround
// disabled, a pending samplerCacheFlushBefore must not make the flush emit a
// PIPE_CONTROL, and the pending state must be left unchanged.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>();
    csr.taskLevel = taskLevel;

    // The device hands out a const table; the test toggles one flag in it and
    // restores the previous value before returning.
    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    bool savedWaValue = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;

    flushTask(csr);

    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, csr.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = savedWaValue;
}
// With the CSR in the samplerCacheFlushAfter state and the
// waSamplerCacheFlushBetweenRedescribedSurfaceReads workaround enabled, the
// flush is expected to emit a PIPE_CONTROL with texture cache invalidation
// and to reset the CSR to samplerCacheFlushNotRequired.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter);
    configureCSRtoNonDirtyState<FamilyType>();
    commandStreamReceiver.taskLevel = taskLevel;

    // The device hands out a const table; the test toggles one flag in it and
    // restores the previous value before returning.
    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);

    // Inspect the command only when one was found: dereferencing the end
    // iterator is undefined behaviour. The check above stays non-fatal so the
    // workaround flag below is restored even when the test fails.
    if (itorPC != cmdList.end()) {
        auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
        EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    }

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;
}
// The stamp returned by flushTask() must agree with the task level, task
// count and flush stamp the CSR reports afterwards.
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStampValid) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Pretend the command queue's task level lags the CSR's by 10.
    csr.taskLevel = taskLevel + 10;

    auto stamp = flushTask(csr);

    EXPECT_EQ(stamp.taskLevel, csr.peekTaskLevel());
    EXPECT_EQ(stamp.taskCount, csr.peekTaskCount());
    EXPECT_EQ(stamp.flushStamp, csr.flushStamp->peekStamp());
}
// Checks every field of the stamp returned by the first flush: task count 1,
// the fixture's task level, the CSR's flush stamp, device ordinal 0 and the
// device's engine type.
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStamp) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto expectedEngineType = pDevice->getEngineType();

    auto stamp = flushTask(csr);

    EXPECT_EQ(1u, stamp.taskCount);
    EXPECT_EQ(taskLevel, stamp.taskLevel);
    EXPECT_EQ(csr.flushStamp->peekStamp(), stamp.flushStamp);
    EXPECT_EQ(0u, stamp.deviceOrdinal);
    EXPECT_EQ(expectedEngineType, stamp.engineType);
}
// After a flush, updateAndCheck() with the same heaps must return false for
// every heap state of the CSR.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressTracking) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    flushTask(csr);

    EXPECT_FALSE(csr.dshState.updateAndCheck(&dsh));
    EXPECT_FALSE(csr.ihState.updateAndCheck(&ih));
    EXPECT_FALSE(csr.iohState.updateAndCheck(&ioh));
    EXPECT_FALSE(csr.sshState.updateAndCheck(&ssh));
}
// The STATE_BASE_ADDRESS command emitted by the first flush must carry the
// base addresses of the fixture's heaps and the expected MOCS values.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressProgrammingShouldMatchTracking) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto expectedInstructionMocs = Gmm::getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
    auto expectedStatelessMocs = Gmm::getMOCS(CacheSettings::l3CacheOn);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);

    auto &csrStream = csr.commandStream;
    HardwareParse::parseCommands<FamilyType>(csrStream, 0);
    HardwareParse::findHardwareCommands<FamilyType>();

    ASSERT_NE(nullptr, cmdStateBaseAddress);
    auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(cmdStateBaseAddress);

    EXPECT_EQ(dsh.getBase(), reinterpret_cast<void *>(sbaCmd->getDynamicStateBaseAddress()));
    EXPECT_EQ(ih.getBase(), reinterpret_cast<void *>(sbaCmd->getInstructionBaseAddress()));
    EXPECT_EQ(ioh.getBase(), reinterpret_cast<void *>(sbaCmd->getIndirectObjectBaseAddress()));
    EXPECT_EQ(ssh.getBase(), reinterpret_cast<void *>(sbaCmd->getSurfaceStateBaseAddress()));

    EXPECT_EQ(expectedStatelessMocs, sbaCmd->getStatelessDataPortAccessMemoryObjectControlState());
    EXPECT_EQ(expectedInstructionMocs, sbaCmd->getInstructionMemoryObjectControlState());
}
// After the instruction heap has been moved, the flush is expected to emit a
// PIPE_CONTROL ahead of the STATE_BASE_ADDRESS, with both texture cache
// invalidation and DC flush enabled.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlush) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    configureCSRtoNonDirtyState<FamilyType>();
    // Shift the instruction heap by one byte so it differs from what the CSR
    // recorded in configureCSRtoNonDirtyState().
    ih.replaceBuffer(ptrOffset(ih.getBase(), +1u), ih.getMaxAvailableSpace() - 1);

    flushTask(commandStreamReceiver);

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    // Search only the commands that precede the STATE_BASE_ADDRESS.
    auto pipeControlItor = find<PIPE_CONTROL *>(cmdList.begin(), stateBaseAddressItor);

    // Fatal on purpose: the iterator is dereferenced right below, and doing
    // that with the end-of-range iterator would be undefined behaviour.
    ASSERT_NE(stateBaseAddressItor, pipeControlItor);

    auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(*pipeControlItor);
    EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
}
// With the preamble marked as not yet sent, a flush must mark it as sent and
// write commands to the CSR's command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, preambleShouldBeSentIfNeverSent) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = false;

    flushTask(csr);

    EXPECT_TRUE(csr.isPreambleSent);
    EXPECT_GT(csr.commandStream.getUsed(), 0u);
}
// Preamble not sent and last media sampler config preset to -1: the flush
// must emit a PIPELINE_SELECT.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementChanged) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = false;
    csr.lastMediaSamplerConfig = -1;

    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    EXPECT_NE(nullptr, getCommand<PIPELINE_SELECT>());
}
// Preamble not sent and last media sampler config preset to 0: the flush must
// still emit a PIPELINE_SELECT.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementNotChanged) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = false;
    csr.lastMediaSamplerConfig = 0;

    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    EXPECT_NE(nullptr, getCommand<PIPELINE_SELECT>());
}
// Preamble already sent and last media sampler config preset to 0: the flush
// must not emit a PIPELINE_SELECT.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldNotBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.lastMediaSamplerConfig = 0;

    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    EXPECT_EQ(nullptr, getCommand<PIPELINE_SELECT>());
}
// Preamble already sent but last media sampler config preset to 1: the flush
// must emit a PIPELINE_SELECT.
// NOTE(review): the test name says "DoesntChanged", yet the setup presets a
// config of 1 where the sibling "not sent" test uses 0 - presumably the name
// should read "Changed"; confirm before renaming.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.lastMediaSamplerConfig = 1;

    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    EXPECT_NE(nullptr, getCommand<PIPELINE_SELECT>());
}
// With the preamble marked sent and the media VFE state not dirty, but the
// heap states never updated, the flush must emit a STATE_BASE_ADDRESS.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfNeverSent) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.overrideMediaVFEStateDirty(false);

    flushTask(csr);

    EXPECT_GT(csr.commandStream.getUsed(), 0u);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// The heap states are recorded while every heap has size 0; the heaps are
// then restored to their real sizes (same base addresses) and the flush must
// emit a STATE_BASE_ADDRESS.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfSizeChanged) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Remember the real sizes before shrinking the heaps.
    auto originalDshSize = dsh.getMaxAvailableSpace();
    auto originalIhSize = ih.getMaxAvailableSpace();
    auto originalIohSize = ioh.getMaxAvailableSpace();
    auto originalSshSize = ssh.getMaxAvailableSpace();

    dsh.replaceBuffer(dsh.getBase(), 0);
    ih.replaceBuffer(ih.getBase(), 0);
    ioh.replaceBuffer(ioh.getBase(), 0);
    ssh.replaceBuffer(ssh.getBase(), 0);

    csr.isPreambleSent = true;
    csr.overrideMediaVFEStateDirty(false);
    configureCSRHeapStatesToNonDirty<FamilyType>();

    // Same bases, different sizes than the ones just recorded.
    dsh.replaceBuffer(dsh.getBase(), originalDshSize);
    ih.replaceBuffer(ih.getBase(), originalIhSize);
    ioh.replaceBuffer(ioh.getBase(), originalIohSize);
    ssh.replaceBuffer(ssh.getBase(), originalSshSize);

    flushTask(csr);

    EXPECT_GT(csr.commandStream.getUsed(), 0u);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Replacing the dynamic state heap after the CSR was made non-dirty must make
// the flush emit a STATE_BASE_ADDRESS.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    configureCSRtoNonDirtyState<FamilyType>();

    dsh.replaceBuffer(nullptr, 0);
    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Replacing the surface state heap after the CSR was made non-dirty must make
// the flush emit a STATE_BASE_ADDRESS.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    configureCSRtoNonDirtyState<FamilyType>();

    ssh.replaceBuffer(nullptr, 0);
    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Replacing the indirect object heap after the CSR was made non-dirty must
// make the flush emit a STATE_BASE_ADDRESS.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIohHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    configureCSRtoNonDirtyState<FamilyType>();

    ioh.replaceBuffer(nullptr, 0);
    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Replacing the instruction heap after the CSR was made non-dirty must make
// the flush emit a STATE_BASE_ADDRESS.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    configureCSRtoNonDirtyState<FamilyType>();

    ih.replaceBuffer(nullptr, 0);
    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// With the heap states already recorded, the command at the start of the
// CSR's command stream after a flush must not be a STATE_BASE_ADDRESS.
// Only that first command is inspected.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldNotBeSentIfTheSame) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    configureCSRHeapStatesToNonDirty<FamilyType>();

    flushTask(csr);

    auto streamBase = csr.commandStream.getBase();
    STATE_BASE_ADDRESS *stateBaseAddress = nullptr;
    if (streamBase) {
        stateBaseAddress = genCmdCast<STATE_BASE_ADDRESS *>(streamBase);
    }

    EXPECT_EQ(nullptr, stateBaseAddress);
}
// A non-blocking flush must leave the client (fixture) command stream's used
// size untouched.
HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldntAddAnyCommandsToCQCSIfEmpty) {
    WhitelistedRegisters whitelistedRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &whitelistedRegs);

    auto &csr = pDevice->getCommandStreamReceiver();
    auto clientUsedBefore = commandStream.getUsed();

    flushTask(csr);

    EXPECT_EQ(clientUsedBefore, commandStream.getUsed());
}
// A blocking flush must put a PIPE_CONTROL into the client (fixture) command
// stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingflushTaskAddsPCToClient) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getCommandStreamReceiver();

    auto isBlocking = true;
    flushTask(csr, isBlocking);

    parseCommands<FamilyType>(commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), pipeControlIt);
}
// The client's task level (5) is behind the CSR's (6). After a blocking flush
// the CSR is expected to report task level 7 and task count 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingFlushWithNoPreviousDependencies) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    taskLevel = 5;
    commandStreamReceiver.taskLevel = 6;

    auto blocking = true;
    flushTask(commandStreamReceiver, blocking);

    EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel());
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
}
// The client's task level (5) is behind the CSR's (6). After a non-blocking
// flush the CSR is expected to stay at task level 6 with task count 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, nonblockingFlushWithNoPreviousDependencies) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    taskLevel = 5;
    commandStreamReceiver.taskLevel = 6;

    auto blocking = false;
    flushTask(commandStreamReceiver, blocking);

    EXPECT_EQ(6u, commandStreamReceiver.peekTaskLevel());
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
}
// Consumes the CSR stream until only the preamble's size is left available,
// flushes, and expects the stream's used size to equal the cache-line-aligned
// total of preamble, state base address, pipe control, preemption and batch
// buffer end sizes.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreamble) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    WhitelistedRegisters whitelistedRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &whitelistedRegs);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // A task level transition forces a PIPE_CONTROL.
    taskLevel = csr.peekTaskLevel() + 1;
    csr.lastSentCoherencyRequest = 0;

    auto l3Config = PreambleHelper<FamilyType>::getL3Config(pDevice->getHardwareInfo(), false);
    csr.lastSentL3Config = l3Config;

    auto &csrStream = csr.getCS();

    size_t preambleSize = getSizeRequiredPreambleCS<FamilyType>(MockDevice(csr.hwInfo));
    size_t stateBaseAddressSize = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL);
    size_t pipeControlSize = csr.getRequiredPipeControlSize();
    size_t preemptionSize = PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(pDevice->getPreemptionMode(), csr.lastPreemptionMode);

    size_t expectedUsed = preambleSize +
                          stateBaseAddressSize +
                          pipeControlSize +
                          preemptionSize +
                          sizeof(MI_BATCH_BUFFER_END);
    expectedUsed = alignUp(expectedUsed, MemoryConstants::cacheLineSize);

    // Leave exactly the preamble's worth of space in the current buffer.
    csrStream.getSpace(csrStream.getAvailableSpace() - preambleSize);

    flushTask(csr);

    EXPECT_EQ(expectedUsed, csrStream.getUsed());
}
// Same scenario as the preamble-only variant, but the CSR stream is left with
// room for preamble + state base address before the flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreambleAndSba) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Force a PIPE_CONTROL through a taskLevel transition
    taskLevel = csr.peekTaskLevel() + 1;
    csr.lastSentCoherencyRequest = 0;
    csr.lastSentL3Config = PreambleHelper<FamilyType>::getL3Config(pDevice->getHardwareInfo(), false);

    auto &csrStream = csr.getCS();

    // Individual contributions to the expected stream usage
    const size_t preambleSize = getSizeRequiredPreambleCS<FamilyType>(MockDevice(csr.hwInfo));
    const size_t stateBaseAddressSize = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL);
    const size_t pipeControlSize = csr.getRequiredPipeControlSize();
    const size_t preemptionSize = PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(pDevice->getPreemptionMode(), csr.lastPreemptionMode);

    size_t expectedUsed = preambleSize + stateBaseAddressSize + pipeControlSize + preemptionSize + sizeof(MI_BATCH_BUFFER_END);
    expectedUsed = alignUp(expectedUsed, MemoryConstants::cacheLineSize);

    // Leave room for the preamble and the state base address only
    csrStream.getSpace(csrStream.getAvailableSpace() - preambleSize - stateBaseAddressSize);

    flushTask(csr);

    EXPECT_EQ(expectedUsed, csrStream.getUsed());
}
// Leaves the CSR stream with exactly getRequiredCmdStreamSizeAligned() bytes free
// and checks that flushTask appends in place instead of switching buffers.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreambleSbaAndPc) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    commandStream.getSpace(sizeof(PIPE_CONTROL));
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Force a PIPE_CONTROL through a taskLevel transition
    taskLevel = csr.peekTaskLevel() + 1;
    csr.lastSentCoherencyRequest = 0;
    csr.lastSentL3Config = PreambleHelper<FamilyType>::getL3Config(pDevice->getHardwareInfo(), false);

    auto &csrStream = csr.getCS();

    size_t appendedSize = getSizeRequiredPreambleCS<FamilyType>(MockDevice(csr.hwInfo)) +
                          sizeof(STATE_BASE_ADDRESS) +
                          sizeof(PIPE_CONTROL) +
                          sizeof(PIPELINE_SELECT) +
                          csr.getRequiredPipeControlSize() +
                          sizeof(MI_BATCH_BUFFER_START);
    appendedSize = alignUp(appendedSize, MemoryConstants::cacheLineSize);

    DispatchFlags flags;
    csrStream.getSpace(csrStream.getAvailableSpace() - csr.getRequiredCmdStreamSizeAligned(flags));

    // This case handles when we have *just* enough space
    auto baseBeforeFlush = csrStream.getBase();
    auto usedAfterFlush = csrStream.getUsed() + appendedSize;

    flushTask(csr);

    // Verify that we didn't grab a new CS buffer
    EXPECT_EQ(usedAfterFlush, csrStream.getUsed());
    EXPECT_EQ(baseBeforeFlush, csrStream.getBase());
}
// ULT command stream receiver that records how many times flush() was invoked.
// flush() does no submission work: it only bumps the counter and returns stamp 0.
template <typename FamilyType>
struct CommandStreamReceiverHwLog : public UltCommandStreamReceiver<FamilyType> {
    CommandStreamReceiverHwLog(const HardwareInfo &hwInfoIn) : UltCommandStreamReceiver<FamilyType>(hwInfoIn) {
    }

    FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override {
        flushCount++;
        return 0;
    }

    // Number of flush() calls observed since construction
    int flushCount = 0;
};
// Counts flush() invocations made by a single flushTask and expects exactly one.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithBothCSCallsFlushOnce) {
    using MI_NOOP = typename FamilyType::MI_NOOP;

    CommandStreamReceiverHwLog<FamilyType> countingCsr(*platformDevices[0]);
    countingCsr.setMemoryManager(pDevice->getMemoryManager());

    // Reserve one NOOP in the task stream before flushing
    commandStream.getSpace(sizeof(MI_NOOP));

    flushTask(countingCsr);

    EXPECT_EQ(1, countingCsr.flushCount);
}
// Flushes a task that starts part-way into the task stream and checks that the
// CSR stream contains an MI_BATCH_BUFFER_START pointing at that start offset.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithBothCSCallsChainsWithBatchBufferStart) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_NOOP = typename FamilyType::MI_NOOP;

    UltCommandStreamReceiver<FamilyType> csr(*platformDevices[0]);
    csr.setMemoryManager(pDevice->getMemoryManager());

    // Reserve space for 16 NOOPs
    commandStream.getSpace(16 * sizeof(MI_NOOP));

    // Submit starting at 8 NOOPs
    size_t taskStartOffset = 8 * sizeof(MI_NOOP);
    flushTask(csr, false, taskStartOffset);

    // Locate the MI_BATCH_BUFFER_START
    parseCommands<FamilyType>(csr.commandStream, 0);
    auto bbStartIt = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), bbStartIt);

    auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
    ASSERT_NE(nullptr, bbStartCmd);

    // Expect to see address based on startOffset of task
    auto taskStartPtr = ptrOffset(commandStream.getBase(), taskStartOffset);
    auto expectedAddress = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(taskStartPtr));
    EXPECT_EQ(expectedAddress, bbStartCmd->getBatchBufferStartAddressGraphicsaddress472());

    // MI_BATCH_BUFFER_START from UMD must be PPGTT for security reasons
    EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT, bbStartCmd->getAddressSpaceIndicator());
}
// Fixture alias for flushTask tests that only need a device (plain DeviceFixture).
using CommandStreamReceiverCQFlushTaskTests = Test<DeviceFixture>;
// Fills a command queue stream up to the space the queue reserves for CSR traffic,
// performs a blocking flushTask and checks the resulting stream usage.
HWTEST_F(CommandStreamReceiverCQFlushTaskTests, getCSShouldReturnACSWithEnoughSizeCSRTraffic) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &csr = pDevice->getCommandStreamReceiver();

    // NOTE: This test attempts to reserve the maximum amount
    // of memory such that if a client gets everything he wants
    // we don't overflow/corrupt memory when CSR appends its
    // work.
    size_t reservedByQueue = CSRequirements::minCommandQueueCommandStreamSize;
    size_t requestedSize = 0x1000 - reservedByQueue;

    auto &taskStream = commandQueue.getCS(requestedSize);
    ASSERT_GE(0x1000u, taskStream.getMaxAvailableSpace());
    EXPECT_GE(taskStream.getAvailableSpace(), requestedSize);

    taskStream.getSpace(requestedSize - reservedByQueue);

    // One dummy allocation backs every heap argument passed to flushTask
    GraphicsAllocation dummyAllocation = GraphicsAllocation((void *)0x1234, 1);
    LinearStream dummyHeap(&dummyAllocation);

    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = true;

    csr.flushTask(
        taskStream,
        0,
        dummyHeap,
        dummyHeap,
        dummyHeap,
        dummyHeap,
        1,
        dispatchFlags);

    auto expectedSize = 0x1000u - reservedByQueue;
    if (::renderCoreFamily == IGFX_GEN8_CORE) {
        expectedSize -= sizeof(typename FamilyType::PIPE_CONTROL);
    }
    expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);

    auto usedNow = taskStream.getUsed();
    EXPECT_EQ(0u, usedNow % MemoryConstants::cacheLineSize);

    //depending on the size of commands we may need whole additional cacheline for alignment
    if (usedNow == expectedSize) {
        EXPECT_EQ(expectedSize, usedNow);
    } else {
        EXPECT_EQ(expectedSize - MemoryConstants::cacheLineSize, usedNow);
    }
}
// With a non-dirty CSR, a blocking flush guarded by a pipe control must write the
// PIPE_CONTROL into the task stream and leave the CSR stream empty.
HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingFlushTaskWithOnlyPipeControl) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto mockCsr = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);

    // Configure the CSR to not need to submit any state or commands
    configureCSRtoNonDirtyState<FamilyType>();

    auto &taskStream = commandQueue.getCS();
    auto &csrStream = mockCsr->getCS();

    mockCsr->lastSentCoherencyRequest = 0;

    // Force a PIPE_CONTROL through a blocking flag
    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = true;
    dispatchFlags.guardCommandBufferWithPipeControl = true;

    mockCsr->flushTask(
        taskStream,
        0,
        dsh,
        ih,
        ioh,
        ssh,
        taskLevel,
        dispatchFlags);

    // Verify that taskCS got modified, while csrCS remained intact
    EXPECT_GT(taskStream.getUsed(), 0u);
    EXPECT_EQ(0u, csrStream.getUsed());

    // Parse command list to verify that PC got added to taskCS
    cmdList.clear();
    parseCommands<FamilyType>(taskStream, 0);
    auto pipeControlInTask = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), pipeControlInTask);

    // Parse command list to verify that PC wasn't added to csrCS
    cmdList.clear();
    parseCommands<FamilyType>(csrStream, 0);
    auto pipeControlsInCsr = getCommandsList<PIPE_CONTROL>().size();
    EXPECT_EQ(0u, pipeControlsInCsr);
}
// Blocking flush with dcFlush and guardCommandBufferWithPipeControl set: the task
// stream must contain a PIPE_CONTROL with the DC flush bit enabled.
HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskBlockingHasPipeControlWithDCFlush) {
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    configureCSRtoNonDirtyState<FamilyType>();
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
    auto &commandStreamTask = commandQueue.getCS();

    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = true;
    dispatchFlags.dcFlush = true;
    dispatchFlags.guardCommandBufferWithPipeControl = true;

    commandStreamReceiver.flushTask(
        commandStreamTask,
        0,
        dsh,
        ih,
        ioh,
        ssh,
        taskLevel,
        dispatchFlags);

    parseCommands<FamilyType>(commandStreamTask, 0);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: itorPC is dereferenced in both branches below
    ASSERT_NE(cmdList.end(), itorPC);

    if (::renderCoreFamily != IGFX_GEN8_CORE) {
        // Verify that the dcFlushEnabled bit is set in the first PC found
        auto pCmdWA = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
        EXPECT_EQ(true, pCmdWA->getDcFlushEnable());

        // Look at the command 24 bytes into taskCS; it has to be a PIPE_CONTROL too
        // NOTE(review): 24 presumably equals sizeof(PIPE_CONTROL) - confirm
        auto pipeControlTask = genCmdCast<typename FamilyType::PIPE_CONTROL *>(
            ptrOffset(commandStreamTask.getBase(), 24));
        ASSERT_NE(nullptr, pipeControlTask);

        // Verify that the dcFlushEnabled bit is not set in that second PC
        EXPECT_EQ(false, pipeControlTask->getDcFlushEnable());
    } else {
        // Gen8: verify that the dcFlushEnabled bit is set in the PC that was found
        auto pCmd = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
        EXPECT_EQ(true, pCmd->getDcFlushEnable());
    }
}
// A read-buffer enqueue blocked on a user event submits nothing until the event
// completes; afterwards the task stream must hold a PIPE_CONTROL with DC flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlushWhenUnblockedThenDCFlushIsAdded) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();

    cl_event blockingEvent;
    MockEvent<UserEvent> mockEvent(&ctx);
    blockingEvent = &mockEvent;

    size_t tempBuffer[] = {0, 1, 2};
    size_t dstBuffer[] = {0, 1, 2};
    cl_int retVal = 0;
    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
    // The buffer is dereferenced below; nothing needs cleanup yet if creation failed
    ASSERT_NE(nullptr, buffer);

    auto &commandStreamCSR = commandStreamReceiver.getCS();
    auto &commandStreamTask = commandQueue.getCS();

    commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 1, &blockingEvent, 0);

    // Expect nothing was sent
    EXPECT_EQ(0u, commandStreamCSR.getUsed());

    // Unblock Event
    mockEvent.setStatus(CL_COMPLETE);
    cmdList.clear();

    // Parse command list
    parseCommands<FamilyType>(commandStreamTask, 0);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);

    // Only dereference a valid iterator; the cleanup below must run either way
    if (itorPC != cmdList.end()) {
        // Verify that the dcFlushEnabled bit is set in PC
        auto pCmdWA = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
        EXPECT_EQ(true, pCmdWA->getDcFlushEnable());
    }

    buffer->release();
    BuiltIns::shutDown();
}
// A write-buffer enqueue blocked on a user event submits nothing until the event
// completes; afterwards the PIPE_CONTROL in the task stream must NOT carry DC flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFlushWhenUnblockedThenDCFlushIsNotAdded) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();

    cl_event blockingEvent;
    MockEvent<UserEvent> mockEvent(&ctx);
    blockingEvent = &mockEvent;

    size_t tempBuffer[] = {0, 1, 2};
    size_t dstBuffer[] = {0, 1, 2};
    cl_int retVal = 0;
    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
    // The buffer is dereferenced below; nothing needs cleanup yet if creation failed
    ASSERT_NE(nullptr, buffer);

    auto &commandStreamCSR = commandStreamReceiver.getCS();
    auto &commandStreamTask = commandQueue.getCS();

    commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 1, &blockingEvent, 0);

    // Expect nothing was sent
    EXPECT_EQ(0u, commandStreamCSR.getUsed());

    // Unblock Event
    mockEvent.setStatus(CL_COMPLETE);
    cmdList.clear();

    // Parse command list
    parseCommands<FamilyType>(commandStreamTask, 0);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);

    // Only dereference a valid iterator; the cleanup below must run either way
    if (itorPC != cmdList.end()) {
        // Verify that the dcFlushEnabled bit is NOT set in PC, as the test name requires
        auto pCmdWA = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
        EXPECT_FALSE(pCmdWA->getDcFlushEnable());
    }

    buffer->release();
    BuiltIns::shutDown();
}
// Checks that the completion stamp returned by flushTask mirrors the CSR's task
// count and level, reports device ordinal 0 and the device's engine type.
HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskWithTaskCSPassedAsCommandStreamParam) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    UltCommandStreamReceiver<FamilyType> csr(*platformDevices[0]);
    csr.setMemoryManager(pDevice->getMemoryManager());

    auto &taskStream = commandQueue.getCS();
    auto expectedEngineType = pDevice->getEngineType();

    DispatchFlags dispatchFlags;

    // Pass taskCS as command stream parameter
    auto completionStamp = csr.flushTask(
        taskStream,
        0,
        dsh,
        ih,
        ioh,
        ssh,
        taskLevel,
        dispatchFlags);

    // Verify that flushTask returned a valid completion stamp
    EXPECT_EQ(csr.peekTaskCount(), completionStamp.taskCount);
    EXPECT_EQ(csr.peekTaskLevel(), completionStamp.taskLevel);
    EXPECT_EQ(0u, completionStamp.deviceOrdinal);
    EXPECT_EQ(expectedEngineType, completionStamp.engineType);
}
// finish() on a queue with no enqueued work must not change the CSR's latest sent
// task count nor the queue's latest waited task count.
HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenEmptyQueue) {
    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Start queue, CSR and fixture from the same zeroed counters
    uint32_t initialCount = 0;
    taskLevel = initialCount;
    commandQueue.taskCount = initialCount;
    commandQueue.taskLevel = initialCount;
    csr.taskLevel = initialCount;
    csr.taskCount = initialCount;
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    commandQueue.finish(false);
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    commandQueue.finish(true);
    //nothings sent to the HW, no need to bump tags
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());
    EXPECT_EQ(0u, commandQueue.latestTaskCountWaited);
}
// After one enqueued kernel, finish(false) must report task count 1 as sent and
// waited; a second finish(false) must leave all three counters unchanged.
HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenNonDcFlushWithInitialTaskCountZero) {
    MockContext ctx(pDevice);
    MockKernelWithInternals kernel(*pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    size_t globalWorkSize = 1;

    // Start queue, CSR and fixture from the same zeroed counters
    uint32_t initialCount = 0;
    taskLevel = initialCount;
    commandQueue.taskCount = initialCount;
    commandQueue.taskLevel = initialCount;
    csr.taskLevel = initialCount;
    csr.taskCount = initialCount;
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    // finish after enqueued kernel(cmdq task count = 1)
    commandQueue.enqueueKernel(kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, nullptr);
    commandQueue.finish(false);
    EXPECT_EQ(1u, csr.peekLatestSentTaskCount());
    EXPECT_EQ(1u, commandQueue.latestTaskCountWaited);
    EXPECT_EQ(1u, csr.peekTaskCount());

    // finish again - dont call flush task
    commandQueue.finish(false);
    EXPECT_EQ(1u, csr.peekLatestSentTaskCount());
    EXPECT_EQ(1u, commandQueue.latestTaskCountWaited);
    EXPECT_EQ(1u, csr.peekTaskCount());
}
// Tracks peekLatestSentTaskCount() / latestTaskCountWaited across one kernel
// enqueue followed by blocking map, finish and unmap/map calls; every check
// after the kernel enqueue expects the value to stay at 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenDcFlush) {
MockContext ctx(pDevice);
MockKernelWithInternals kernel(*pDevice);
CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
size_t GWS = 1;
size_t tempBuffer[] = {0, 1, 2};
cl_int retVal;
auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
// Start queue, CSR and fixture from the same zeroed counters
uint32_t taskCount = 0;
taskLevel = taskCount;
commandQueue.taskCount = taskCount;
commandQueue.taskLevel = taskCount;
commandStreamReceiver.taskLevel = taskCount;
commandStreamReceiver.taskCount = taskCount;
EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount());
// finish(dcFlush=true) from blocking MapBuffer after enqueued kernel
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
auto ptr = commandQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
// cmdQ task count = 2, finish again
commandQueue.finish(false);
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
EXPECT_EQ(1u, commandQueue.latestTaskCountWaited);
// finish again - don't flush task again
commandQueue.finish(false);
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
EXPECT_EQ(1u, commandQueue.latestTaskCountWaited);
// finish(dcFlush=true) from MapBuffer again - the queue is already finished,
// so the latest sent task count is expected to stay unchanged
retVal = commandQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
ptr = commandQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
// cleanup: unmap the last mapping and release the buffer
retVal = commandQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
retVal = clReleaseMemObject(buffer);
EXPECT_EQ(retVal, CL_SUCCESS);
}
// Enqueues a kernel with GWS = 1024 and no local size, then checks that the
// GPGPU_WALKER in the task stream has both execution masks fully set.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPowerOfTwoGlobalWorkSizeAndNullLocalWorkgroupSizeWhenEnqueueKernelIsCalledThenGpGpuWalkerHasOptimalSIMDmask) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;

    MockContext ctx(pDevice);
    MockKernelWithInternals kernel(*pDevice);
    size_t globalWorkSize = 1024;
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);

    commandQueue.enqueueKernel(kernel, 1, nullptr, &globalWorkSize, nullptr, 0, nullptr, nullptr);

    auto &taskStream = commandQueue.getCS();
    parseCommands<FamilyType>(taskStream, 0);

    auto walkerIt = find<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(walkerIt, cmdList.end());
    auto walkerCmd = genCmdCast<GPGPU_WALKER *>(*walkerIt);

    //execution masks should be all active
    EXPECT_EQ(0xffffffffu, walkerCmd->getBottomExecutionMask());
    EXPECT_EQ(0xffffffffu, walkerCmd->getRightExecutionMask());
}
// Non-blocking read-buffer enqueue that returns an event: the latest sent task
// count must be 1 after the enqueue and still 1 after waiting on the event.
HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenEventIsQueried) {
    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    cl_event event = nullptr;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    size_t tempBuffer[] = {0, 1, 2};
    size_t dstBuffer[] = {5, 5, 5};
    cl_int retVal;
    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
    EXPECT_EQ(retVal, CL_SUCCESS);

    // Start queue, CSR and fixture from the same zeroed counters
    uint32_t initialCount = 0;
    taskLevel = initialCount;
    commandQueue.taskCount = initialCount;
    commandQueue.taskLevel = initialCount;
    csr.taskLevel = initialCount;
    csr.taskCount = initialCount;
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, &event);
    EXPECT_EQ(1u, csr.peekLatestSentTaskCount());

    Event *pEvent = (Event *)event;
    retVal = Event::waitForEvents(1, &event);
    EXPECT_EQ(1u, csr.peekLatestSentTaskCount());

    // Tear down in the original order: built-ins first, then event and buffer
    BuiltIns::shutDown();
    retVal = clReleaseEvent(pEvent);
    retVal = clReleaseMemObject(buffer);
}
// A non-blocking map followed by finish() must keep the latest sent task count at
// 0 and must not place any PIPE_CONTROL in the task stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingMapWhenFinishIsCalledThenNothingIsSubmittedToTheHardware) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Only the size of this array is used (as the mapped range length)
    size_t tempBuffer[] = {0, 1, 2};
    cl_int retVal;
    AlignedBuffer mockBuffer;

    // Start queue, CSR and fixture from the same zeroed counters
    uint32_t initialCount = 0;
    taskLevel = initialCount;
    commandQueue.taskCount = initialCount;
    commandQueue.taskLevel = initialCount;
    csr.taskLevel = initialCount;
    csr.taskCount = initialCount;
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    auto mappedPtr = commandQueue.enqueueMapBuffer(&mockBuffer, CL_FALSE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal);
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_NE(nullptr, mappedPtr);
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    commandQueue.finish(false);
    EXPECT_EQ(0u, csr.peekLatestSentTaskCount());

    auto &taskStream = commandQueue.getCS();
    parseCommands<FamilyType>(taskStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(cmdList.end(), pipeControlIt);
}
// Two read-buffer enqueues (non-blocking, then blocking) with CPU copy disallowed:
// the PIPE_CONTROL following the first GPGPU_WALKER must have DC flush enabled.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenFlushedCallRequiringDCFlushWhenBlockingEnqueueIsCalledThenPipeControlWithDCFlushIsAdded) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    cl_event event = nullptr;
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
    auto &commandStreamTask = commandQueue.getCS();

    size_t tempBuffer[] = {0, 1, 2};
    size_t dstBuffer[] = {5, 5, 5};
    cl_int retVal;
    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount());

    // Call requiring DCFlush, nonblocking
    buffer->forceDisallowCPUCopy = true;
    commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, 0);
    EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());

    commandQueue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, &event);
    EXPECT_EQ(2u, commandStreamReceiver.peekLatestSentTaskCount());
    EXPECT_EQ(2u, commandQueue.latestTaskCountWaited);

    // Parse command list to verify that PC was added to taskCS
    cmdList.clear();
    parseCommands<FamilyType>(commandStreamTask, 0);
    auto itorWalker = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    auto itorCmd = find<PIPE_CONTROL *>(itorWalker, cmdList.end());
    ASSERT_NE(cmdList.end(), itorCmd);
    auto cmdPC = genCmdCast<PIPE_CONTROL *>(*itorCmd);
    ASSERT_NE(nullptr, cmdPC);

    if (::renderCoreFamily != IGFX_GEN8_CORE) {
        // SKL+: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag
        EXPECT_TRUE(cmdPC->getDcFlushEnable());
        auto itorCmdP = ++((GenCmdList::iterator)itorCmd);
        EXPECT_NE(cmdList.end(), itorCmdP);
        auto itorCmd2 = find<PIPE_CONTROL *>(itorCmdP, cmdList.end());
        // Fatal on purpose: itorCmd2 is dereferenced on the next line
        ASSERT_NE(cmdList.end(), itorCmd2);
        cmdPC = (PIPE_CONTROL *)*itorCmd2;
        EXPECT_FALSE(cmdPC->getDcFlushEnable());
    } else {
        // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag
        EXPECT_TRUE(cmdPC->getDcFlushEnable());
    }

    BuiltIns::shutDown();
    retVal = clReleaseEvent(event);
    retVal = clReleaseMemObject(buffer);
}
// With lastSentL3Config forced to 0 and a kernel using SLM, the CSR stream must
// contain a PIPE_CONTROL followed by an MI_LOAD_REGISTER_IMM that writes the SLM
// L3 configuration to the L3 control register.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) {
    typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    size_t GWS = 1;
    MockContext ctx(pDevice);
    MockKernelWithInternals kernel(*pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    auto &commandStreamCSR = commandStreamReceiver->getCS();

    // Mark preamble as sent, override L3Config to invalid to force L3 programming
    commandStreamReceiver->isPreambleSent = true;
    commandStreamReceiver->lastSentL3Config = 0;
    ((MockKernel *)kernel)->setTotalSLMSize(1024);

    cmdList.clear();
    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);

    // Parse the CSR stream and locate the write to the L3 control register
    parseCommands<FamilyType>(commandStreamCSR, 0);
    auto itorCmd = findMmio<FamilyType>(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset<FamilyType>::registerOffset);
    ASSERT_NE(cmdList.end(), itorCmd);
    auto cmdMILoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorCmd);
    ASSERT_NE(nullptr, cmdMILoad);

    // MI_LOAD_REGISTER should be preceded by PC.
    // Fatal on purpose: decrementing begin() would be undefined behaviour.
    ASSERT_NE(cmdList.begin(), itorCmd);
    --itorCmd;
    auto cmdPC = genCmdCast<PIPE_CONTROL *>(*itorCmd);
    ASSERT_NE(nullptr, cmdPC);

    uint32_t L3Config = PreambleHelper<FamilyType>::getL3Config(*platformDevices[0], true);
    EXPECT_EQ(L3Config, (uint32_t)cmdMILoad->getDataDword());
}
// A freshly constructed MockCsrHw must report the round-robin thread arbitration policy.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) {
    MockCsrHw<FamilyType> freshCsr(*platformDevices[0]);

    EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin,
              freshCsr.peekThreadArbitrationPolicy());
}
// When the CSR already recorded the SLM L3 configuration (and the kernel's thread
// arbitration policy) as sent, enqueuing an SLM kernel must not write the L3
// control register again.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) {
    size_t GWS = 1;
    MockContext ctx(pDevice);
    MockKernelWithInternals kernel(*pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    auto &commandStreamCSR = commandStreamReceiver->getCS();

    uint32_t L3Config = PreambleHelper<FamilyType>::getL3Config(*platformDevices[0], true);

    // Mark preamble as sent, override L3Config to SLM config
    commandStreamReceiver->isPreambleSent = true;
    commandStreamReceiver->lastSentL3Config = L3Config;
    commandStreamReceiver->lastSentThreadAribtrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy();
    ((MockKernel *)kernel)->setTotalSLMSize(1024);

    cmdList.clear();
    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);

    // Parse the CSR stream to verify that the L3 control register was not written
    parseCommands<FamilyType>(commandStreamCSR, 0);
    auto itorCmd = findMmio<FamilyType>(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset<FamilyType>::registerOffset);
    EXPECT_EQ(cmdList.end(), itorCmd);
}
// An SLM kernel blocked on a user event writes nothing to the CSR stream; once the
// event completes, the CSR stream must program the L3 control register with the
// SLM L3 configuration.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblocking) {
    typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
    size_t GWS = 1;
    MockContext ctx(pDevice);
    MockKernelWithInternals kernel(*pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    cl_event blockingEvent;
    MockEvent<UserEvent> mockEvent(&ctx);
    blockingEvent = &mockEvent;

    auto &commandStreamCSR = commandStreamReceiver->getCS();

    // Mark preamble as sent, override L3Config to invalid to force L3 programming
    commandStreamReceiver->isPreambleSent = true;
    commandStreamReceiver->lastSentL3Config = 0;
    ((MockKernel *)kernel)->setTotalSLMSize(1024);

    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 1, &blockingEvent, nullptr);

    // Expect nothing was sent
    EXPECT_EQ(0u, commandStreamCSR.getUsed());

    // Unblock Event
    mockEvent.setStatus(CL_COMPLETE);
    cmdList.clear();

    // Parse command list
    parseCommands<FamilyType>(commandStreamCSR, 0);

    // Expect L3 was programmed
    auto itorCmd = findMmio<FamilyType>(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset<FamilyType>::registerOffset);
    ASSERT_NE(cmdList.end(), itorCmd);
    auto cmdMILoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorCmd);
    ASSERT_NE(nullptr, cmdMILoad);

    // The programmed value must be the SLM-enabled configuration
    uint32_t L3Config = PreambleHelper<FamilyType>::getL3Config(*platformDevices[0], true);
    EXPECT_EQ(L3Config, (uint32_t)cmdMILoad->getDataDword());
}
// Forward declaration of the runtime's command stream factory so that the
// CreateCommandStreamReceiverHw test below can call it; the definition lives
// outside this file.
namespace OCLRT {
CommandStreamReceiver *createCommandStream(const HardwareInfo *pHwInfo);
}
// Creates a CommandStreamReceiverHw directly and through OCLRT::createCommandStream
// (with the SetCommandStreamReceiver debug flag forced to 0) and checks both exist.
HWTEST_F(CommandStreamReceiverFlushTaskTests, CreateCommandStreamReceiverHw) {
    const HardwareInfo hwInfo = *platformDevices[0];

    auto csrHw = CommandStreamReceiverHw<FamilyType>::create(hwInfo);
    // Fatal on purpose: csrHw is dereferenced on the next line
    ASSERT_NE(nullptr, csrHw);
    MemoryManager *mm = csrHw->createMemoryManager(false);
    EXPECT_EQ(nullptr, mm);
    delete csrHw;

    // Remember the current flag value so this test does not leak global state
    const auto savedCsrType = DebugManager.flags.SetCommandStreamReceiver.get();

    DebugManager.flags.SetCommandStreamReceiver.set(0);
    int32_t GetCsr = DebugManager.flags.SetCommandStreamReceiver.get();
    EXPECT_EQ(0, GetCsr);

    auto csr = OCLRT::createCommandStream(&hwInfo);
    EXPECT_NE(nullptr, csr);
    delete csr;

    // Restore whatever value the flag had before the test touched it
    DebugManager.flags.SetCommandStreamReceiver.set(savedCsrType);
}
// Flushes twice with a non-zero required scratch size and checks that, on each
// flush, both the tag allocation and the scratch allocation are recorded as made
// resident and made non-resident, even though the scratch allocation is reused.
HWTEST_F(CommandStreamReceiverFlushTaskTests, handleTagAndScratchAllocationsResidencyOnEachFlush) {
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
commandStreamReceiver->setRequiredScratchSize(1024); // whatever > 0
flushTask(*commandStreamReceiver);
auto tagAllocation = commandStreamReceiver->getTagAllocation();
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
// Both allocations are used as lookup keys below, so they must exist
ASSERT_NE(tagAllocation, nullptr);
ASSERT_NE(scratchAllocation, nullptr);
EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation));
EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation));
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation));
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation));
// call makeResident on tag and scratch allocations per each flush
// DON'T skip residency calls when scratch allocation is the same (new required size <= previous size)
commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this
commandStreamReceiver->madeNonResidentGfxAllocations.clear();
flushTask(*commandStreamReceiver); // 2nd flush
auto NewScratchAllocation = commandStreamReceiver->getScratchAllocation();
EXPECT_EQ(scratchAllocation, NewScratchAllocation); // Allocation unchanged. Don't skip residency handling
EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation));
EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation));
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation));
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation));
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTwoConsecutiveNDRangeKernelsStateBaseAddressIsProgrammedOnceAndScratchAddressInMediaVFEStateIsProgrammedTwiceBothWithCorrectAddress) {
typedef typename FamilyType::PARSE PARSE;
typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE;
typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
MockContext ctx(pDevice);
MockKernelWithInternals kernel(*pDevice);
CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
size_t GWS = 1;
uint32_t scratchSize = 1024;
SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
EXPECT_EQ(false, kernel.mockKernel->isBuiltIn);
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed());
}
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed());
}
cmdList.clear();
// Parse command list
parseCommands<FamilyType>(commandQueue);
// All state should be programmed before walker
auto itorCmdForStateBase = itorStateBaseAddress;
auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState;
auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation();
ASSERT_NE(itorCmdForStateBase, itorWalker);
auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase;
auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress();
uint64_t graphicsAddress = 0;
// Get address ( offset in 32 bit addressing ) of sratch
graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch();
if (pDevice->getDeviceInfo().force32BitAddressess == true && is64bit) {
EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation);
EXPECT_EQ((uint64_t)graphicsAllocationScratch->getGpuAddress() - GSHaddress, graphicsAddress);
} else {
EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress);
}
uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff);
uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff);
uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer();
uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh();
if (is64bit && !pDevice->getDeviceInfo().force32BitAddressess) {
uint64_t expectedAddress = PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit();
EXPECT_EQ(expectedAddress, scratchBaseLowPart);
EXPECT_EQ(0u, scratchBaseHighPart);
} else {
EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart);
EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart);
}
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_EQ(pDevice->getMemoryManager()->allocator32Bit->getBase(), GSHaddress);
} else {
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
if (is64bit) {
EXPECT_EQ(graphicsAddress - PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), GSHaddress);
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
} else {
EXPECT_EQ(0u, GSHaddress);
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
}
}
//now re-try to see if SBA is not programmed
scratchSize *= 2;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
// Parse command list
parseCommands<FamilyType>(commandQueue);
itorCmdForStateBase = find<STATE_BASE_ADDRESS *>(itorWalker, cmdList.end());
// In 32 Bit addressing sba shouldn't be reprogrammed
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_EQ(itorCmdForStateBase, cmdList.end());
}
auto itorMediaVfeStateSecond = find<MEDIA_VFE_STATE *>(itorWalker, cmdList.end());
auto *cmdMediaVfeStateSecond = (MEDIA_VFE_STATE *)*itorMediaVfeStateSecond;
EXPECT_NE(mediaVfeState, cmdMediaVfeStateSecond);
uint64_t oldScratchAddr = ((uint64_t)scratchBaseHighPart << 32u) | scratchBaseLowPart;
uint64_t newScratchAddr = ((uint64_t)cmdMediaVfeStateSecond->getScratchSpaceBasePointerHigh() << 32u) | cmdMediaVfeStateSecond->getScratchSpaceBasePointer();
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_NE(oldScratchAddr, newScratchAddr);
}
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNDRangeKernelAndReadBufferStateBaseAddressAndScratchAddressInMediaVFEStateIsProgrammedForNDRangeAndReprogramedForReadBufferAndGSBAFlagIsResetToFalse) {
typedef typename FamilyType::PARSE PARSE;
typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE;
typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
MockContext ctx(pDevice);
MockKernelWithInternals kernel(*pDevice);
CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
size_t GWS = 1;
uint32_t scratchSize = 1024;
SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
EXPECT_EQ(false, kernel.mockKernel->isBuiltIn);
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed());
}
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed());
}
cmdList.clear();
// Parse command list
parseCommands<FamilyType>(commandQueue);
// All state should be programmed before walker
auto itorCmdForStateBase = itorStateBaseAddress;
auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState;
auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation();
ASSERT_NE(itorCmdForStateBase, itorWalker);
auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase;
auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress();
uint64_t graphicsAddress = 0;
// Get address ( offset in 32 bit addressing ) of sratch
graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch();
if (pDevice->getDeviceInfo().force32BitAddressess == true && is64bit) {
EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation);
EXPECT_EQ((uint64_t)graphicsAllocationScratch->getGpuAddress() - GSHaddress, graphicsAddress);
} else {
EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress);
}
uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff);
uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff);
uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer();
uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh();
if (is64bit && !pDevice->getDeviceInfo().force32BitAddressess) {
lowPartGraphicsAddress = PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit();
highPartGraphicsAddress = 0u;
}
EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart);
EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart);
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_EQ(pDevice->getMemoryManager()->allocator32Bit->getBase(), GSHaddress);
} else {
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
if (is64bit) {
EXPECT_EQ(graphicsAddress - PreambleHelper<FamilyType>::getScratchSpaceOffsetFor64bit(), GSHaddress);
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
} else {
EXPECT_EQ(0u, GSHaddress);
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
}
}
size_t tempBuffer[] = {0, 1, 2};
size_t dstBuffer[] = {0, 0, 0};
cl_int retVal = 0;
auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, 0);
// Parse command list
parseCommands<FamilyType>(commandQueue);
itorCmdForStateBase = find<STATE_BASE_ADDRESS *>(itorWalker, cmdList.end());
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_NE(itorWalker, itorCmdForStateBase);
if (itorCmdForStateBase != cmdList.end()) {
auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase;
auto GSHaddress2 = (uintptr_t)sba2->getGeneralStateBaseAddress();
EXPECT_NE(sba, sba2);
EXPECT_EQ(0u, GSHaddress2);
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed());
}
}
}
delete buffer;
if (pDevice->getDeviceInfo().force32BitAddressess == true) {
// Asserts placed after restoring old CSR to avoid heap corruption
ASSERT_NE(itorCmdForStateBase, cmdList.end());
}
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeDoNotStore32bitScratchAllocationOnReusableAllocationList) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.Force32bitAddressing.set(true);
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*platformDevices[0]);
pDevice->getMemoryManager()->setForce32BitAllocations(true);
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size)
flushTask(*commandStreamReceiver);
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
ASSERT_NE(scratchAllocation, nullptr);
commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size
flushTask(*commandStreamReceiver); // 2nd flush
auto newScratchAllocation = commandStreamReceiver->getScratchAllocation();
EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed
std::unique_ptr<GraphicsAllocation> allocationReusable = pDevice->getMemoryManager()->obtainReusableAllocation(4096);
if (allocationReusable.get() != nullptr) {
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
if (is64bit) {
EXPECT_NE(scratchAllocation, allocationReusable.get());
Add braces to avoid ambiguous 'else' For the new Linux/Fedora configuration with introduction of gcc 7.2, compilation of this file issues a new warning due to the correct diagnosis of an ambiguous 'else'. As warnings are being treated as errors, this aborts the build. The diagnostic: vpg-compute-neo/unit_tests/elflib/elflib_tests.cpp:123:12: error: suggest explicit braces to avoid ambiguous 'else' [-Werror=dangling-else] if (nonfailingAllocation == failureIndex) ^ Diagnosis: The diagnostic suggested that this: if (nonfailingAllocation == failureIndex) ASSERT_NE(nullptr, pWriter); should be changed to: if (nonfailingAllocation == failureIndex) { ASSERT_NE(nullptr, pWriter); } This is a valid suggestion. The same is true for EXPECT_EQ. Pick the files in repository ssh://gerrit-gfx.intel.com:29418/mirrors/github/google/googletest for tracing the definition. (There are many versions of gtest.h under the ufo tree). Starting in file include/gtest/gtest.h, the definition of ASSERT_NE can be traced back towards its origin as follows: ASSERT_NE include/gtest/gtest.h GTEST_ASSERT_NE include/gtest/gtest.h ASSERT_PRED_FORMAT2 include/gtest/gtest_pred_impl.h GTEST_PRED_FORMAT2_ include/gtest/gtest_pred_impl.h GTEST_ASSERT_ include/gtest/gtest_pred_impl.h where GTEST_ASSERT_ indeed *should* be enclosed in braces. GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) The correct fix would be to place the braces in the macro definition. However, as file gtest.h comes from Google, and as there are 37 different versions of it in the source tree, this workaround will address the macro invocations. Should it be desirable, it is left to others to correct gtest.h and friends. Change-Id: I870d38ba623fc7564f894c7b1ea7512b74244ee2 Signed-off-by: Dale Stimson <dale.b.stimson@intel.com>
2018-01-23 17:00:40 -08:00
}
pDevice->getMemoryManager()->freeGraphicsMemory(allocationReusable.release());
}
}
// On 64-bit builds with 32-bit addressing forced: after the scratch allocation is replaced
// by a second flush with a larger required size, the first scratch allocation is expected
// to be the one detached from the memory manager's graphicsAllocations list.
HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeStore32bitScratchAllocationOnTemporaryAllocationList) {
    // Nothing to verify on non-64-bit builds
    if (!is64bit) {
        return;
    }

    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.Force32bitAddressing.set(true);

    auto csr = new MockCsrHw<FamilyType>(*platformDevices[0]);
    pDevice->getMemoryManager()->setForce32BitAllocations(true);
    pDevice->resetCommandStreamReceiver(csr);

    // First flush: any non-zero scratch size (one page here)
    csr->setRequiredScratchSize(4096);
    flushTask(*csr);
    auto firstScratch = csr->getScratchAllocation();
    ASSERT_NE(firstScratch, nullptr);

    // Second flush: a larger size, so a different allocation is expected
    csr->setRequiredScratchSize(8196);
    flushTask(*csr);
    auto secondScratch = csr->getScratchAllocation();
    EXPECT_NE(firstScratch, secondScratch);

    auto *memoryManager = pDevice->getMemoryManager();
    std::unique_ptr<GraphicsAllocation> detached = memoryManager->graphicsAllocations.detachAllocation(0, nullptr);
    EXPECT_EQ(firstScratch, detached.get());

    // Ownership goes back to the memory manager for release
    memoryManager->freeGraphicsMemory(detached.release());
}
// The CacheSettings constants must equal the corresponding GMM resource usage values.
TEST(CacheSettings, GivenCacheSettingWhenCheckedForValuesThenProperValuesAreSelected) {
    const auto expectedL3Off = static_cast<uint32_t>(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    EXPECT_EQ(expectedL3Off, CacheSettings::l3CacheOff);

    const auto expectedL3On = static_cast<uint32_t>(GMM_RESOURCE_USAGE_OCL_BUFFER);
    EXPECT_EQ(expectedL3On, CacheSettings::l3CacheOn);
}
// With the FlushAllCaches debug flag set, the PIPE_CONTROL emitted by addPipeControl()
// must have every flush/invalidate bit checked below enabled.
HWTEST_F(UltCommandStreamReceiverTest, addPipeControlWithFlushAllCaches) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.FlushAllCaches.set(true);

    // Room for up to three PIPE_CONTROL commands
    char buff[sizeof(PIPE_CONTROL) * 3];
    LinearStream stream(buff, sizeof(buff));

    auto &csr = pDevice->getCommandStreamReceiver();
    csr.addPipeControl(stream, false);

    parseCommands<FamilyType>(stream, 0);

    PIPE_CONTROL *flushingPipeControl = getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, flushingPipeControl);

    // Two parsed commands means a WA pipeControl was added first; step over it
    const bool workaroundPresent = (cmdList.size() == 2);
    if (workaroundPresent) {
        ++flushingPipeControl;
    }

    EXPECT_TRUE(flushingPipeControl->getDcFlushEnable());
    EXPECT_TRUE(flushingPipeControl->getRenderTargetCacheFlushEnable());
    EXPECT_TRUE(flushingPipeControl->getInstructionCacheInvalidateEnable());
    EXPECT_TRUE(flushingPipeControl->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(flushingPipeControl->getPipeControlFlushEnable());
    EXPECT_TRUE(flushingPipeControl->getVfCacheInvalidationEnable());
    EXPECT_TRUE(flushingPipeControl->getConstantCacheInvalidationEnable());
    EXPECT_TRUE(flushingPipeControl->getStateCacheInvalidationEnable());
}
// With ThreadGroup preemption set, the first flush must emit MEDIA_VFE_STATE; after the
// dirty flag is overridden to false, the second flush must not emit it again.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledPreemptionWhenFlushTaskCalledThenDontProgramMediaVfeStateAgain) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    HardwareParse parser;

    // First flush: MEDIA_VFE_STATE is expected in the CSR command stream
    flushTask(ultCsr, false, 0);
    parser.parseCommands<FamilyType>(ultCsr.commandStream, 0);
    auto firstVfeState = parser.getCommand<MEDIA_VFE_STATE>();
    EXPECT_NE(nullptr, firstVfeState);

    // Second flush with the VFE state marked clean; parse only what this flush added
    ultCsr.overrideMediaVFEStateDirty(false);
    auto secondFlushStart = ultCsr.commandStream.getUsed();
    flushTask(ultCsr, false, commandStream.getUsed());

    parser.cmdList.clear();
    parser.parseCommands<FamilyType>(ultCsr.commandStream, secondFlushStart);
    auto secondVfeState = parser.getCommand<MEDIA_VFE_STATE>();
    EXPECT_EQ(nullptr, secondVfeState);
}
// With the preamble marked as sent and SLM requested in the dispatch flags, a flush across
// a task-level transition must consume exactly the expected, cache-line aligned number of
// bytes in the CSR command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAndL3ConfigChanged) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    commandStream.getSpace(sizeof(PIPE_CONTROL));

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Bump the task level so the flush has to emit a PIPE_CONTROL
    taskLevel = csr.peekTaskLevel() + 1;

    csr.isPreambleSent = true;
    csr.lastPreemptionMode = pDevice->getPreemptionMode();
    csr.lastMediaSamplerConfig = 0;
    csr.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;

    auto &csrStream = csr.getCS();

    // Three PIPE_CONTROLs in total plus the other fixed-size commands and the CSR's own
    // PIPE_CONTROL requirement, rounded up to a cache line
    size_t expectedBytes = 3 * sizeof(PIPE_CONTROL);
    expectedBytes += sizeof(MI_LOAD_REGISTER_IMM);
    expectedBytes += sizeof(MEDIA_VFE_STATE);
    expectedBytes += sizeof(MI_BATCH_BUFFER_START);
    expectedBytes += sizeof(STATE_BASE_ADDRESS);
    expectedBytes += csr.getRequiredPipeControlSize();
    expectedBytes = alignUp(expectedBytes, MemoryConstants::cacheLineSize);

    auto usedAfterFlush = csrStream.getUsed() + expectedBytes;

    DispatchFlags dispatchFlags;
    dispatchFlags.useSLM = true;
    csr.flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags);

    // The stream must have grown by exactly the expected amount (no new CS buffer grabbed)
    EXPECT_EQ(usedAfterFlush, csrStream.getUsed());
}
// With the preamble already sent, setting l3ConfigChanged must grow the required CSR
// command stream size by exactly sizeof(PIPE_CONTROL).
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequiredCsrSizeDependsOnL3ConfigChanged) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getCommandStreamReceiver());
    csr.isPreambleSent = true;

    CsrSizeRequestFlags sizeRequest = {};
    DispatchFlags flags;

    // Applies the given l3ConfigChanged value and returns the resulting required size
    auto requiredSizeFor = [&](bool l3ConfigChanged) {
        sizeRequest.l3ConfigChanged = l3ConfigChanged;
        csr.overrideCsrSizeReqFlags(sizeRequest);
        return csr.getRequiredCmdStreamSize(flags);
    };

    auto sizeWithChange = requiredSizeFor(true);
    auto sizeWithoutChange = requiredSizeFor(false);

    EXPECT_NE(sizeWithoutChange, sizeWithChange);

    auto extraBytes = sizeWithChange - sizeWithoutChange;
    EXPECT_EQ(sizeof(PIPE_CONTROL), extraBytes);
}
// While the preamble has not been sent, the required CSR command stream size must be the
// same whether or not l3ConfigChanged is set.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleNotSentThenRequiredCsrSizeDoesntDependOnL3ConfigChanged) {
    auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getCommandStreamReceiver());
    csr.isPreambleSent = false;

    DispatchFlags flags;
    CsrSizeRequestFlags sizeRequest = {};

    // Query with the L3 config flagged as changed ...
    sizeRequest.l3ConfigChanged = true;
    csr.overrideCsrSizeReqFlags(sizeRequest);
    auto sizeWithChange = csr.getRequiredCmdStreamSize(flags);

    // ... and again with it cleared
    sizeRequest.l3ConfigChanged = false;
    csr.overrideCsrSizeReqFlags(sizeRequest);
    auto sizeWithoutChange = csr.getRequiredCmdStreamSize(flags);

    EXPECT_EQ(sizeWithoutChange, sizeWithChange);
}
// While the preamble has not been sent, the required CSR command stream size must be the
// same whether or not mediaSamplerConfigChanged is set.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleNotSentThenRequiredCsrSizeDoesntDependOnmediaSamplerConfigChanged) {
    auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getCommandStreamReceiver());
    csr.isPreambleSent = false;

    CsrSizeRequestFlags sizeRequest = {};
    DispatchFlags flags;

    // Applies the given mediaSamplerConfigChanged value and returns the required size
    auto requiredSizeFor = [&](bool mediaSamplerConfigChanged) {
        sizeRequest.mediaSamplerConfigChanged = mediaSamplerConfigChanged;
        csr.overrideCsrSizeReqFlags(sizeRequest);
        return csr.getRequiredCmdStreamSize(flags);
    };

    auto sizeWithoutChange = requiredSizeFor(false);
    auto sizeWithChange = requiredSizeFor(true);

    EXPECT_EQ(sizeWithChange, sizeWithoutChange);
}
// With isPreambleSent set, requesting a media sampler config change must grow the
// required command stream size by exactly one PIPELINE_SELECT command.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequiredCsrSizeDependsOnmediaSamplerConfigChanged) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getCommandStreamReceiver());
    csr.isPreambleSent = true;

    DispatchFlags dispatchFlags;
    CsrSizeRequestFlags sizeRequestFlags = {};

    // Baseline: no media sampler change requested.
    sizeRequestFlags.mediaSamplerConfigChanged = false;
    csr.overrideCsrSizeReqFlags(sizeRequestFlags);
    const auto sizeWithoutChange = csr.getRequiredCmdStreamSize(dispatchFlags);

    // Same query with the change requested.
    sizeRequestFlags.mediaSamplerConfigChanged = true;
    csr.overrideCsrSizeReqFlags(sizeRequestFlags);
    const auto sizeWithChange = csr.getRequiredCmdStreamSize(dispatchFlags);

    EXPECT_NE(sizeWithChange, sizeWithoutChange);
    EXPECT_EQ(sizeof(PIPELINE_SELECT), sizeWithChange - sizeWithoutChange);
}
// The samplerCacheFlushBefore state must add one PIPE_CONTROL to the required command
// stream size, but only when waSamplerCacheFlushBetweenRedescribedSurfaceReads is enabled.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenSamplerCacheFlushSentThenRequiredCsrSizeContainsPipecontrolSize) {
    // Alias the command this test actually measures (the previous PIPELINE_SELECT typedef was unused).
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    UltCommandStreamReceiver<FamilyType> &commandStreamReceiver = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getCommandStreamReceiver());
    CsrSizeRequestFlags csrSizeRequest = {};
    DispatchFlags flags;
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest);

    // Without touching the workaround table, the flush state is expected to have no effect on the size.
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired);
    auto samplerCacheNotFlushedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    auto samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    EXPECT_EQ(samplerCacheNotFlushedSize, samplerCacheFlushBeforeSize);

    // Force the workaround on; the previous value is saved and restored at the end of the test.
    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    auto difference = samplerCacheFlushBeforeSize - samplerCacheNotFlushedSize;
    EXPECT_EQ(sizeof(PIPE_CONTROL), difference);

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;
}
// After configureCSRtoNonDirtyState(), a flushTask() with default dispatch flags must leave
// the mock's flushCalledCount at zero.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    // Install the mock on the device before the fixture puts the CSR into the non-dirty state.
    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    configureCSRtoNonDirtyState<FamilyType>();

    DispatchFlags flags;
    csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    EXPECT_EQ(0, csr->flushCalledCount);
}
// In batched dispatch mode with a non-dirty CSR, flushTask() must neither flush nor record
// a command buffer, and must leave the residency allocation list empty.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateAndBatchingModeWhenflushTaskIsCalledThenSubmissionIsNotRecorded) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    configureCSRtoNonDirtyState<FamilyType>();

    DispatchFlags flags;
    csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    EXPECT_EQ(0, csr->flushCalledCount);
    EXPECT_TRUE(aggregator->peekCmdBufferList().peekIsEmpty());

    // No surface may remain on the residency list.
    auto &residencyAllocations = csr->getMemoryManager()->getResidencyAllocations();
    EXPECT_EQ(0u, residencyAllocations.size());
}
// In batched dispatch mode a single flushTask() must record exactly one command buffer in the
// submissions aggregator without calling flush, and the recorded batch-buffer-end location
// must match the MI_BATCH_BUFFER_END found in the task command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenNothingIsSubmittedToTheHwAndSubmissionIsRecorded) {
    typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    EXPECT_FALSE(cmdBufferList.peekIsEmpty());
    EXPECT_EQ(cmdBufferList.peekHead(), cmdBufferList.peekTail());
    auto cmdBuffer = cmdBufferList.peekHead();
    // Fatal check: everything below dereferences the recorded command buffer.
    ASSERT_NE(nullptr, cmdBuffer);

    //we should have 4 heaps, tag allocation and csr command stream + cq
    EXPECT_EQ(6u, cmdBuffer->surfaces.size());
    EXPECT_EQ(0, mockCsr->flushCalledCount);

    //we should be submitting via csr
    EXPECT_EQ(cmdBuffer->batchBuffer.commandBufferAllocation, mockCsr->commandStream.getGraphicsAllocation());
    EXPECT_EQ(cmdBuffer->batchBuffer.startOffset, 0u);
    EXPECT_FALSE(cmdBuffer->batchBuffer.requiresCoherency);
    EXPECT_FALSE(cmdBuffer->batchBuffer.low_priority);

    //find BB END
    parseCommands<FamilyType>(commandStream, 0);
    auto itBBend = find<MI_BATCH_BUFFER_END *>(cmdList.begin(), cmdList.end());
    // Fatal check: dereferencing the end iterator would be undefined behavior.
    ASSERT_NE(cmdList.end(), itBBend);
    void *bbEndAddress = *itBBend;
    EXPECT_EQ(bbEndAddress, cmdBuffer->batchBufferEndLocation);
}
// Two batched flushTask() calls record two command buffers; after flushBatchedSubmissions()
// the first buffer's batch-buffer-end location must hold an MI_BATCH_BUFFER_START that
// targets the second buffer's GPU address plus its start offset.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndTwoRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) {
    typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    // Both recorded buffers are dereferenced below, so their absence must abort the test.
    auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead();
    ASSERT_NE(nullptr, primaryBatch);
    auto secondBatchBuffer = primaryBatch->next;
    ASSERT_NE(nullptr, secondBatchBuffer);

    // Capture the values before the flush, while the recorded buffers are still reachable.
    auto bbEndLocation = primaryBatch->batchBufferEndLocation;
    auto secondBatchBufferAddress = (uint64_t)ptrOffset(secondBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(),
                                                        secondBatchBuffer->batchBuffer.startOffset);
    mockCsr->flushBatchedSubmissions();

    auto batchBufferStart = genCmdCast<MI_BATCH_BUFFER_START *>(bbEndLocation);
    ASSERT_NE(nullptr, batchBufferStart);
    EXPECT_EQ(secondBatchBufferAddress, batchBufferStart->getBatchBufferStartAddressGraphicsaddress472());
}
// Three batched flushTask() calls record three command buffers; after flushBatchedSubmissions()
// the second buffer's batch-buffer-end location must hold an MI_BATCH_BUFFER_START that targets
// the third buffer, and flush must have been called exactly once.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) {
    typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    // Walk the recorded list with fatal checks, since each node is dereferenced below.
    auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead();
    ASSERT_NE(nullptr, primaryBatch);
    auto secondBatchBuffer = primaryBatch->next;
    ASSERT_NE(nullptr, secondBatchBuffer);
    auto lastBatchBuffer = secondBatchBuffer->next;
    ASSERT_NE(nullptr, lastBatchBuffer);

    // Capture the values before the flush, while the recorded buffers are still reachable.
    auto bbEndLocation = secondBatchBuffer->batchBufferEndLocation;
    auto lastBatchBufferAddress = (uint64_t)ptrOffset(lastBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(),
                                                      lastBatchBuffer->batchBuffer.startOffset);
    mockCsr->flushBatchedSubmissions();

    auto batchBufferStart = genCmdCast<MI_BATCH_BUFFER_START *>(bbEndLocation);
    ASSERT_NE(nullptr, batchBufferStart);
    EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}
// When an allocation sized to the device's globalMemSize is made resident between batched
// flushTask() calls, the second recorded buffer must keep its MI_BATCH_BUFFER_END after
// flushBatchedSubmissions(), and flush must be called three times.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersThatUsesAllResourceWhenFlushTaskIsCalledThenBatchBuffersAreNotCombined) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    // A single allocation as large as the device's reported global memory size.
    auto globalMemSize = static_cast<size_t>(csr->getMemoryManager()->device->getDeviceInfo().globalMemSize);
    GraphicsAllocation wholeMemoryAllocation(nullptr, globalMemSize);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;

    // Every submission in this test uses identical arguments.
    auto recordTask = [&]() {
        csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);
    };

    recordTask();
    csr->makeResident(wholeMemoryAllocation);
    recordTask();
    csr->makeResident(wholeMemoryAllocation);
    recordTask();

    // Remember where the second recorded buffer ends before flushing.
    auto firstRecorded = aggregator->peekCommandBuffers().peekHead();
    auto bbEndLocation = firstRecorded->next->batchBufferEndLocation;
    csr->flushBatchedSubmissions();

    auto chainedStart = genCmdCast<MI_BATCH_BUFFER_START *>(bbEndLocation);
    ASSERT_EQ(nullptr, chainedStart);
    auto untouchedEnd = genCmdCast<MI_BATCH_BUFFER_END *>(bbEndLocation);
    EXPECT_NE(nullptr, untouchedEnd);
    EXPECT_EQ(3, csr->flushCalledCount);
}
// Two batched flushTask() calls on the same task stream must keep the stream's base, grow its
// used size each time, and record two distinct command buffers that carry the per-call
// coherency / priority flags and increasing start offsets.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledTwiceThenNothingIsSubmittedToTheHwAndTwoSubmissionAreRecorded) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto initialBase = commandStream.getBase();
    auto initialUsed = commandStream.getUsed();
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    //ensure command stream still used
    EXPECT_EQ(initialBase, commandStream.getBase());
    auto baseAfterFirstFlushTask = commandStream.getBase();
    auto usedAfterFirstFlushTask = commandStream.getUsed();

    // The second submission starts where the first one ended and uses different flags.
    dispatchFlags.requiresCoherency = true;
    dispatchFlags.lowPriority = true;
    mockCsr->flushTask(commandStream,
                       commandStream.getUsed(),
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);
    auto baseAfterSecondFlushTask = commandStream.getBase();
    auto usedAfterSecondFlushTask = commandStream.getUsed();

    EXPECT_EQ(initialBase, commandStream.getBase());
    EXPECT_EQ(baseAfterSecondFlushTask, baseAfterFirstFlushTask);
    EXPECT_EQ(baseAfterFirstFlushTask, initialBase);
    EXPECT_GT(usedAfterFirstFlushTask, initialUsed);
    EXPECT_GT(usedAfterSecondFlushTask, usedAfterFirstFlushTask);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    EXPECT_FALSE(cmdBufferList.peekIsEmpty());
    auto cmdBuffer1 = cmdBufferList.peekHead();
    auto cmdBuffer2 = cmdBufferList.peekTail();
    // Fatal checks: both buffers are dereferenced by the expectations that follow.
    ASSERT_NE(nullptr, cmdBuffer1);
    ASSERT_NE(nullptr, cmdBuffer2);
    EXPECT_NE(cmdBuffer1, cmdBuffer2);

    EXPECT_GT(cmdBuffer2->batchBufferEndLocation, cmdBuffer1->batchBufferEndLocation);
    EXPECT_FALSE(cmdBuffer1->batchBuffer.requiresCoherency);
    EXPECT_TRUE(cmdBuffer2->batchBuffer.requiresCoherency);
    EXPECT_FALSE(cmdBuffer1->batchBuffer.low_priority);
    EXPECT_TRUE(cmdBuffer2->batchBuffer.low_priority);
    EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset);
}
// A batched submission recorded from a non-dirty CSR must be handed to flush unchanged by
// flushBatchedSubmissions(): same allocation, start offset and flags. Its surfaces must be
// resident while recorded and non-resident once flushed.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenRecordedBatchBufferIsBeingSubmittedThenFlushIsCalledWithRecordedCommandBuffer) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    configureCSRtoNonDirtyState<FamilyType>();

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;
    flags.requiresCoherency = true;
    csr->lastSentCoherencyRequest = 1;

    // Consume 4 bytes first so the task starts at a non-zero offset.
    taskStream.getSpace(4);
    csr->flushTask(taskStream, 4, dsh, ih, ioh, ssh, taskLevel, flags);

    EXPECT_EQ(0, csr->flushCalledCount);

    auto &residencyAllocations = csr->getMemoryManager()->getResidencyAllocations();
    EXPECT_EQ(0u, residencyAllocations.size());

    auto &recordedBuffers = aggregator->peekCommandBuffers();
    EXPECT_FALSE(recordedBuffers.peekIsEmpty());

    auto recorded = recordedBuffers.peekHead();
    EXPECT_EQ(5u, recorded->surfaces.size());

    // Keep our own copy of the surface list; it is checked again after the flush.
    std::vector<GraphicsAllocation *> trackedSurfaces = recorded->surfaces;
    for (auto *surface : trackedSurfaces) {
        EXPECT_TRUE(surface->isResident());
        EXPECT_EQ(1, surface->residencyTaskCount);
    }

    csr->flushBatchedSubmissions();

    const auto &flushedBatch = csr->recordedCommandBuffer.batchBuffer;
    EXPECT_FALSE(flushedBatch.low_priority);
    EXPECT_TRUE(flushedBatch.requiresCoherency);
    EXPECT_EQ(flushedBatch.commandBufferAllocation, taskStream.getGraphicsAllocation());
    EXPECT_EQ(4u, flushedBatch.startOffset);
    EXPECT_EQ(1, csr->flushCalledCount);

    EXPECT_TRUE(aggregator->peekCommandBuffers().peekIsEmpty());
    EXPECT_EQ(0u, residencyAllocations.size());

    for (auto *surface : trackedSurfaces) {
        EXPECT_FALSE(surface->isResident());
    }
}
// A CSR constructed while the CsrDispatchMode debug flag is set to AdaptiveDispatch must
// report that dispatch mode.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrCreatedWithDedicatedDebugFlagWhenItIsCreatedThenItHasProperDispatchMode) {
    DebugManagerStateRestore debugStateGuard;
    DebugManager.flags.CsrDispatchMode.set(CommandStreamReceiver::DispatchMode::AdaptiveDispatch);

    std::unique_ptr<MockCsrHw2<FamilyType>> csr(new MockCsrHw2<FamilyType>(*platformDevices[0]));

    EXPECT_EQ(CommandStreamReceiver::DispatchMode::AdaptiveDispatch, csr->dispatchMode);
}
// A flushTask() with dispatchFlags.blocking set must trigger exactly one flush even in batched
// dispatch mode, leaving no command buffer in the aggregator.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenBlockingCommandIsSendThenItIsFlushedAndNotBatched) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    configureCSRtoNonDirtyState<FamilyType>();

    DispatchFlags flags;
    flags.blocking = true;
    csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    EXPECT_EQ(1, csr->flushCalledCount);
    EXPECT_TRUE(aggregator->peekCmdBufferList().peekIsEmpty());
}
// When the fixture's command stream has content, flushTask() must flush once, advance the
// CSR's flush stamp, and return that stamp in the completion stamp.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenBufferToFlushWhenFlushTaskCalledThenUpdateFlushStamp) {
    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);

    // Put one byte into the fixture's stream so there is something to flush.
    commandStream.getSpace(1);
    EXPECT_EQ(0, csr->flushCalledCount);

    auto stampBefore = csr->flushStamp->peekStamp();
    auto completion = flushTask(*csr);

    EXPECT_GT(csr->flushStamp->peekStamp(), stampBefore);
    EXPECT_EQ(csr->flushStamp->peekStamp(), completion.flushStamp);
    EXPECT_EQ(1, csr->flushCalledCount);
}
// With the CSR in the non-dirty state, flushTask() must not flush and must leave the flush
// stamp untouched; the returned completion stamp carries the previous value.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) {
    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    configureCSRtoNonDirtyState<FamilyType>();

    EXPECT_EQ(0, csr->flushCalledCount);

    auto stampBefore = csr->flushStamp->peekStamp();
    auto completion = flushTask(*csr);

    EXPECT_EQ(csr->flushStamp->peekStamp(), stampBefore);
    EXPECT_EQ(stampBefore, completion.flushStamp);
    EXPECT_EQ(0, csr->flushCalledCount);
}
// In batched dispatch mode flushTask() must bump only the latest sent task count; the latest
// flushed task count catches up after flushBatchedSubmissions().
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenFlushedTaskCountIsNotModifed) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;
    csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    // Sent but not yet flushed.
    EXPECT_EQ(1u, csr->peekLatestSentTaskCount());
    EXPECT_EQ(0u, csr->peekLatestFlushedTaskCount());

    csr->flushBatchedSubmissions();

    // Both counters agree once the batch has been flushed.
    EXPECT_EQ(1u, csr->peekLatestSentTaskCount());
    EXPECT_EQ(1u, csr->peekLatestFlushedTaskCount());
}
// With the device's own (unreplaced) CSR, one flushTask() must advance both the latest sent
// and the latest flushed task counts to 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;

    auto &receiver = pDevice->getCommandStreamReceiver();
    receiver.flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    EXPECT_EQ(1u, receiver.peekLatestSentTaskCount());
    EXPECT_EQ(1u, receiver.peekLatestFlushedTaskCount());
}
// Waiting on a task count that has only been recorded (not flushed) must submit the batched
// command buffers: the flushed task count advances and the aggregator list becomes empty.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForTaskCountIsCalledWithTaskCountThatWasNotYetFlushedThenBatchedCommandBuffersAreSubmitted) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;
    csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    // One buffer recorded for task count 1, nothing flushed yet.
    auto &recordedBuffers = aggregator->peekCommandBuffers();
    EXPECT_FALSE(recordedBuffers.peekIsEmpty());
    EXPECT_EQ(0u, csr->peekLatestFlushedTaskCount());

    auto recorded = recordedBuffers.peekHead();
    EXPECT_EQ(1u, recorded->taskCount);

    csr->waitForCompletionWithTimeout(false, 1, 1);

    EXPECT_EQ(1u, csr->peekLatestFlushedTaskCount());
    EXPECT_TRUE(recordedBuffers.peekIsEmpty());
}
// After one batched flushTask(), peekTotalMemoryUsed() must equal the summed underlying buffer
// sizes of the recorded surfaces, and must drop to zero after flushBatchedSubmissions().
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenEnqueueIsMadeThenCurrentMemoryUsedIsTracked) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;
    csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);

    // Sum up the sizes of everything the recorded buffer references.
    auto recorded = aggregator->peekCommandBuffers().peekHead();
    uint64_t surfacesTotal = 0;
    for (const auto &surface : recorded->surfaces) {
        surfacesTotal += surface->getUnderlyingBufferSize();
    }
    EXPECT_EQ(surfacesTotal, csr->peekTotalMemoryUsed());

    //after flush it goes to 0
    csr->flushBatchedSubmissions();
    EXPECT_EQ(0u, csr->peekTotalMemoryUsed());
}
// A second batched flushTask() that adds one extra resident allocation must increase
// peekTotalMemoryUsed() only by that allocation's size; surfaces already counted for the first
// submission are not counted again.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenSusbsequentEnqueueIsMadeThenOnlyNewResourcesAreTrackedForMemoryUsage) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;

    // Both submissions use identical arguments.
    auto recordTask = [&]() {
        csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);
    };

    recordTask();

    // Memory referenced by the first recorded buffer.
    auto recorded = aggregator->peekCommandBuffers().peekHead();
    uint64_t surfacesTotal = 0;
    for (const auto &surface : recorded->surfaces) {
        surfacesTotal += surface->getUnderlyingBufferSize();
    }

    // One new allocation is made resident before the second submission.
    auto extraBytes = 1234;
    GraphicsAllocation extraAllocation(nullptr, extraBytes);
    csr->makeResident(extraAllocation);

    recordTask();

    EXPECT_EQ(surfacesTotal + extraBytes, csr->peekTotalMemoryUsed());
}
// Memory manager test double: isMemoryBudgetExhausted() simply reports the public
// budgetExhausted flag, which tests flip to simulate an exhausted budget (false by default).
struct MockedMemoryManager : public OsAgnosticMemoryManager {
    bool budgetExhausted = false;

    bool isMemoryBudgetExhausted() const override {
        return budgetExhausted;
    }
};
// With the memory manager reporting an exhausted budget, the first batched submission is still
// only recorded. Once an allocation of globalMemSize / 4 is made resident, the next flushTask()
// must flush: flushed task count 2, no tracked memory, empty aggregator.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalResourceUsedExhaustsTheBudgetThenDoImplicitFlush) {
    CommandQueueHw<FamilyType> queue(nullptr, pDevice, 0);
    auto &taskStream = queue.getCS(4096u);

    // Locally owned memory manager and CSR; the device's CSR is not replaced in this test.
    std::unique_ptr<MockedMemoryManager> memoryManager(new MockedMemoryManager());
    std::unique_ptr<MockCsrHw2<FamilyType>> csr(new MockCsrHw2<FamilyType>(*platformDevices[0]));
    memoryManager->device = pDevice;
    csr->setMemoryManager(memoryManager.get());
    csr->setTagAllocation(pDevice->getTagAllocation());
    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    DispatchFlags flags;
    flags.guardCommandBufferWithPipeControl = true;
    memoryManager->budgetExhausted = true;

    // Both submissions use identical arguments.
    auto recordTask = [&]() {
        csr->flushTask(taskStream, 0, dsh, ih, ioh, ssh, taskLevel, flags);
    };

    recordTask();

    auto recorded = aggregator->peekCommandBuffers().peekHead();
    uint64_t surfacesTotal = 0;
    for (const auto &surface : recorded->surfaces) {
        surfacesTotal += surface->getUnderlyingBufferSize();
    }
    EXPECT_EQ(surfacesTotal, csr->peekTotalMemoryUsed());

    // Make a quarter of the device's global memory resident before the second submission.
    auto globalMemSize = static_cast<size_t>(csr->getMemoryManager()->device->getDeviceInfo().globalMemSize);
    GraphicsAllocation quarterOfMemory(nullptr, globalMemSize / 4);
    csr->makeResident(quarterOfMemory);

    recordTask();

    //expect 2 flushes, since we cannot batch those submissions
    EXPECT_EQ(2u, csr->peekLatestFlushedTaskCount());
    EXPECT_EQ(0u, csr->peekTotalMemoryUsed());
    EXPECT_TRUE(aggregator->peekCommandBuffers().peekIsEmpty());
}
// Two out-of-order-allowed submissions at the same task level: after flushBatchedSubmissions()
// the first recorded PIPE_CONTROL must have been replaced by MI_NOOP, the second must remain a
// PIPE_CONTROL, and no PIPE_CONTROL may appear between the two parsed MI_BATCH_BUFFER_STARTs.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTwoTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    dispatchFlags.outOfOrderExecutionAllowed = true;
    auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel();
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);
    //now emit with the same taskLevel
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);
    EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel());

    //validate if we recorded ppc positions
    auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead();
    ASSERT_NE(nullptr, firstCmdBuffer);
    EXPECT_NE(nullptr, firstCmdBuffer->pipeControlLocation);
    auto secondCmdBuffer = firstCmdBuffer->next;
    ASSERT_NE(nullptr, secondCmdBuffer);
    EXPECT_NE(nullptr, secondCmdBuffer->pipeControlLocation);
    EXPECT_NE(firstCmdBuffer->pipeControlLocation, secondCmdBuffer->pipeControlLocation);
    auto ppc = genCmdCast<typename FamilyType::PIPE_CONTROL *>(firstCmdBuffer->pipeControlLocation);
    EXPECT_NE(nullptr, ppc);
    auto ppc2 = genCmdCast<typename FamilyType::PIPE_CONTROL *>(secondCmdBuffer->pipeControlLocation);
    EXPECT_NE(nullptr, ppc2);

    //flush needs to bump the taskLevel
    mockCsr->flushBatchedSubmissions();
    EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel());

    //decode commands to confirm no pipe controls between Walkers
    parseCommands<FamilyType>(commandQueue);
    auto itorBatchBufferStartFirst = find<typename FamilyType::MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    // The first match must exist before we step past it; incrementing end() is undefined.
    ASSERT_NE(cmdList.end(), itorBatchBufferStartFirst);
    // Advance a copy so itorBatchBufferStartFirst keeps pointing at the first match.
    auto itorAfterFirst = itorBatchBufferStartFirst;
    ++itorAfterFirst;
    auto itorBatchBufferStartSecond = find<typename FamilyType::MI_BATCH_BUFFER_START *>(itorAfterFirst, cmdList.end());

    //make sure they are not the same
    EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond);
    EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond);

    auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(itorAfterFirst, itorBatchBufferStartSecond);
    EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond);

    //first pipe control is nooped, second pipe control is untouched
    auto noop1 = genCmdCast<typename FamilyType::MI_NOOP *>(ppc);
    auto noop2 = genCmdCast<typename FamilyType::MI_NOOP *>(ppc2);
    EXPECT_NE(nullptr, noop1);
    EXPECT_EQ(nullptr, noop2);
    auto ppcAfterChange = genCmdCast<typename FamilyType::PIPE_CONTROL *>(ppc2);
    EXPECT_NE(nullptr, ppcAfterChange);
}
// Two submissions at the same task level with outOfOrderExecutionAllowed == false: no pipe
// control location is recorded for either buffer, and after flushBatchedSubmissions() a
// PIPE_CONTROL must still be found between the two parsed MI_BATCH_BUFFER_STARTs.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTwoTasksArePassedWithTheSameLevelThenThereIsPipeControlBetweenThemAfterFlush) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    dispatchFlags.outOfOrderExecutionAllowed = false;
    auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel();
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);
    //now emit with the same taskLevel
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);
    EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel());

    //validate that no ppc positions were recorded
    auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead();
    ASSERT_NE(nullptr, firstCmdBuffer);
    EXPECT_EQ(nullptr, firstCmdBuffer->pipeControlLocation);
    auto secondCmdBuffer = firstCmdBuffer->next;
    ASSERT_NE(nullptr, secondCmdBuffer);
    EXPECT_EQ(nullptr, secondCmdBuffer->pipeControlLocation);

    mockCsr->flushBatchedSubmissions();

    //decode commands to confirm a pipe control is present between the batch buffer starts
    parseCommands<FamilyType>(commandQueue);
    auto itorBatchBufferStartFirst = find<typename FamilyType::MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    // The first match must exist before we step past it; incrementing end() is undefined.
    ASSERT_NE(cmdList.end(), itorBatchBufferStartFirst);
    // Advance a copy so itorBatchBufferStartFirst keeps pointing at the first match.
    auto itorAfterFirst = itorBatchBufferStartFirst;
    ++itorAfterFirst;
    auto itorBatchBufferStartSecond = find<typename FamilyType::MI_BATCH_BUFFER_START *>(itorAfterFirst, cmdList.end());
    EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond);

    auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(itorAfterFirst, itorBatchBufferStartSecond);
    EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond);
}
// Same setup as the out-of-order two-task case, but the first buffer's recorded
// pipeControlLocation is cleared before the flush. The flush must then leave the PIPE_CONTROL
// in place, so one is still found between the two parsed MI_BATCH_BUFFER_STARTs.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenPipeControlForNoopAddressIsNullThenPipeControlIsNotNooped) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);
    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);
    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    dispatchFlags.outOfOrderExecutionAllowed = true;
    auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel();
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);
    //now emit with the same taskLevel
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);

    //remember the recorded ppc position, then clear it on the recorded buffer
    auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead();
    ASSERT_NE(nullptr, firstCmdBuffer);
    auto ppc1Location = firstCmdBuffer->pipeControlLocation;
    firstCmdBuffer->pipeControlLocation = nullptr;
    auto ppc = genCmdCast<typename FamilyType::PIPE_CONTROL *>(ppc1Location);
    EXPECT_NE(nullptr, ppc);

    //call flush, both pipe controls must remain untouched
    mockCsr->flushBatchedSubmissions();
    EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel());

    //decode commands to confirm the pipe control is still present between the batch buffer starts
    parseCommands<FamilyType>(commandQueue);
    auto itorBatchBufferStartFirst = find<typename FamilyType::MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    // The first match must exist before we step past it; incrementing end() is undefined.
    ASSERT_NE(cmdList.end(), itorBatchBufferStartFirst);
    // Advance a copy so itorBatchBufferStartFirst keeps pointing at the first match.
    auto itorAfterFirst = itorBatchBufferStartFirst;
    ++itorAfterFirst;
    auto itorBatchBufferStartSecond = find<typename FamilyType::MI_BATCH_BUFFER_START *>(itorAfterFirst, cmdList.end());
    EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond);

    auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(itorAfterFirst, itorBatchBufferStartSecond);
    EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond);
}
// Submits three tasks at the same task level through a CSR in batched-dispatch
// mode and checks that, after flushBatchedSubmissions():
//  - the task level is bumped by one,
//  - no PIPE_CONTROL is found between consecutive MI_BATCH_BUFFER_START commands,
//  - the pipe control locations recorded for the first two command buffers now
//    decode as MI_NOOP, while the third one still decodes as PIPE_CONTROL.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenThreeTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    dispatchFlags.outOfOrderExecutionAllowed = true;

    auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel();

    // emit three tasks, all with the same taskLevel
    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevelPriorToSubmission,
                       dispatchFlags);

    // recording the submissions must not change the task level
    EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel());

    // validate if we recorded ppc positions; every link is dereferenced below,
    // so a missing command buffer has to stop the test here
    auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead();
    ASSERT_NE(nullptr, firstCmdBuffer);
    auto secondCmdBuffer = firstCmdBuffer->next;
    ASSERT_NE(nullptr, secondCmdBuffer);
    auto thirdCmdBuffer = secondCmdBuffer->next;
    ASSERT_NE(nullptr, thirdCmdBuffer);

    EXPECT_NE(nullptr, thirdCmdBuffer->pipeControlLocation);
    EXPECT_NE(firstCmdBuffer->pipeControlLocation, thirdCmdBuffer->pipeControlLocation);

    auto ppc = genCmdCast<typename FamilyType::PIPE_CONTROL *>(firstCmdBuffer->pipeControlLocation);
    auto ppc2 = genCmdCast<typename FamilyType::PIPE_CONTROL *>(secondCmdBuffer->pipeControlLocation);
    auto ppc3 = genCmdCast<typename FamilyType::PIPE_CONTROL *>(thirdCmdBuffer->pipeControlLocation);
    // all three pointers are decoded again after the flush, so none may be null
    ASSERT_NE(nullptr, ppc);
    ASSERT_NE(nullptr, ppc2);
    ASSERT_NE(nullptr, ppc3);

    // flush needs to bump the taskLevel
    mockCsr->flushBatchedSubmissions();
    EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel());

    // decode commands to confirm no pipe controls between batch buffer starts
    parseCommands<FamilyType>(commandQueue);

    // Each search resumes from a copy advanced past the previous hit, so the
    // iterators below keep pointing at the MI_BATCH_BUFFER_START commands
    // themselves and end() is never incremented.
    auto itorBatchBufferStartFirst = find<typename FamilyType::MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorBatchBufferStartFirst);

    auto searchStart = itorBatchBufferStartFirst;
    ++searchStart;
    auto itorBatchBufferStartSecond = find<typename FamilyType::MI_BATCH_BUFFER_START *>(searchStart, cmdList.end());
    ASSERT_NE(cmdList.end(), itorBatchBufferStartSecond);

    searchStart = itorBatchBufferStartSecond;
    ++searchStart;
    auto itorBatchBufferStartThird = find<typename FamilyType::MI_BATCH_BUFFER_START *>(searchStart, cmdList.end());
    EXPECT_NE(cmdList.end(), itorBatchBufferStartThird);

    // make sure they are not the same
    EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond);
    EXPECT_NE(itorBatchBufferStartThird, itorBatchBufferStartSecond);

    // find() returning the end of the searched range means nothing was found
    auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(itorBatchBufferStartFirst, itorBatchBufferStartSecond);
    EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond);

    itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(itorBatchBufferStartSecond, itorBatchBufferStartThird);
    EXPECT_EQ(itorPipeControl, itorBatchBufferStartThird);

    // first and second pipe controls are nooped, third pipe control is untouched
    auto noop1 = genCmdCast<typename FamilyType::MI_NOOP *>(ppc);
    auto noop2 = genCmdCast<typename FamilyType::MI_NOOP *>(ppc2);
    auto noop3 = genCmdCast<typename FamilyType::MI_NOOP *>(ppc3);
    EXPECT_NE(nullptr, noop1);
    EXPECT_NE(nullptr, noop2);
    EXPECT_EQ(nullptr, noop3);

    auto ppcAfterChange = genCmdCast<typename FamilyType::PIPE_CONTROL *>(ppc3);
    EXPECT_NE(nullptr, ppcAfterChange);
}
// Fixture alias: exposes UltCommandStreamReceiverTest under a cleanup-specific name.
using CommandStreamReceiverCleanupTests = UltCommandStreamReceiverTest;
// Stores two temporary and two reusable allocations in the memory manager, tags
// one of each kind with taskCount 1 and the other with taskCount 10, and checks
// what cleanupResources() leaves behind:
//  - with latestFlushedTaskCount == 9 only the taskCount-10 allocations remain,
//  - with latestFlushedTaskCount == 11 both lists end up empty.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAllocationsArePresentThenCleanupResourcesOnlyCleansThoseAboveLatestFlushTaskLevel) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto memoryManager = pDevice->getMemoryManager();

    auto temporaryToClean = memoryManager->allocateGraphicsMemory(4096u);
    auto temporaryToHold = memoryManager->allocateGraphicsMemory(4096u);
    auto reusableToClean = memoryManager->allocateGraphicsMemory(4096u);
    auto reusableToHold = memoryManager->allocateGraphicsMemory(4096u);

    memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(temporaryToClean), TEMPORARY_ALLOCATION);
    memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(temporaryToHold), TEMPORARY_ALLOCATION);
    memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(reusableToClean), REUSABLE_ALLOCATION);
    memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(reusableToHold), REUSABLE_ALLOCATION);

    // task counts are assigned after the allocations were stored; keep this order
    temporaryToClean->taskCount = 1;
    reusableToClean->taskCount = 1;
    temporaryToHold->taskCount = 10;
    reusableToHold->taskCount = 10;

    // flushed count 9: the taskCount-1 allocations go, the taskCount-10 ones stay
    commandStreamReceiver.latestFlushedTaskCount = 9;
    commandStreamReceiver.cleanupResources();

    EXPECT_EQ(reusableToHold, memoryManager->allocationsForReuse.peekHead());
    EXPECT_EQ(reusableToHold, memoryManager->allocationsForReuse.peekTail());
    EXPECT_EQ(temporaryToHold, memoryManager->graphicsAllocations.peekHead());
    EXPECT_EQ(temporaryToHold, memoryManager->graphicsAllocations.peekTail());

    // flushed count 11: the remaining allocations are released as well
    commandStreamReceiver.latestFlushedTaskCount = 11;
    commandStreamReceiver.cleanupResources();

    EXPECT_TRUE(memoryManager->allocationsForReuse.peekIsEmpty());
    EXPECT_TRUE(memoryManager->graphicsAllocations.peekIsEmpty());
}
// Flushes a single task through a CSR in batched-dispatch mode with
// DispatchFlags::throttle set to LOW and checks that the batch buffer of the
// first recorded command buffer carries the same throttle value.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToLowWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::LOW;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    auto cmdBuffer = cmdBufferList.peekHead();
    // the command buffer is dereferenced below; stop here if nothing was recorded
    ASSERT_NE(nullptr, cmdBuffer);
    EXPECT_EQ(QueueThrottle::LOW, cmdBuffer->batchBuffer.throttle);
}
// Flushes a single task through a CSR in batched-dispatch mode with
// DispatchFlags::throttle set to MEDIUM and checks that the batch buffer of the
// first recorded command buffer carries the same throttle value.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToMediumWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::MEDIUM;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    auto cmdBuffer = cmdBufferList.peekHead();
    // the command buffer is dereferenced below; stop here if nothing was recorded
    ASSERT_NE(nullptr, cmdBuffer);
    EXPECT_EQ(QueueThrottle::MEDIUM, cmdBuffer->batchBuffer.throttle);
}
// Flushes a single task through a CSR in batched-dispatch mode with
// DispatchFlags::throttle set to HIGH and checks that the batch buffer of the
// first recorded command buffer carries the same throttle value.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToHighWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    DispatchFlags dispatchFlags;
    dispatchFlags.throttle = QueueThrottle::HIGH;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    auto cmdBuffer = cmdBufferList.peekHead();
    // the command buffer is dereferenced below; stop here if nothing was recorded
    ASSERT_NE(nullptr, cmdBuffer);
    EXPECT_EQ(QueueThrottle::HIGH, cmdBuffer->batchBuffer.throttle);
}