/* * Copyright (c) 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "runtime/built_ins/built_ins.h" #include "runtime/command_queue/command_queue_hw.h" #include "runtime/command_stream/command_stream_receiver.h" #include "runtime/command_stream/linear_stream.h" #include "runtime/os_interface/debug_settings_manager.h" #include "runtime/event/user_event.h" #include "runtime/helpers/aligned_memory.h" #include "runtime/helpers/cache_policy.h" #include "runtime/helpers/preamble.h" #include "runtime/helpers/ptr_math.h" #include "runtime/memory_manager/graphics_allocation.h" #include "runtime/memory_manager/memory_manager.h" #include "runtime/mem_obj/buffer.h" #include "runtime/command_stream/preemption.h" #include "runtime/gen_common/reg_configs.h" #include "unit_tests/libult/ult_command_stream_receiver.h" #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/fixtures/built_in_fixture.h" #include "unit_tests/helpers/hw_parse.h" #include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/mocks/mock_buffer.h" #include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_context.h" #include "unit_tests/mocks/mock_csr.h" #include "unit_tests/mocks/mock_event.h" #include "unit_tests/mocks/mock_kernel.h" #include "unit_tests/mocks/mock_submissions_aggregator.h" #include "test.h" #include "gtest/gtest.h" #include "runtime/utilities/linux/debug_env_reader.h" #include "runtime/gmm_helper/gmm_helper.h" #include "runtime/command_queue/dispatch_walker.h" using namespace OCLRT; struct UltCommandStreamReceiverTest : public DeviceFixture, public BuiltInFixture, public MemoryManagementFixture, public HardwareParse, ::testing::Test { void SetUp() override { MemoryManagementFixture::SetUp(); DeviceFixture::SetUp(); BuiltInFixture::SetUp(pDevice); HardwareParse::SetUp(); size_t sizeStream = 256; size_t alignmentStream = 0x1000; cmdBuffer = alignedMalloc(sizeStream, alignmentStream); dshBuffer = alignedMalloc(sizeStream, 
alignmentStream); ihBuffer = alignedMalloc(sizeStream, alignmentStream); iohBuffer = alignedMalloc(sizeStream, alignmentStream); sshBuffer = alignedMalloc(sizeStream, alignmentStream); ASSERT_NE(nullptr, cmdBuffer); ASSERT_NE(nullptr, dshBuffer); ASSERT_NE(nullptr, ihBuffer); ASSERT_NE(nullptr, iohBuffer); ASSERT_NE(nullptr, sshBuffer); commandStream.replaceBuffer(cmdBuffer, sizeStream); auto graphicsAllocation = new GraphicsAllocation(cmdBuffer, sizeStream); commandStream.replaceGraphicsAllocation(graphicsAllocation); dsh.replaceBuffer(dshBuffer, sizeStream); graphicsAllocation = new GraphicsAllocation(dshBuffer, sizeStream); dsh.replaceGraphicsAllocation(graphicsAllocation); ih.replaceBuffer(ihBuffer, sizeStream); graphicsAllocation = new GraphicsAllocation(ihBuffer, sizeStream); ih.replaceGraphicsAllocation(graphicsAllocation); ioh.replaceBuffer(iohBuffer, sizeStream); graphicsAllocation = new GraphicsAllocation(iohBuffer, sizeStream); ioh.replaceGraphicsAllocation(graphicsAllocation); ssh.replaceBuffer(sshBuffer, sizeStream); graphicsAllocation = new GraphicsAllocation(sshBuffer, sizeStream); ssh.replaceGraphicsAllocation(graphicsAllocation); } void TearDown() override { delete dsh.getGraphicsAllocation(); delete ih.getGraphicsAllocation(); delete ioh.getGraphicsAllocation(); delete ssh.getGraphicsAllocation(); delete commandStream.getGraphicsAllocation(); alignedFree(sshBuffer); alignedFree(iohBuffer); alignedFree(ihBuffer); alignedFree(dshBuffer); alignedFree(cmdBuffer); HardwareParse::TearDown(); BuiltInFixture::TearDown(); DeviceFixture::TearDown(); MemoryManagementFixture::TearDown(); } template CompletionStamp flushTask(CommandStreamReceiverType &commandStreamReceiver, bool block = false, size_t startOffset = 0, bool requiresCoherency = false, bool lowPriority = false) { DispatchFlags dispatchFlags; dispatchFlags.blocking = block; dispatchFlags.requiresCoherency = requiresCoherency; dispatchFlags.low_priority = lowPriority; return 
commandStreamReceiver.flushTask( commandStream, startOffset, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); } template void configureCSRHeapStatesToNonDirty() { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dshState.updateAndCheck(&dsh); commandStreamReceiver.ihState.updateAndCheck(&ih); commandStreamReceiver.iohState.updateAndCheck(&ioh); commandStreamReceiver.sshState.updateAndCheck(&ssh); } template void configureCSRtoNonDirtyState() { bool slmUsed = false; if (DebugManager.flags.ForceSLML3Config.get()) { slmUsed = true; } uint32_t L3Config = PreambleHelper::getL3Config(*platformDevices[0], slmUsed); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.overrideMediaVFEStateDirty(false); commandStreamReceiver.latestSentStatelessMocsConfig = CacheSettings::l3CacheOn; commandStreamReceiver.lastSentL3Config = L3Config; configureCSRHeapStatesToNonDirty(); commandStreamReceiver.taskLevel = taskLevel; commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; commandStreamReceiver.lastSentCoherencyRequest = 0; commandStreamReceiver.lastMediaSamplerConfig = 0; } template UltCommandStreamReceiver &getUltCommandStreamReceiver() { return reinterpret_cast &>(pDevice->getCommandStreamReceiver()); } uint32_t taskLevel = 42; LinearStream commandStream; LinearStream dsh; LinearStream ih; LinearStream ioh; LinearStream ssh; void *cmdBuffer = nullptr; void *dshBuffer = nullptr; void *ihBuffer = nullptr; void *iohBuffer = nullptr; void *sshBuffer = nullptr; uint32_t latestSentDcFlushTaskCount; uint32_t latestSentNonDcFlushTaskCount; uint32_t dcFlushRequiredTaskCount; }; HWTEST_F(UltCommandStreamReceiverTest, testInitialState) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(0u, 
commandStreamReceiver.peekTaskLevel());

    // Every heap-state tracker is expected to return true on this first call in the test.
    EXPECT_TRUE(commandStreamReceiver.dshState.updateAndCheck(&dsh));
    EXPECT_TRUE(commandStreamReceiver.ihState.updateAndCheck(&ih));
    EXPECT_TRUE(commandStreamReceiver.iohState.updateAndCheck(&ioh));
    EXPECT_TRUE(commandStreamReceiver.sshState.updateAndCheck(&ssh));
}

typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTests;

// addBatchBufferEnd must grow the stream by exactly one MI_BATCH_BUFFER_END
// and the bytes written at the previous offset must decode via genCmdCast.
// NOTE(review): template argument lists (e.g. on CommandStreamReceiverHw and
// genCmdCast) appear to be missing in this file - confirm against the repository.
HWTEST_F(CommandStreamReceiverFlushTests, addsBatchBufferEnd) {
    auto usedPrevious = commandStream.getUsed();

    CommandStreamReceiverHw::addBatchBufferEnd(commandStream, nullptr);

    EXPECT_EQ(commandStream.getUsed(), usedPrevious + sizeof(typename FamilyType::MI_BATCH_BUFFER_END));

    auto batchBufferEnd = genCmdCast(
        ptrOffset(commandStream.getBase(), usedPrevious));
    EXPECT_NE(nullptr, batchBufferEnd);
}

// After consuming a non-cacheline-sized amount, alignToCacheLine must leave the
// used size at a multiple of MemoryConstants::cacheLineSize.
HWTEST_F(CommandStreamReceiverFlushTests, shouldAlignToCacheLineSize) {
    commandStream.getSpace(sizeof(uint32_t));
    CommandStreamReceiverHw::alignToCacheLine(commandStream);

    EXPECT_EQ(0u, commandStream.getUsed() % MemoryConstants::cacheLineSize);
}

typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskTests;

// The first flushTask on an untouched receiver must write something into the
// receiver's own command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldSeeCommandsOnFirstFlush) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
}

// A single flushTask must leave the receiver's task count at 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, taskCountShouldBeUpdated) {
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
    flushTask(commandStreamReceiver);

    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
}

// With the receiver pre-configured by configureCSRtoNonDirtyState, flushTask
// must not add anything to the receiver's command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) {
    configureCSRtoNonDirtyState();
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    flushTask(commandStreamReceiver);
    EXPECT_EQ(0u, commandStreamReceiver.commandStream.getUsed());
}

// Batched dispatch, work submitted through the CSR stream: the CSR stream is
// pre-filled so that one cache line has room only for the pipe control and a
// batch-buffer end, and the test expects two full cache lines to be used.
HWTEST_F(CommandStreamReceiverFlushTaskTests,
         givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) {
    // The mock is handed to the device via resetCommandStreamReceiver.
    // NOTE(review): presumably the device takes ownership - confirm.
    auto mockCsr = new MockCsrHw2(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);

    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    configureCSRtoNonDirtyState();

    mockCsr->getCS(1024u);
    auto &csrCommandStream = mockCsr->commandStream;

    // We do a level change that will emit PPC; fill all the space so only BB end fits.
    taskLevel++;
    auto ppcSize = mockCsr->getRequiredPipeControlSize();
    auto fillSize = MemoryConstants::cacheLineSize - ppcSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
    csrCommandStream.getSpace(fillSize);
    auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize;

    flushTask(*mockCsr);

    EXPECT_EQ(expectedUsedSize, mockCsr->commandStream.getUsed());
}

// Same scenario as above, but the pre-fill and the expectation are on the
// command queue's stream, which is passed directly to flushTask.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCommandStreamThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) {
    auto mockCsr = new MockCsrHw2(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(mockCsr);

    mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch);

    CommandQueueHw commandQueue(nullptr, pDevice, 0);
    // Shadows the fixture's commandStream member on purpose: this test works on the queue's stream.
    auto &commandStream = commandQueue.getCS(4096u);

    configureCSRtoNonDirtyState();

    // Leave room for exactly one MI_BATCH_BUFFER_END in the first cache line.
    auto fillSize = MemoryConstants::cacheLineSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
    commandStream.getSpace(fillSize);
    DispatchFlags dispatchFlags;

    mockCsr->flushTask(commandStream,
                       0,
                       dsh,
                       ih,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags);

    auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize;
    EXPECT_EQ(expectedUsedSize, commandStream.getUsed());
}

// When the receiver is already at the fixture's task level and fully
// pre-configured, flushTask must keep the task level and add nothing to the
// receiver's command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, sameTaskLevelShouldntSendAPipeControl) {
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    // Configure the CSR to not need to submit any state or commands.
    configureCSRtoNonDirtyState();

    flushTask(*commandStreamReceiver);

    EXPECT_EQ(taskLevel, commandStreamReceiver->peekTaskLevel());

    auto sizeUsed = commandStreamReceiver->commandStream.getUsed();
    EXPECT_EQ(sizeUsed, 0u);
}

// Same expectation as the previous test, with the device's preemption mode
// switched to ThreadGroup before the flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithPreemptionSupportThenDontSendMediaVfeStateIfNotDirty) {
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    // Configure the CSR to not need to submit any state or commands.
    configureCSRtoNonDirtyState();

    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);

    flushTask(*commandStreamReceiver);

    EXPECT_EQ(taskLevel, commandStreamReceiver->peekTaskLevel());

    auto sizeUsed = commandStreamReceiver->commandStream.getUsed();
    EXPECT_EQ(0u, sizeUsed);
}

// Flushing at a task level higher than the receiver's (receiver is set to half
// the fixture's level) must raise the receiver's level and leave a command
// matched by find() in the receiver's stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = true;
    configureCSRHeapStatesToNonDirty();
    commandStreamReceiver.taskLevel = taskLevel / 2;

    flushTask(commandStreamReceiver);

    EXPECT_EQ(taskLevel, commandStreamReceiver.peekTaskLevel());
    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);

    parseCommands(commandStreamReceiver.commandStream, 0);

    auto itorPC = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);
}

// The CompletionStamp returned by flushTask must agree with the receiver's own
// task level, task count and flush stamp, even when the receiver's task level
// is ahead of the fixture's.
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStampValid) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    // Simulate that our CQ is stale by 10 task levels.
    commandStreamReceiver.taskLevel = taskLevel + 10;

    auto completionStamp = flushTask(commandStreamReceiver);

    EXPECT_EQ(completionStamp.taskLevel, commandStreamReceiver.peekTaskLevel());
    EXPECT_EQ(completionStamp.taskCount, commandStreamReceiver.peekTaskCount());
    EXPECT_EQ(completionStamp.flushStamp,
commandStreamReceiver.flushStamp->peekStamp());
}

// On a fresh receiver the returned stamp carries task count 1, the fixture's
// task level, the receiver's flush stamp, device ordinal 0 and the device's
// engine type.
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStamp) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    auto deviceEngineType = pDevice->getEngineType();
    auto completionStamp = flushTask(commandStreamReceiver);

    EXPECT_EQ(1u, completionStamp.taskCount);
    EXPECT_EQ(taskLevel, completionStamp.taskLevel);
    EXPECT_EQ(commandStreamReceiver.flushStamp->peekStamp(), completionStamp.flushStamp);
    EXPECT_EQ(0u, completionStamp.deviceOrdinal);
    EXPECT_EQ(deviceEngineType, completionStamp.engineType);
}

// After a flush with the fixture's heaps, each heap-state tracker must return
// false when shown the same heap again.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressTracking) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    flushTask(commandStreamReceiver);

    EXPECT_FALSE(commandStreamReceiver.dshState.updateAndCheck(&dsh));
    EXPECT_FALSE(commandStreamReceiver.ihState.updateAndCheck(&ih));
    EXPECT_FALSE(commandStreamReceiver.iohState.updateAndCheck(&ioh));
    EXPECT_FALSE(commandStreamReceiver.sshState.updateAndCheck(&ssh));
}

// The STATE_BASE_ADDRESS command found in the receiver's stream must carry the
// base addresses of the fixture's heaps and the expected MOCS values.
// NOTE(review): the reinterpret_cast target types below appear to be missing
// (template argument lists look stripped in this file) - confirm against the repository.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressProgrammingShouldMatchTracking) {
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
    auto stateHeapMocs = Gmm::getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
    auto l3CacheOnMocs = Gmm::getMOCS(CacheSettings::l3CacheOn);
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    flushTask(commandStreamReceiver);

    auto &commandStreamCSR = commandStreamReceiver.commandStream;
    HardwareParse::parseCommands(commandStreamCSR, 0);
    HardwareParse::findHardwareCommands();

    // Fatal on purpose: the command is dereferenced right below.
    ASSERT_NE(nullptr, cmdStateBaseAddress);
    auto &cmd = *reinterpret_cast(cmdStateBaseAddress);

    EXPECT_EQ(dsh.getBase(), reinterpret_cast(cmd.getDynamicStateBaseAddress()));
    EXPECT_EQ(ih.getBase(), reinterpret_cast(cmd.getInstructionBaseAddress()));
    EXPECT_EQ(ioh.getBase(), reinterpret_cast(cmd.getIndirectObjectBaseAddress()));
    EXPECT_EQ(ssh.getBase(), reinterpret_cast(cmd.getSurfaceStateBaseAddress()));

    EXPECT_EQ(l3CacheOnMocs, cmd.getStatelessDataPortAccessMemoryObjectControlState());
    EXPECT_EQ(stateHeapMocs, cmd.getInstructionMemoryObjectControlState());
}

// Moving the instruction heap by one byte after the receiver was configured
// makes a state-base-address update necessary; the test expects a pipe control
// ahead of it with texture-cache invalidation and DC flush enabled.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlush) {
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    configureCSRtoNonDirtyState();
    ih.replaceBuffer(ptrOffset(ih.getBase(), +1u), ih.getMaxAvailableSpace() - 1);

    flushTask(commandStreamReceiver);

    parseCommands(commandStreamReceiver.commandStream, 0);

    auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end());
    auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor);
    // Fatal assertion: when no pipe control precedes the state base address the
    // two iterators are equal (possibly cmdList.end()), and dereferencing
    // pipeControlItor below would be invalid. A non-fatal EXPECT_NE would let
    // the test run on into that dereference.
    ASSERT_NE(stateBaseAddressItor, pipeControlItor);
    auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*pipeControlItor;
    EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
}

// With isPreambleSent cleared, a flush must set it and write commands to the
// receiver's stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, preambleShouldBeSentIfNeverSent) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = false;
    flushTask(commandStreamReceiver);

    EXPECT_TRUE(commandStreamReceiver.isPreambleSent);
    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
}

// Preamble not yet sent and lastMediaSamplerConfig set to -1: getCommand() must
// find the command after the flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementChanged) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = false;
    commandStreamReceiver.lastMediaSamplerConfig = -1;
    flushTask(commandStreamReceiver);
    parseCommands(commandStreamReceiver.commandStream, 0);
    EXPECT_NE(nullptr, getCommand());
}

// Preamble not yet sent and lastMediaSamplerConfig left at 0.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementNotChanged) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = false;
    commandStreamReceiver.lastMediaSamplerConfig = 0;
    flushTask(commandStreamReceiver);
    parseCommands(commandStreamReceiver.commandStream, 0);
    // The command is still expected because the preamble had not been sent.
    EXPECT_NE(nullptr, getCommand());
}

// Preamble already sent and lastMediaSamplerConfig at 0: getCommand() must
// find nothing after the flush.
// NOTE(review): getCommand()/find()/genCmdCast() calls in this file appear to
// have lost their template arguments - confirm against the repository.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldNotBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.lastMediaSamplerConfig = 0;
    flushTask(commandStreamReceiver);
    parseCommands(commandStreamReceiver.commandStream, 0);
    EXPECT_EQ(nullptr, getCommand());
}

// Preamble already sent but lastMediaSamplerConfig set to 1: getCommand() must
// find the command after the flush.
// NOTE(review): the test name says "DoesntChanged" although the stored config
// (1) differs from the value used in the sibling tests (0) - name looks stale.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.lastMediaSamplerConfig = 1;
    flushTask(commandStreamReceiver);
    parseCommands(commandStreamReceiver.commandStream, 0);
    EXPECT_NE(nullptr, getCommand());
}

// Heap states were never registered with the receiver, so the flush must write
// commands and find() must locate its target in the receiver's stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfNeverSent) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.overrideMediaVFEStateDirty(false);
    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);

    parseCommands(commandStreamReceiver.commandStream, 0);

    auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), stateBaseAddressItor);
}

// Heap states are registered while every heap has size 0; the original sizes
// are then restored (same base pointers) before flushing. The flush must still
// write commands and find() must locate its target.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfSizeChanged) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    // Remember the real sizes so they can be restored below.
    auto dshSize = dsh.getMaxAvailableSpace();
    auto ihSize = ih.getMaxAvailableSpace();
    auto iohSize = ioh.getMaxAvailableSpace();
    auto sshSize = ssh.getMaxAvailableSpace();

    dsh.replaceBuffer(dsh.getBase(), 0);
    ih.replaceBuffer(ih.getBase(), 0);
    ioh.replaceBuffer(ioh.getBase(), 0);
    ssh.replaceBuffer(ssh.getBase(), 0);

    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.overrideMediaVFEStateDirty(false);

    configureCSRHeapStatesToNonDirty();

    dsh.replaceBuffer(dsh.getBase(), dshSize);
    ih.replaceBuffer(ih.getBase(), ihSize);
    ioh.replaceBuffer(ioh.getBase(), iohSize);
    ssh.replaceBuffer(ssh.getBase(), sshSize);

    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);

    parseCommands(commandStreamReceiver.commandStream, 0);

    auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), stateBaseAddressItor);
}

// With the heap states registered and unchanged, the first bytes of the
// receiver's stream (if any) must not decode via genCmdCast.
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldNotBeSentIfTheSame) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = true;

    configureCSRHeapStatesToNonDirty();

    flushTask(commandStreamReceiver);

    // Only the command at the very start of the stream is inspected.
    auto base = commandStreamReceiver.commandStream.getBase();

    auto stateBaseAddress = base
                                ? genCmdCast(base)
                                : nullptr;
    EXPECT_EQ(nullptr, stateBaseAddress);
}

// A non-blocking flush must not change the used size of the fixture's task
// command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldntAddAnyCommandsToCQCSIfEmpty) {
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
    auto usedBefore = commandStream.getUsed();
    flushTask(commandStreamReceiver);

    EXPECT_EQ(usedBefore, commandStream.getUsed());
}

// A blocking flush must leave a command matched by find() in the fixture's
// task command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingflushTaskAddsPCToClient) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
    auto blocking = true;
    flushTask(commandStreamReceiver, blocking);

    parseCommands(commandStream, 0);

    auto itorPC = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);
}

// Blocking flush with the receiver's level (6) ahead of the submitted level
// (5): the test expects the receiver's level to end at 7 and its count at 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingFlushWithNoPreviousDependencies) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    taskLevel = 5;
    commandStreamReceiver.taskLevel = 6;

    auto blocking = true;
    flushTask(commandStreamReceiver, blocking);

    EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel());
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
}

// Same setup, non-blocking: the receiver's level must stay at 6 and its count
// must be 1.
HWTEST_F(CommandStreamReceiverFlushTaskTests, nonblockingFlushWithNoPreviousDependencies) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    taskLevel = 5;
    commandStreamReceiver.taskLevel = 6;

    auto blocking = false;
    flushTask(commandStreamReceiver, blocking);

    EXPECT_EQ(6u, commandStreamReceiver.peekTaskLevel());
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
}

// The receiver's stream is pre-filled so that only the preamble size remains
// available before the flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreamble) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
    typedef typename FamilyType::MI_BATCH_BUFFER_END
MI_BATCH_BUFFER_END;
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    // Force a PIPE_CONTROL through a taskLevel transition
    taskLevel = commandStreamReceiver.peekTaskLevel() + 1;
    commandStreamReceiver.lastSentCoherencyRequest = 0;

    auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false);
    commandStreamReceiver.lastSentL3Config = l3Config;

    auto &csrCS = commandStreamReceiver.getCS();
    // Expected footprint: preamble + state base address (with its pipe control)
    // + task-level pipe control + batch-buffer end, rounded up to a cache line.
    size_t sizeNeededForPreamble = getSizeRequiredPreambleCS(MockDevice(commandStreamReceiver.hwInfo));
    size_t sizeNeededForStateBaseAddress = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL);
    size_t sizeNeededForPipeControl = commandStreamReceiver.getRequiredPipeControlSize();
    size_t sizeNeeded = sizeNeededForPreamble +
                        sizeNeededForStateBaseAddress +
                        sizeNeededForPipeControl +
                        sizeof(MI_BATCH_BUFFER_END);

    sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize);

    // Consume everything except the preamble's worth of space.
    csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble);

    flushTask(commandStreamReceiver);

    // The used size equals the full footprint.
    // NOTE(review): presumably because the receiver switched to a fresh buffer - confirm.
    EXPECT_EQ(sizeNeeded, csrCS.getUsed());
}

// Same as the previous test, but the pre-fill leaves room for the preamble
// plus the state-base-address portion.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreambleAndSba) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
    typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    // Force a PIPE_CONTROL through a taskLevel transition
    taskLevel = commandStreamReceiver.peekTaskLevel() + 1;
    commandStreamReceiver.lastSentCoherencyRequest = 0;

    auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false);
    commandStreamReceiver.lastSentL3Config = l3Config;

    auto &csrCS = commandStreamReceiver.getCS();
    size_t sizeNeededForPreamble = getSizeRequiredPreambleCS(MockDevice(commandStreamReceiver.hwInfo));
    size_t sizeNeededForStateBaseAddress = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL);
    size_t sizeNeededForPipeControl = commandStreamReceiver.getRequiredPipeControlSize();
    size_t sizeNeeded = sizeNeededForPreamble +
                        sizeNeededForStateBaseAddress +
                        sizeNeededForPipeControl +
                        sizeof(MI_BATCH_BUFFER_END);

    sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize);

    csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble - sizeNeededForStateBaseAddress);

    flushTask(commandStreamReceiver);

    EXPECT_EQ(sizeNeeded, csrCS.getUsed());
}

// The pre-fill leaves exactly getRequiredCmdStreamSize(flags) available, so the
// flush is expected to append to the current buffer (same base pointer).
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreambleSbaAndPc) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
    typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;

    commandStream.getSpace(sizeof(PIPE_CONTROL));

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    // Force a PIPE_CONTROL through a taskLevel transition
    taskLevel = commandStreamReceiver.peekTaskLevel() + 1;
    commandStreamReceiver.lastSentCoherencyRequest = 0;

    auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false);
    commandStreamReceiver.lastSentL3Config = l3Config;

    auto &csrCS = commandStreamReceiver.getCS();
    size_t sizeNeeded = getSizeRequiredPreambleCS(MockDevice(commandStreamReceiver.hwInfo)) +
                        sizeof(STATE_BASE_ADDRESS) +
                        sizeof(PIPE_CONTROL) +
                        commandStreamReceiver.getRequiredPipeControlSize() +
                        sizeof(MI_BATCH_BUFFER_START);

    sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize);

    DispatchFlags flags;
    csrCS.getSpace(csrCS.getAvailableSpace() - commandStreamReceiver.getRequiredCmdStreamSize(flags));
    auto expectedBase = csrCS.getBase();

    // This case handles when we have *just* enough space
    auto expectedUsed = csrCS.getUsed() + sizeNeeded;

    flushTask(commandStreamReceiver);

    // Verify that we didn't grab a new CS buffer
    EXPECT_EQ(expectedUsed, csrCS.getUsed());
    EXPECT_EQ(expectedBase, csrCS.getBase());
}

// ULT receiver whose flush() override only counts how often it is called and
// returns a flush stamp of 0; the batch buffer is not submitted anywhere.
// NOTE(review): the template parameter list and the base-class template
// argument appear to be missing here - confirm against the repository.
template struct CommandStreamReceiverHwLog : public UltCommandStreamReceiver {
    CommandStreamReceiverHwLog(const HardwareInfo &hwInfoIn) : UltCommandStreamReceiver(hwInfoIn),
                                                               flushCount(0) {
    }

    // Parameters are unused; each call increments flushCount.
    FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override {
        ++flushCount;
        return 0;
    }

    // Number of flush() calls observed so far.
    int flushCount;
};

// With content in the task stream, one flushTask must call flush() exactly once.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithBothCSCallsFlushOnce) {
    CommandStreamReceiverHwLog commandStreamReceiver(*platformDevices[0]);
    commandStreamReceiver.setMemoryManager(pDevice->getMemoryManager());
    commandStream.getSpace(sizeof(typename FamilyType::MI_NOOP));

    flushTask(commandStreamReceiver);
    EXPECT_EQ(1, commandStreamReceiver.flushCount);
}

// When flushing from a non-zero start offset, the receiver's stream must
// contain an MI_BATCH_BUFFER_START whose address is the task stream's base
// plus that offset and whose address space indicator is PPGTT.
HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithBothCSCallsChainsWithBatchBufferStart) {
    typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
    typedef typename FamilyType::MI_NOOP MI_NOOP;

    UltCommandStreamReceiver commandStreamReceiver(*platformDevices[0]);
    commandStreamReceiver.setMemoryManager(pDevice->getMemoryManager());

    // Reserve space for 16 NOOPs
    commandStream.getSpace(16 * sizeof(MI_NOOP));

    // Submit starting at 8 NOOPs
    size_t startOffset = 8 * sizeof(MI_NOOP);
    flushTask(commandStreamReceiver, false, startOffset);

    // Locate the MI_BATCH_BUFFER_START
    parseCommands(commandStreamReceiver.commandStream, 0);
    auto itorBBS = find(cmdList.begin(), cmdList.end());
    // Fatal: the iterator is dereferenced on the next line.
    ASSERT_NE(cmdList.end(), itorBBS);

    auto bbs = genCmdCast(*itorBBS);
    ASSERT_NE(nullptr, bbs);

    // Expect to see address based on startOffset of task
    auto expectedAddress = static_cast(reinterpret_cast(ptrOffset(commandStream.getBase(), startOffset)));
    EXPECT_EQ(expectedAddress, bbs->getBatchBufferStartAddressGraphicsaddress472());

    // MI_BATCH_BUFFER_START from UMD must be PPGTT for security reasons
    EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT,
bbs->getAddressSpaceIndicator());
}

// NOTE(review): the template argument of Test appears to be missing here
// (angle-bracket lists look stripped throughout this file) - confirm against
// the repository.
typedef Test CommandStreamReceiverCQFlushTaskTests;

// Requests almost a full 0x1000-byte command stream from the queue, consumes
// most of it, performs a blocking flush and checks that the resulting used size
// is cache-line aligned and lands on one of the two expected values.
HWTEST_F(CommandStreamReceiverCQFlushTaskTests, getCSShouldReturnACSWithEnoughSizeCSRTraffic) {
    CommandQueueHw commandQueue(nullptr, pDevice, 0);
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();

    // NOTE: This test attempts to reserve the maximum amount
    // of memory such that if a client gets everything he wants
    // we don't overflow/corrupt memory when CSR appends its
    // work.
    size_t sizeCQReserves = CSRequirements::minCommandQueueCommandStreamSize;

    size_t sizeRequested = 0x1000 - sizeCQReserves;
    auto &commandStream = commandQueue.getCS(sizeRequested);
    ASSERT_GE(0x1000u, commandStream.getMaxAvailableSpace());

    EXPECT_GE(commandStream.getAvailableSpace(), sizeRequested);
    commandStream.getSpace(sizeRequested - sizeCQReserves);

    // Dummy allocation used for all four heaps; the address is a placeholder.
    GraphicsAllocation allocation = GraphicsAllocation(reinterpret_cast<void *>(0x1234), 1);
    LinearStream linear(&allocation);

    auto blocking = true;
    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = blocking;

    commandStreamReceiver.flushTask(
        commandStream,
        0,
        linear,
        linear,
        linear,
        linear,
        1,
        dispatchFlags);

    auto expectedSize = 0x1000u - sizeCQReserves;

    // Gen8 is expected to use one PIPE_CONTROL less.
    if (::renderCoreFamily == IGFX_GEN8_CORE) {
        expectedSize -= sizeof(typename FamilyType::PIPE_CONTROL);
    }
    expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);

    auto currentUsed = commandStream.getUsed();
    EXPECT_EQ(0u, currentUsed % MemoryConstants::cacheLineSize);

    // Depending on the size of commands we may need a whole additional cache line for alignment.
    if (currentUsed != expectedSize) {
        EXPECT_EQ(expectedSize - MemoryConstants::cacheLineSize, currentUsed);
    } else {
        EXPECT_EQ(expectedSize, currentUsed);
    }
}

// With the receiver fully pre-configured, a blocking flush guarded by a pipe
// control must modify only the task stream: find() must hit in the task stream
// and the receiver's own stream must stay empty.
HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingFlushTaskWithOnlyPipeControl) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    CommandQueueHw commandQueue(nullptr, pDevice, 0);
    auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]);
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    // Configure the CSR to not need to submit any state or commands
    configureCSRtoNonDirtyState();

    // Force a PIPE_CONTROL through a blocking flag
    auto blocking = true;
    auto &commandStreamTask = commandQueue.getCS();
    auto &commandStreamCSR = commandStreamReceiver->getCS();
    commandStreamReceiver->lastSentCoherencyRequest = 0;

    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = blocking;
    dispatchFlags.guardCommandBufferWithPipeControl = true;

    commandStreamReceiver->flushTask(
        commandStreamTask,
        0,
        dsh,
        ih,
        ioh,
        ssh,
        taskLevel,
        dispatchFlags);

    // Verify that taskCS got modified, while csrCS remained intact
    EXPECT_GT(commandStreamTask.getUsed(), 0u);
    EXPECT_EQ(0u, commandStreamCSR.getUsed());

    // Parse command list to verify that PC got added to taskCS
    cmdList.clear();
    parseCommands(commandStreamTask, 0);
    auto itorTaskCS = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorTaskCS);

    // Parse command list to verify that PC wasn't added to csrCS
    cmdList.clear();
    parseCommands(commandStreamCSR, 0);
    auto numberOfPC = getCommandsList().size();
    EXPECT_EQ(0u, numberOfPC);
}

// Blocking flush with dcFlush requested: inspects the DC-flush bit of the pipe
// control(s) in the task stream; the expectations differ between Gen8 and the
// other core families.
HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskBlockingHasPipeControlWithDCFlush) {
    WhitelistedRegisters forceRegs = {0};
    pDevice->setForceWhitelistedRegs(true, &forceRegs);
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    CommandQueueHw commandQueue(nullptr, pDevice, 0);
    configureCSRtoNonDirtyState();

    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();

    auto &commandStreamTask = commandQueue.getCS();

    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = true;
    dispatchFlags.dcFlush = true;
    dispatchFlags.guardCommandBufferWithPipeControl = true;

    commandStreamReceiver.flushTask(
        commandStreamTask,
        0,
        dsh,
        ih,
        ioh,
        ssh,
        taskLevel,
        dispatchFlags);

    parseCommands(commandStreamTask, 0);
    auto itorPC = find(cmdList.begin(), cmdList.end());
    // Fatal assertion: itorPC is dereferenced in both branches that follow, so
    // continuing with an end iterator would be invalid.
    ASSERT_NE(cmdList.end(), itorPC);
    if (::renderCoreFamily != IGFX_GEN8_CORE)
{ // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_EQ(true, pCmdWA->getDcFlushEnable()); // Search taskCS for PC to analyze auto pipeControlTask = genCmdCast( ptrOffset(commandStreamTask.getBase(), 24)); ASSERT_NE(nullptr, pipeControlTask); // Verify that the dcFlushEnabled bit is not set in PC auto pCmd = reinterpret_cast(pipeControlTask); EXPECT_EQ(false, pCmd->getDcFlushEnable()); } else { // Verify that the dcFlushEnabled bit is not set in PC auto pCmd = reinterpret_cast(*itorPC); EXPECT_EQ(true, pCmd->getDcFlushEnable()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlushWhenUnblockedThenDCFlushIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto &commandStreamReceiver = pDevice->getCommandStreamReceiver(); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); auto &commandStreamTask = commandQueue.getCS(); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 1, &blockingEvent, 0); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_EQ(true, pCmdWA->getDcFlushEnable()); buffer->release(); BuiltIns::shutDown(); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFlushWhenUnblockedThenDCFlushIsNotAdded) { typedef typename FamilyType::PIPE_CONTROL 
PIPE_CONTROL; MockContext ctx(pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto &commandStreamReceiver = pDevice->getCommandStreamReceiver(); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); auto &commandStreamTask = commandQueue.getCS(); commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 1, &blockingEvent, 0); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_TRUE(pCmdWA->getDcFlushEnable()); buffer->release(); BuiltIns::shutDown(); } HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskWithTaskCSPassedAsCommandStreamParam) { CommandQueueHw commandQueue(nullptr, pDevice, 0); UltCommandStreamReceiver commandStreamReceiver(*platformDevices[0]); commandStreamReceiver.setMemoryManager(pDevice->getMemoryManager()); auto &commandStreamTask = commandQueue.getCS(); auto deviceEngineType = pDevice->getEngineType(); DispatchFlags dispatchFlags; // Pass taskCS as command stream parameter auto cs = commandStreamReceiver.flushTask( commandStreamTask, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); // Verify that flushTask returned a valid completion stamp EXPECT_EQ(commandStreamReceiver.peekTaskCount(), cs.taskCount); EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), cs.taskLevel); EXPECT_EQ(0u, cs.deviceOrdinal); EXPECT_EQ(deviceEngineType, cs.engineType); } HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenEmptyQueue) { 
MockContext ctx(pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.finish(false); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.finish(true); //nothings sent to the HW, no need to bump tags EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(0u, commandQueue.latestTaskCountWaited); } HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenNonDcFlushWithInitialTaskCountZero) { MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t GWS = 1; uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // finish after enqueued kernel(cmdq task count = 1) commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); commandQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, commandQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); // finish again - dont call flush task commandQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, commandQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenDcFlush) { MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); 
CommandQueueHw commandQueue(&ctx, pDevice, 0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t GWS = 1; size_t tempBuffer[] = {0, 1, 2}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // finish(dcFlush=true) from blocking MapBuffer after enqueued kernel commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); auto ptr = commandQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // cmdQ task count = 2, finish again commandQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, commandQueue.latestTaskCountWaited); // finish again - dont flush task again commandQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, commandQueue.latestTaskCountWaited); // finish(dcFlush=true) from MapBuffer again - dont call FinishTask n finished queue retVal = commandQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); ptr = commandQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); //cleanup retVal = commandQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseMemObject(buffer); EXPECT_EQ(retVal, CL_SUCCESS); } 
// Enqueues a 1-dimensional kernel with a power-of-two global work size (1024)
// and a null local work size, then verifies that the GPGPU_WALKER found in the
// queue's command stream has both execution masks fully set.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPowerOfTwoGlobalWorkSizeAndNullLocalWorkgroupSizeWhenEnqueueKernelIsCalledThenGpGpuWalkerHasOptimalSIMDmask) {
    typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;

    MockContext ctx(pDevice);
    MockKernelWithInternals kernel(*pDevice);
    size_t GWS = 1024;

    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);

    // Parse the queue's command stream and locate the walker
    auto &commandStreamTask = commandQueue.getCS();
    parseCommands<FamilyType>(commandStreamTask, 0);
    auto itorCmd = find<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(itorCmd, cmdList.end());

    // Guard the dereference below: stop here if the cast did not yield a walker
    auto cmdGpGpuWalker = genCmdCast<GPGPU_WALKER *>(*itorCmd);
    ASSERT_NE(nullptr, cmdGpGpuWalker);

    // execution masks should be all active
    EXPECT_EQ(0xffffffffu, cmdGpGpuWalker->getBottomExecutionMask());
    EXPECT_EQ(0xffffffffu, cmdGpGpuWalker->getRightExecutionMask());
}

// Enqueues a non-blocking read that returns an event, waits on that event, and
// verifies that the CSR's latest sent task count is 1 after the enqueue and is
// still 1 after the wait.
HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenEventIsQueried) {
    MockContext ctx(pDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pDevice, 0);
    cl_event event = nullptr;
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    size_t tempBuffer[] = {0, 1, 2};
    size_t dstBuffer[] = {5, 5, 5};

    cl_int retVal = CL_SUCCESS;

    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
    EXPECT_EQ(retVal, CL_SUCCESS);
    // The buffer is passed to the enqueue and released below; bail out if creation failed
    ASSERT_NE(nullptr, buffer);

    // Start queue and CSR from task count / task level zero
    uint32_t taskCount = 0;
    taskLevel = taskCount;
    commandQueue.taskCount = taskCount;
    commandQueue.taskLevel = taskCount;
    commandStreamReceiver.taskLevel = taskCount;
    commandStreamReceiver.taskCount = taskCount;
    EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount());

    retVal = commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, &event);
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_NE(nullptr, event);
    EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());

    // Only wait when an event was actually returned; cleanup below must still run
    if (event != nullptr) {
        retVal = Event::waitForEvents(1, &event);
        EXPECT_EQ(retVal, CL_SUCCESS);
    }

    // Waiting on the event must not bump the sent task count
    EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());

    // cleanup (same order as before: built-ins first, then event, then buffer)
    BuiltIns::shutDown();
    if (event != nullptr) {
        retVal = clReleaseEvent(event);
        EXPECT_EQ(retVal, CL_SUCCESS);
    }
    retVal = clReleaseMemObject(buffer);
    EXPECT_EQ(retVal, CL_SUCCESS);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingMapWhenFinishIsCalledThenNothingIsSubmittedToTheHardware) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t tempBuffer[] = {0, 1, 2}; cl_int retVal; AlignedBuffer mockBuffer; uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); auto ptr = commandQueue.enqueueMapBuffer(&mockBuffer, CL_FALSE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, ptr); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.finish(false); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); auto &commandStreamTask = commandQueue.getCS(); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenFlushedCallRequiringDCFlushWhenBlockingEnqueueIsCalledThenPipeControlWithDCFlushIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); cl_event event = nullptr; auto &commandStreamReceiver = pDevice->getCommandStreamReceiver(); auto &commandStreamTask = commandQueue.getCS(); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {5, 5, 5}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // Call requiring DCFlush, nonblocking buffer->forceDisallowCPUCopy = true; commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, 
sizeof(tempBuffer), dstBuffer, 0, 0, 0); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, &event); EXPECT_EQ(2u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(2u, commandQueue.latestTaskCountWaited); // Parse command list to verify that PC was added to taskCS cmdList.clear(); parseCommands(commandStreamTask, 0); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorCmd); auto cmdPC = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdPC); if (::renderCoreFamily != IGFX_GEN8_CORE) { // SKL+: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_TRUE(cmdPC->getDcFlushEnable()); auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmdPC = (PIPE_CONTROL *)*itorCmd2; EXPECT_FALSE(cmdPC->getDcFlushEnable()); } else { // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmdPC->getDcFlushEnable()); } BuiltIns::shutDown(); retVal = clReleaseEvent(event); retVal = clReleaseMemObject(buffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; size_t GWS = 1; MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); // Mark Pramble as sent, override L3Config to invalid to programL3 commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; ((MockKernel 
*)kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list to verify that PC was added to taskCS parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_NE(cmdList.end(), itorCmd); auto cmdMILoad = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdMILoad); // MI_LOAD_REGISTER should be preceded by PC EXPECT_NE(cmdList.begin(), itorCmd); --itorCmd; auto cmdPC = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdPC); uint32_t L3Config = PreambleHelper::getL3Config(*platformDevices[0], true); EXPECT_EQ(L3Config, (uint32_t)cmdMILoad->getDataDword()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { MockCsrHw commandStreamReceiver(*platformDevices[0]); EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, commandStreamReceiver.peekThreadArbitrationPolicy()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); uint32_t L3Config = PreambleHelper::getL3Config(*platformDevices[0], true); // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = L3Config; commandStreamReceiver->lastSentThreadAribtrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy(); ((MockKernel *)kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // 
Parse command list to verify that PC was added to taskCS parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); EXPECT_EQ(cmdList.end(), itorCmd); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblocking) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->resetCommandStreamReceiver(commandStreamReceiver); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; auto &commandStreamCSR = commandStreamReceiver->getCS(); uint32_t L3Config = PreambleHelper::getL3Config(*platformDevices[0], false); // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; ((MockKernel *)kernel)->setTotalSLMSize(1024); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 1, &blockingEvent, nullptr); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); cmdList.clear(); // Parse command list parseCommands(commandStreamCSR, 0); // Expect L3 was programmed auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_NE(cmdList.end(), itorCmd); auto cmdMILoad = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdMILoad); L3Config = PreambleHelper::getL3Config(*platformDevices[0], true); EXPECT_EQ(L3Config, (uint32_t)cmdMILoad->getDataDword()); } namespace OCLRT { CommandStreamReceiver *createCommandStream(const HardwareInfo *pHwInfo); } HWTEST_F(CommandStreamReceiverFlushTaskTests, CreateCommandStreamReceiverHw) { const HardwareInfo hwInfo = *platformDevices[0]; auto csrHw = 
CommandStreamReceiverHw::create(hwInfo); EXPECT_NE(nullptr, csrHw); MemoryManager *mm = csrHw->createMemoryManager(false); EXPECT_EQ(nullptr, mm); delete csrHw; DebugManager.flags.SetCommandStreamReceiver.set(0); int32_t GetCsr = DebugManager.flags.SetCommandStreamReceiver.get(); EXPECT_EQ(0, GetCsr); auto csr = OCLRT::createCommandStream(&hwInfo); EXPECT_NE(nullptr, csr); delete csr; DebugManager.flags.SetCommandStreamReceiver.set(0); } HWTEST_F(CommandStreamReceiverFlushTaskTests, handleTagAndScratchAllocationsResidencyOnEachFlush) { auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSize(1024); // whatever > 0 flushTask(*commandStreamReceiver); auto tagAllocation = commandStreamReceiver->getTagAllocation(); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(tagAllocation, nullptr); ASSERT_NE(scratchAllocation, nullptr); EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); // call makeResident on tag and scratch allocations per each flush // DONT skip residency calls when scratch allocation is the same(new required size <= previous size) commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); flushTask(*commandStreamReceiver); // 2nd flush auto NewScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_EQ(scratchAllocation, NewScratchAllocation); // Allocation unchanged. 
Dont skip residency handling EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTwoConsecutiveNDRangeKernelsStateBaseAddressIsProgrammedOnceAndScratchAddressInMediaVFEStateIsProgrammedTwiceBothWithCorrectAddress) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->resetCommandStreamReceiver(commandStreamReceiver); size_t GWS = 1; uint32_t scratchSize = 1024; SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = scratchSize; kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } cmdList.clear(); // Parse command list parseCommands(commandQueue); // All state should be programmed before walker auto itorCmdForStateBase = itorStateBaseAddress; auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState; auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); uint64_t graphicsAddress = 0; // Get address ( 
offset in 32 bit addressing ) of sratch graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch(); if (pDevice->getDeviceInfo().force32BitAddressess == true && is64bit) { EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation); EXPECT_EQ((uint64_t)graphicsAllocationScratch->getGpuAddress() - GSHaddress, graphicsAddress); } else { EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress); } uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff); uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff); uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); if (is64bit && !pDevice->getDeviceInfo().force32BitAddressess) { uint64_t expectedAddress = PreambleHelper::getScratchSpaceOffsetFor64bit(); EXPECT_EQ(expectedAddress, scratchBaseLowPart); EXPECT_EQ(0u, scratchBaseHighPart); } else { EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart); EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart); } if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_EQ(pDevice->getMemoryManager()->allocator32Bit->getBase(), GSHaddress); } else { if (is64bit) EXPECT_EQ(graphicsAddress - PreambleHelper::getScratchSpaceOffsetFor64bit(), GSHaddress); else EXPECT_EQ(0u, GSHaddress); } //now re-try to see if SBA is not programmed scratchSize *= 2; mediaVFEstate.PerThreadScratchSpace = scratchSize; commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list parseCommands(commandQueue); itorCmdForStateBase = find(itorWalker, cmdList.end()); // In 32 Bit addressing sba shouldn't be reprogrammed if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_EQ(itorCmdForStateBase, cmdList.end()); } auto itorMediaVfeStateSecond = find(itorWalker, cmdList.end()); auto *cmdMediaVfeStateSecond = (MEDIA_VFE_STATE 
*)*itorMediaVfeStateSecond; EXPECT_NE(mediaVfeState, cmdMediaVfeStateSecond); uint64_t oldScratchAddr = ((uint64_t)scratchBaseHighPart << 32u) | scratchBaseLowPart; uint64_t newScratchAddr = ((uint64_t)cmdMediaVfeStateSecond->getScratchSpaceBasePointerHigh() << 32u) | cmdMediaVfeStateSecond->getScratchSpaceBasePointer(); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_NE(oldScratchAddr, newScratchAddr); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNDRangeKernelAndReadBufferStateBaseAddressAndScratchAddressInMediaVFEStateIsProgrammedForNDRangeAndReprogramedForReadBufferAndGSBAFlagIsResetToFalse) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; MockContext ctx(pDevice); MockKernelWithInternals kernel(*pDevice); CommandQueueHw commandQueue(&ctx, pDevice, 0); auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->resetCommandStreamReceiver(commandStreamReceiver); size_t GWS = 1; uint32_t scratchSize = 1024; SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = scratchSize; kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } cmdList.clear(); // Parse command list parseCommands(commandQueue); // All state should be programmed before walker auto itorCmdForStateBase = itorStateBaseAddress; auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState; auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS 
*)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); uint64_t graphicsAddress = 0; // Get address ( offset in 32 bit addressing ) of sratch graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch(); if (pDevice->getDeviceInfo().force32BitAddressess == true && is64bit) { EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation); EXPECT_EQ((uint64_t)graphicsAllocationScratch->getGpuAddress() - GSHaddress, graphicsAddress); } else { EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress); } uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff); uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff); uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); if (is64bit && !pDevice->getDeviceInfo().force32BitAddressess) { lowPartGraphicsAddress = PreambleHelper::getScratchSpaceOffsetFor64bit(); highPartGraphicsAddress = 0u; } EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart); EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_EQ(pDevice->getMemoryManager()->allocator32Bit->getBase(), GSHaddress); } else { if (is64bit) EXPECT_EQ(graphicsAddress - PreambleHelper::getScratchSpaceOffsetFor64bit(), GSHaddress); else EXPECT_EQ(0u, GSHaddress); } size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 0, 0}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, 0, 0, 0); // Parse command list parseCommands(commandQueue); itorCmdForStateBase = find(itorWalker, cmdList.end()); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_NE(itorWalker, itorCmdForStateBase); if (itorCmdForStateBase != 
cmdList.end()) { auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress2 = (uintptr_t)sba2->getGeneralStateBaseAddress(); EXPECT_NE(sba, sba2); EXPECT_EQ(0u, GSHaddress2); if (pDevice->getDeviceInfo().force32BitAddressess == true) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } } } delete buffer; if (pDevice->getDeviceInfo().force32BitAddressess == true) { // Asserts placed after restoring old CSR to avoid heap corruption ASSERT_NE(itorCmdForStateBase, cmdList.end()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeDoNotStore32bitScratchAllocationOnReusableAllocationList) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); auto commandStreamReceiver = new MockCsrHw(*platformDevices[0]); pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size) flushTask(*commandStreamReceiver); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(scratchAllocation, nullptr); commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed std::unique_ptr allocationReusable = pDevice->getMemoryManager()->obtainReusableAllocation(4096); if (allocationReusable.get() != nullptr) { if (is64bit) EXPECT_NE(scratchAllocation, allocationReusable.get()); pDevice->getMemoryManager()->freeGraphicsMemory(allocationReusable.release()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeStore32bitScratchAllocationOnTemporaryAllocationList) { if (is64bit) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); auto commandStreamReceiver = new 
MockCsrHw(*platformDevices[0]); pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size) flushTask(*commandStreamReceiver); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(scratchAllocation, nullptr); commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed std::unique_ptr allocationTemporary = pDevice->getMemoryManager()->graphicsAllocations.detachAllocation(0, nullptr); EXPECT_EQ(scratchAllocation, allocationTemporary.get()); pDevice->getMemoryManager()->freeGraphicsMemory(allocationTemporary.release()); } } TEST(CacheSettings, GivenCacheSettingWhenCheckedForValuesThenProperValuesAreSelected) { EXPECT_EQ(static_cast(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), CacheSettings::l3CacheOff); EXPECT_EQ(static_cast(GMM_RESOURCE_USAGE_OCL_BUFFER), CacheSettings::l3CacheOn); } HWTEST_F(UltCommandStreamReceiverTest, addPipeControlWithFlushAllCaches) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; DebugManagerStateRestore dbgRestorer; DebugManager.flags.FlushAllCaches.set(true); auto &csr = pDevice->getCommandStreamReceiver(); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); csr.addPipeControl(stream, false); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_TRUE(pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable()); EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable()); 
EXPECT_TRUE(pipeControl->getPipeControlFlushEnable()); EXPECT_TRUE(pipeControl->getVfCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledPreemptionWhenFlushTaskCalledThenDontProgramMediaVfeStateAgain) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = pDevice->getUltCommandStreamReceiver(); HardwareParse hwParser; flushTask(csr, false, 0); hwParser.parseCommands(csr.commandStream, 0); auto cmd = hwParser.getCommand(); EXPECT_NE(nullptr, cmd); // program again csr.overrideMediaVFEStateDirty(false); auto offset = csr.commandStream.getUsed(); flushTask(csr, false, commandStream.getUsed()); hwParser.cmdList.clear(); hwParser.parseCommands(csr.commandStream, offset); cmd = hwParser.getCommand(); EXPECT_EQ(nullptr, cmd); } HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAndL3ConfigChanged) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; commandStream.getSpace(sizeof(PIPE_CONTROL)); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeeded = 2 * sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MEDIA_VFE_STATE) + sizeof(MI_BATCH_BUFFER_START) + sizeof(STATE_BASE_ADDRESS) + 
sizeof(PIPE_CONTROL) + commandStreamReceiver.getRequiredPipeControlSize(); sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); auto expectedUsed = csrCS.getUsed() + sizeNeeded; DispatchFlags dispatchFlags; dispatchFlags.useSLM = true; commandStreamReceiver.flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); // Verify that we didn't grab a new CS buffer EXPECT_EQ(expectedUsed, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, requiredCsrSizeAlignedToCacheline) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); DispatchFlags flags; auto l3ConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags); csrSizeRequest.l3ConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags); EXPECT_EQ(alignUp(l3ConfigChangedSize, MemoryConstants::cacheLineSize), l3ConfigChangedSize); EXPECT_EQ(alignUp(l3ConfigNotChangedSize, MemoryConstants::cacheLineSize), l3ConfigNotChangedSize); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(0, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateAndBatchingModeWhenflushTaskIsCalledThenSubmissionIsNotRecorded) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto 
mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(0, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); //surfaces are non resident auto &surfacesForResidency = mockCsr->getMemoryManager()->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenNothingIsSubmittedToTheHwAndSubmissionIsRecorded) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_EQ(cmdBufferList.peekHead(), cmdBufferList.peekTail()); auto cmdBuffer = cmdBufferList.peekHead(); //we should have 4 heaps, tag allocation and csr command stream + cq EXPECT_EQ(6u, cmdBuffer->surfaces.size()); EXPECT_EQ(0, mockCsr->flushCalledCount); //we should be submitting via csr 
EXPECT_EQ(cmdBuffer->batchBuffer.commandBufferAllocation, mockCsr->commandStream.getGraphicsAllocation()); EXPECT_EQ(cmdBuffer->batchBuffer.startOffset, 0u); EXPECT_FALSE(cmdBuffer->batchBuffer.requiresCoherency); EXPECT_FALSE(cmdBuffer->batchBuffer.low_priority); //find BB END parseCommands(commandStream, 0); auto itBBend = find(cmdList.begin(), cmdList.end()); void *bbEndAddress = *itBBend; EXPECT_EQ(bbEndAddress, cmdBuffer->batchBufferEndLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndTwoRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto secondBatchBuffer = primaryBatch->next; auto bbEndLocation = primaryBatch->batchBufferEndLocation; auto secondBatchBufferAddress = (uint64_t)ptrOffset(secondBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), secondBatchBuffer->batchBuffer.startOffset); mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(secondBatchBufferAddress, 
batchBufferStart->getBatchBufferStartAddressGraphicsaddress472()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto lastBatchBuffer = primaryBatch->next->next; auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; auto lastBatchBufferAddress = (uint64_t)ptrOffset(lastBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), lastBatchBuffer->batchBuffer.startOffset); mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddressGraphicsaddress472()); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersThatUsesAllResourceWhenFlushTaskIsCalledThenBatchBuffersAreNotCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef 
typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); auto memorySize = (size_t)mockCsr->getMemoryManager()->device->getDeviceInfo().globalMemSize; GraphicsAllocation largeAllocation(nullptr, memorySize); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); mockCsr->makeResident(largeAllocation); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); mockCsr->makeResident(largeAllocation); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_EQ(nullptr, batchBufferStart); auto bbEnd = genCmdCast(bbEndLocation); EXPECT_NE(nullptr, bbEnd); EXPECT_EQ(3, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledTwiceThenNothingIsSubmittedToTheHwAndTwoSubmissionAreRecorded) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto initialBase = commandStream.getBase(); auto initialUsed = commandStream.getUsed(); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); 
mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); //ensure command stream still used EXPECT_EQ(initialBase, commandStream.getBase()); auto baseAfterFirstFlushTask = commandStream.getBase(); auto usedAfterFirstFlushTask = commandStream.getUsed(); dispatchFlags.requiresCoherency = true; dispatchFlags.low_priority = true; mockCsr->flushTask(commandStream, commandStream.getUsed(), dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto baseAfterSecondFlushTask = commandStream.getBase(); auto usedAfterSecondFlushTask = commandStream.getUsed(); EXPECT_EQ(initialBase, commandStream.getBase()); EXPECT_EQ(baseAfterSecondFlushTask, baseAfterFirstFlushTask); EXPECT_EQ(baseAfterFirstFlushTask, initialBase); EXPECT_GT(usedAfterFirstFlushTask, initialUsed); EXPECT_GT(usedAfterSecondFlushTask, usedAfterFirstFlushTask); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_NE(cmdBufferList.peekHead(), cmdBufferList.peekTail()); EXPECT_NE(nullptr, cmdBufferList.peekTail()); EXPECT_NE(nullptr, cmdBufferList.peekHead()); auto cmdBuffer1 = cmdBufferList.peekHead(); auto cmdBuffer2 = cmdBufferList.peekTail(); EXPECT_GT(cmdBuffer2->batchBufferEndLocation, cmdBuffer1->batchBufferEndLocation); EXPECT_FALSE(cmdBuffer1->batchBuffer.requiresCoherency); EXPECT_TRUE(cmdBuffer2->batchBuffer.requiresCoherency); EXPECT_FALSE(cmdBuffer1->batchBuffer.low_priority); EXPECT_TRUE(cmdBuffer2->batchBuffer.low_priority); EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrInBatchingModeWhenRecordedBatchBufferIsBeingSubmittedThenFlushIsCalledWithRecordedCommandBuffer) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.requiresCoherency = true; mockCsr->lastSentCoherencyRequest = 1; commandStream.getSpace(4); mockCsr->flushTask(commandStream, 4, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(0, mockCsr->flushCalledCount); auto &surfacesForResidency = mockCsr->getMemoryManager()->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(5u, cmdBuffer->surfaces.size()); //copy those surfaces std::vector residentSurfaces = cmdBuffer->surfaces; for (auto &graphicsAllocation : residentSurfaces) { EXPECT_TRUE(graphicsAllocation->isResident()); EXPECT_EQ(1, graphicsAllocation->residencyTaskCount); } mockCsr->flushBatchedSubmissions(); EXPECT_FALSE(mockCsr->recordedCommandBuffer.batchBuffer.low_priority); EXPECT_TRUE(mockCsr->recordedCommandBuffer.batchBuffer.requiresCoherency); EXPECT_EQ(mockCsr->recordedCommandBuffer.batchBuffer.commandBufferAllocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(4u, mockCsr->recordedCommandBuffer.batchBuffer.startOffset); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); EXPECT_EQ(0u, surfacesForResidency.size()); for (auto &graphicsAllocation : 
residentSurfaces) { EXPECT_FALSE(graphicsAllocation->isResident()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrCreatedWithDedicatedDebugFlagWhenItIsCreatedThenItHasProperDispatchMode) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(CommandStreamReceiver::DispatchMode::AdaptiveDispatch); std::unique_ptr> mockCsr(new MockCsrHw2(*platformDevices[0])); EXPECT_EQ(CommandStreamReceiver::DispatchMode::AdaptiveDispatch, mockCsr->dispatchMode); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenBlockingCommandIsSendThenItIsFlushedAndNotBatched) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags; dispatchFlags.blocking = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenBufferToFlushWhenFlushTaskCalledThenUpdateFlushStamp) { auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); commandStream.getSpace(1); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_GT(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), cmplStamp.flushStamp); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) { auto mockCsr = new 
MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(previousFlushStamp, cmplStamp.flushStamp); EXPECT_EQ(0, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenFlushedTaskCountIsNotModifed) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; auto &csr = pDevice->getCommandStreamReceiver(); csr.flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(1u, csr.peekLatestSentTaskCount()); EXPECT_EQ(1u, csr.peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForTaskCountIsCalledWithTaskCountThatWasNotYetFlushedThenBatchedCommandBuffersAreSubmitted) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto 
&commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(1u, cmdBuffer->taskCount); mockCsr->waitForCompletionWithTimeout(false, 1, 1); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); EXPECT_TRUE(cmdBufferList.peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenEnqueueIsMadeThenCurrentMemoryUsedIsTracked) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } EXPECT_EQ(expectedUsed, mockCsr->peekTotalMemoryUsed()); //after flush it goes to 0 mockCsr->flushBatchedSubmissions(); EXPECT_EQ(0u, 
mockCsr->peekTotalMemoryUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenSusbsequentEnqueueIsMadeThenOnlyNewResourcesAreTrackedForMemoryUsage) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } auto additionalSize = 1234; GraphicsAllocation graphicsAllocation(nullptr, additionalSize); mockCsr->makeResident(graphicsAllocation); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); EXPECT_EQ(expectedUsed + additionalSize, mockCsr->peekTotalMemoryUsed()); } struct MockedMemoryManager : public OsAgnosticMemoryManager { bool isMemoryBudgetExhausted() const override { return budgetExhausted; } bool budgetExhausted = false; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalResourceUsedExhaustsTheBudgetThenDoImplicitFlush) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr mockedMemoryManager(new MockedMemoryManager()); std::unique_ptr> mockCsr(new MockCsrHw2(*platformDevices[0])); mockedMemoryManager->device = pDevice; mockCsr->setMemoryManager(mockedMemoryManager.get()); mockCsr->setTagAllocation(pDevice->getTagAllocation()); 
mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; mockedMemoryManager->budgetExhausted = true; mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } EXPECT_EQ(expectedUsed, mockCsr->peekTotalMemoryUsed()); auto budgetSize = (size_t)mockCsr->getMemoryManager()->device->getDeviceInfo().globalMemSize; GraphicsAllocation hugeAllocation(nullptr, budgetSize / 4); mockCsr->makeResident(hugeAllocation); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevel, dispatchFlags); //expect 2 flushes, since we cannot batch those submissions EXPECT_EQ(2u, mockCsr->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, mockCsr->peekTotalMemoryUsed()); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTwoTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); 
mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_NE(nullptr, firstCmdBuffer->pipeControlLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_NE(nullptr, secondCmdBuffer->pipeControlLocation); EXPECT_NE(firstCmdBuffer->pipeControlLocation, secondCmdBuffer->pipeControlLocation); auto ppc = genCmdCast(firstCmdBuffer->pipeControlLocation); EXPECT_NE(nullptr, ppc); auto ppc2 = genCmdCast(secondCmdBuffer->pipeControlLocation); EXPECT_NE(nullptr, ppc2); //flush needs to bump the taskLevel mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); //make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); //first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); auto noop2 = genCmdCast(ppc2); EXPECT_NE(nullptr, noop1); EXPECT_EQ(nullptr, noop2); auto ppcAfterChange = genCmdCast(ppc2); EXPECT_NE(nullptr, ppcAfterChange); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTwoTasksArePassedWithTheSameLevelThenThereIsPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, firstCmdBuffer->pipeControlLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_EQ(nullptr, secondCmdBuffer->pipeControlLocation); mockCsr->flushBatchedSubmissions(); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenPipeControlForNoopAddressIsNullThenPipeControlIsNotNooped) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new 
MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto ppc1Location = firstCmdBuffer->pipeControlLocation; firstCmdBuffer->pipeControlLocation = nullptr; auto ppc = genCmdCast(ppc1Location); EXPECT_NE(nullptr, ppc); //call flush, both pipe controls must remain untouched mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenThreeTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*platformDevices[0]); pDevice->resetCommandStreamReceiver(mockCsr); 
mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags; dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); mockCsr->flushTask(commandStream, 0, dsh, ih, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto secondCmdBuffer = firstCmdBuffer->next; auto thirdCmdBuffer = firstCmdBuffer->next->next; EXPECT_NE(nullptr, thirdCmdBuffer->pipeControlLocation); EXPECT_NE(firstCmdBuffer->pipeControlLocation, thirdCmdBuffer->pipeControlLocation); auto ppc = genCmdCast(firstCmdBuffer->pipeControlLocation); auto ppc2 = genCmdCast(secondCmdBuffer->pipeControlLocation); auto ppc3 = genCmdCast(thirdCmdBuffer->pipeControlLocation); EXPECT_NE(nullptr, ppc2); EXPECT_NE(nullptr, ppc3); //flush needs to bump the taskLevel mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorBatchBufferStartThird = find(++itorBatchBufferStartSecond, cmdList.end()); //make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), 
itorBatchBufferStartSecond); EXPECT_NE(cmdList.end(), itorBatchBufferStartThird); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorBatchBufferStartThird, itorBatchBufferStartSecond); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); itorPipeControl = find(itorBatchBufferStartSecond, itorBatchBufferStartThird); EXPECT_EQ(itorPipeControl, itorBatchBufferStartThird); //first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); auto noop2 = genCmdCast(ppc2); auto noop3 = genCmdCast(ppc3); EXPECT_NE(nullptr, noop1); EXPECT_NE(nullptr, noop2); EXPECT_EQ(nullptr, noop3); auto ppcAfterChange = genCmdCast(ppc3); EXPECT_NE(nullptr, ppcAfterChange); } typedef UltCommandStreamReceiverTest CommandStreamReceiverCleanupTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAllocationsArePresentThenCleanupResourcesOnlyCleansThoseAboveLatestFlushTaskLevel) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto memoryManager = pDevice->getMemoryManager(); auto temporaryToClean = memoryManager->allocateGraphicsMemory(4096u); auto temporaryToHold = memoryManager->allocateGraphicsMemory(4096u); auto reusableToClean = memoryManager->allocateGraphicsMemory(4096u); auto reusableToHold = memoryManager->allocateGraphicsMemory(4096u); memoryManager->storeAllocation(std::unique_ptr(temporaryToClean), TEMPORARY_ALLOCATION); memoryManager->storeAllocation(std::unique_ptr(temporaryToHold), TEMPORARY_ALLOCATION); memoryManager->storeAllocation(std::unique_ptr(reusableToClean), REUSABLE_ALLOCATION); memoryManager->storeAllocation(std::unique_ptr(reusableToHold), REUSABLE_ALLOCATION); temporaryToClean->taskCount = 1; reusableToClean->taskCount = 1; temporaryToHold->taskCount = 10; reusableToHold->taskCount = 10; commandStreamReceiver.latestFlushedTaskCount = 9; 
commandStreamReceiver.cleanupResources(); EXPECT_EQ(reusableToHold, memoryManager->allocationsForReuse.peekHead()); EXPECT_EQ(reusableToHold, memoryManager->allocationsForReuse.peekTail()); EXPECT_EQ(temporaryToHold, memoryManager->graphicsAllocations.peekHead()); EXPECT_EQ(temporaryToHold, memoryManager->graphicsAllocations.peekTail()); commandStreamReceiver.latestFlushedTaskCount = 11; commandStreamReceiver.cleanupResources(); EXPECT_TRUE(memoryManager->allocationsForReuse.peekIsEmpty()); EXPECT_TRUE(memoryManager->graphicsAllocations.peekIsEmpty()); }