/*
 * Copyright (C) 2018-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/built_ins/built_ins.h"
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_stream/linear_stream.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/command_stream/scratch_space_controller.h"
#include "runtime/event/user_event.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/blit_commands_helper.h"
#include "runtime/helpers/cache_policy.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include "runtime/os_interface/os_context.h"
#include "runtime/utilities/linux/debug_env_reader.h"
#include "test.h"
#include "unit_tests/fixtures/built_in_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/ult_command_stream_receiver_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/helpers/unit_test_helper.h"
#include "unit_tests/libult/create_command_stream.h"
#include "unit_tests/libult/ult_command_stream_receiver.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_event.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"
#include "unit_tests/utilities/base_object_utils.h"

#include "reg_configs_common.h"

#include <functional>
#include <memory>

using namespace NEO;

// NOTE(review): the explicit template argument lists in this file (<FamilyType>, the
// genCmdCast/static_cast/reinterpret_cast targets, StackVec<char, 4096>, MockCsrHw,
// OsAgnosticMemoryManager, Test<DeviceFixture>, MI_NOOP, the <memory> include) were
// reconstructed during review -- confirm them against the fixture and mock headers.

// Preamble already sent and arbitration policy unchanged: the estimate must equal
// one PIPE_CONTROL plus one MEDIA_VFE_STATE.
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyNotChangedWhenEstimatingPreambleCmdSizeThenReturnItsValue) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy;

    auto expectedCmdSize = sizeof(typename FamilyType::PIPE_CONTROL) + sizeof(typename FamilyType::MEDIA_VFE_STATE);
    EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice));
}

// First flushTask with mid-thread preemption must mark STATE_SIP as sent and emit a
// STATE_SIP command into the CSR stream. The body only runs when the device's default
// preemption mode is MidThread.
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenNotSentStateSipWhenFirstTaskIsFlushedThenStateSipCmdIsAddedAndIsStateSipSentSetToTrue) {
    using STATE_SIP = typename FamilyType::STATE_SIP;

    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) {
        mockDevice->setPreemptionMode(PreemptionMode::MidThread);

        auto &csr = mockDevice->getUltCommandStreamReceiver<FamilyType>();
        csr.isPreambleSent = true;

        CommandQueueHw<FamilyType> commandQueue(nullptr, mockDevice.get(), 0);
        auto &commandStream = commandQueue.getCS(4096u);

        DispatchFlags dispatchFlags;
        dispatchFlags.preemptionMode = PreemptionMode::MidThread;

        // A single dummy allocation backs all three heaps passed to flushTask.
        MockGraphicsAllocation allocation(nullptr, 0);
        IndirectHeap heap(&allocation);

        csr.flushTask(commandStream, 0, heap, heap, heap, 0, dispatchFlags, *mockDevice);

        EXPECT_TRUE(csr.isStateSipSent);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(csr.getCS(0));

        auto stateSipItor = find<STATE_SIP *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
        EXPECT_NE(hwParser.cmdList.end(), stateSipItor);
    }
}

// programStateSip() must set isStateSipSent.
HWTEST_F(UltCommandStreamReceiverTest, givenCsrWhenProgramStateSipIsCalledThenIsStateSipCalledIsSetToTrue) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    auto requiredSize = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*pDevice);
    StackVec<char, 4096> buffer(requiredSize);
    LinearStream cmdStream(buffer.begin(), buffer.size());

    commandStreamReceiver.programStateSip(cmdStream, *pDevice);
    EXPECT_TRUE(commandStreamReceiver.isStateSipSent);
}

// The required stream size with isStateSipSent == false exceeds the size with
// isStateSipSent == true by exactly the STATE_SIP command size.
HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsNotIncluded) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    DispatchFlags dispatchFlags;

    commandStreamReceiver.isStateSipSent = false;
    auto sizeWithStateSipIsNotSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    commandStreamReceiver.isStateSipSent = true;
    auto sizeWhenSipIsSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*pDevice);
    EXPECT_EQ(sizeForStateSip, sizeWithStateSipIsNotSent - sizeWhenSipIsSent);
}

// With the source level debugger active, the STATE_SIP size is counted even though
// isStateSipSent is true; the kernel-debugging commands size is subtracted from the
// measured difference before comparing.
HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDebuggerIsActiveWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsIncluded) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    DispatchFlags dispatchFlags;

    commandStreamReceiver.isStateSipSent = true;
    auto sizeWithoutSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    pDevice->setSourceLevelDebuggerActive(true);
    commandStreamReceiver.isStateSipSent = true;
    auto sizeWithSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*pDevice);
    EXPECT_EQ(sizeForStateSip, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper<FamilyType>::getKernelDebuggingCommandsSize(true));
}

// Changing the required arbitration policy (preamble already sent) grows the estimate
// by the thread-arbitration commands size.
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingCmdSize) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;

    commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy;
    auto policyNotChanged = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy + 1;
    auto policyChanged = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    auto actualDifference = policyChanged - policyNotChanged;
    auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize();
    EXPECT_EQ(expectedDifference, actualDifference);
}

// Not-yet-sent preamble costs the thread-arbitration commands plus the additional
// preamble commands on top of the already-sent estimate.
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingAndAdditionalCmdsSize) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy;

    commandStreamReceiver.isPreambleSent = false;
    auto preambleNotSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    commandStreamReceiver.isPreambleSent = true;
    auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    auto actualDifference = preambleNotSent - preambleSent;
    auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() + PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
    EXPECT_EQ(expectedDifference, actualDifference);
}

// A dirty MEDIA_VFE_STATE adds one PIPE_CONTROL plus one MEDIA_VFE_STATE to the estimate.
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenMediaVfeStateDirtyEstimatingPreambleCmdSizeThenResultDependsVfeStateProgrammingCmdSize) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    commandStreamReceiver.setMediaVFEStateDirty(false);
    auto notDirty = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    commandStreamReceiver.setMediaVFEStateDirty(true);
    auto dirty = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    auto actualDifference = dirty - notDirty;
    auto expectedDifference = sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);
    EXPECT_EQ(expectedDifference, actualDifference);
}

// A CSR with task count and task level of zero reports all three heap states as dirty
// on the first updateAndCheck().
HWTEST_F(UltCommandStreamReceiverTest, givenCommandStreamReceiverInInitialStateWhenHeapsAreAskedForDirtyStatusThenTrueIsReturned) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount());
    EXPECT_EQ(0u, commandStreamReceiver.peekTaskLevel());

    EXPECT_TRUE(commandStreamReceiver.dshState.updateAndCheck(&dsh));
    EXPECT_TRUE(commandStreamReceiver.iohState.updateAndCheck(&ioh));
    EXPECT_TRUE(commandStreamReceiver.sshState.updateAndCheck(&ssh));
}

using CommandStreamReceiverFlushTests = UltCommandStreamReceiverTest;

// addBatchBufferEnd() consumes exactly sizeof(MI_BATCH_BUFFER_END) and the bytes written
// at the previous offset decode as MI_BATCH_BUFFER_END.
HWTEST_F(CommandStreamReceiverFlushTests, addsBatchBufferEnd) {
    auto usedPrevious = commandStream.getUsed();

    CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(commandStream, nullptr);

    EXPECT_EQ(commandStream.getUsed(), usedPrevious + sizeof(typename FamilyType::MI_BATCH_BUFFER_END));

    auto batchBufferEnd = genCmdCast<typename FamilyType::MI_BATCH_BUFFER_END *>(
        ptrOffset(commandStream.getCpuBase(), usedPrevious));
    EXPECT_NE(nullptr, batchBufferEnd);
}

// After alignToCacheLine() the used size is a multiple of the cache line size.
HWTEST_F(CommandStreamReceiverFlushTests, shouldAlignToCacheLineSize) {
    // Consume a non-cache-line-sized chunk first so there is something to align.
    commandStream.getSpace(sizeof(uint32_t));
    CommandStreamReceiverHw<FamilyType>::alignToCacheLine(commandStream);

    EXPECT_EQ(0u, commandStream.getUsed() % MemoryConstants::cacheLineSize);
}

using CommandStreamReceiverHwTest = Test<DeviceFixture>;

// CommandStreamReceiverHw::create() yields a receiver reporting CSR_HW as its type.
HWTEST_F(CommandStreamReceiverHwTest, givenCsrHwWhenTypeIsCheckedThenCsrHwIsReturned) {
    auto csr = std::unique_ptr<CommandStreamReceiver>(CommandStreamReceiverHw<FamilyType>::create(*pDevice->executionEnvironment));

    EXPECT_EQ(CommandStreamReceiverType::CSR_HW, csr->getType());
}

// The device's command stream receiver exposes a default SSH size of 64 KB.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverHwTest, WhenCommandStreamReceiverHwIsCreatedThenDefaultSshSizeIs64KB) {
    auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();

    EXPECT_EQ(64 * KB, commandStreamReceiver.defaultSshSize);
}

// Requesting zero scratch space must leave both dirty flags false and create no
// scratch allocation.
HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenScratchAllocationIsNotCreated) {
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment);
    auto scratchController = commandStreamReceiver->scratchSpaceController.get();

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, stateBaseAddressDirty, cfeStateDirty);

    EXPECT_FALSE(cfeStateDirty);
    EXPECT_FALSE(stateBaseAddressDirty);
    EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation());
}

// After requesting 0x1000 bytes of scratch and forcing a known GPU address on the
// scratch allocation, calculateNewGSH() must correspond to that address minus one page
// (as judged by UnitTestHelper::evaluateGshAddressForScratchSpace).
HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddressIsReturned) {
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment);
    auto scratchController = commandStreamReceiver->scratchSpaceController.get();

    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    std::unique_ptr<void, std::function<decltype(alignedFree)>> surfaceHeap(alignedMalloc(0x1000, 0x1000), alignedFree);
    scratchController->setRequiredScratchSpace(surfaceHeap.get(), 0x1000u, 0u, 0u, stateBaseAddressDirty, cfeStateDirty);

    // Fatal check: the allocation is dereferenced right below.
    auto scratchAllocation = scratchController->getScratchSpaceAllocation();
    ASSERT_NE(nullptr, scratchAllocation);

    uint64_t expectedScratchAddress = 0xAAABBBCCCDDD000ull;
    scratchAllocation->setCpuPtrAndGpuAddress(scratchAllocation->getUnderlyingBuffer(), expectedScratchAddress);

    EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateGshAddressForScratchSpace((expectedScratchAddress - MemoryConstants::pageSize), scratchController->calculateNewGSH()));
}

// Fixture that swaps the device CSR's registered OS context for one created with
// ENGINE_BCS and provides a MockContext for buffer creation.
struct BcsTests : public CommandStreamReceiverHwTest {
    void SetUp() override {
        CommandStreamReceiverHwTest::SetUp();

        auto &csr = pDevice->getCommandStreamReceiver();
        auto engine = csr.getMemoryManager()->getRegisteredEngineForCsr(&csr);
        // Fatal check: engine is dereferenced on every following line.
        ASSERT_NE(nullptr, engine);
        auto contextId = engine->osContext->getContextId();

        // Replace the registered context in place, reusing its context id.
        delete engine->osContext;
        engine->osContext = OsContext::create(nullptr, contextId, 0, aub_stream::EngineType::ENGINE_BCS, PreemptionMode::Disabled, false);
        engine->osContext->incRefInternal();
        csr.setupContext(*engine->osContext);

        context = std::make_unique<MockContext>(pDevice);
    }

    void TearDown() override {
        // Release the context before the device fixture is torn down.
        context.reset();
        CommandStreamReceiverHwTest::TearDown();
    }

    std::unique_ptr<MockContext> context;
};

// estimateBlitCommandsSize() must account for one XY_COPY_BLT per (maxBlitWidth *
// maxBlitHeight) chunk -- rounding a partial chunk up -- plus MI_FLUSH_DW and
// MI_BATCH_BUFFER_END, aligned up to the cache line size.
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
    constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;

    uint64_t notAlignedBltSize = (3 * max2DBlitSize) + 1;
    uint64_t alignedBltSize = (3 * max2DBlitSize);
    uint32_t alignedNumberOfBlts = 3;
    uint32_t notAlignedNumberOfBlts = 4;

    size_t expectedSize = sizeof(typename FamilyType::MI_FLUSH_DW) + sizeof(typename FamilyType::MI_BATCH_BUFFER_END);

    auto expectedAlignedSize = alignUp(expectedSize + (sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts), MemoryConstants::cacheLineSize);
    auto expectedNotAlignedSize = alignUp(expectedSize + (sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts), MemoryConstants::cacheLineSize);

    auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize);
    auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize);

    EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
    EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
}

// Blitting two full 2D chunks plus a 17-byte leftover must program three XY_COPY_BLT
// commands (the last one 17x1), then MI_FLUSH_DW writing the new task count to the tag
// allocation, then MI_BATCH_BUFFER_END, with only MI_NOOP padding after it.
HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommands) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    static_cast<OsAgnosticMemoryManager *>(csr.getMemoryManager())->turnOnFakingBigAllocations();

    uint32_t bltLeftover = 17;
    uint64_t bltSize = (2 * max2DBlitSize) + bltLeftover;
    uint32_t numberOfBlts = 3;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast<size_t>(bltSize), nullptr, retVal));
    // Fatal checks: *buffer is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    uint32_t newTaskCount = 19;
    csr.taskCount = newTaskCount - 1;
    EXPECT_EQ(0u, csr.recursiveLockCounter.load());

    csr.blitFromHostPtr(*buffer, hostPtr, bltSize);

    EXPECT_EQ(newTaskCount, csr.taskCount);
    EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount);
    EXPECT_EQ(newTaskCount, csr.latestSentTaskCount);
    EXPECT_EQ(newTaskCount, csr.latestSentTaskCountValueDuringFlush);
    EXPECT_EQ(1u, csr.recursiveLockCounter.load());

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr.commandStream);
    auto &cmdList = hwParser.cmdList;

    auto cmdIterator = cmdList.begin();

    for (uint32_t i = 0; i < numberOfBlts; i++) {
        // Fatal checks: never dereference end() or a failed cast.
        ASSERT_NE(cmdList.end(), cmdIterator);
        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*(cmdIterator++));
        ASSERT_NE(nullptr, bltCmd);

        EXPECT_EQ(0u, bltCmd->getDestinationX1CoordinateLeft());
        EXPECT_EQ(0u, bltCmd->getDestinationY1CoordinateTop());
        EXPECT_EQ(0u, bltCmd->getSourceX1CoordinateLeft());
        EXPECT_EQ(0u, bltCmd->getSourceY1CoordinateTop());

        uint32_t expectedWidth = static_cast<uint32_t>(BlitterConstants::maxBlitWidth);
        uint32_t expectedHeight = static_cast<uint32_t>(BlitterConstants::maxBlitHeight);
        if (i == (numberOfBlts - 1)) {
            // The final blit carries only the leftover bytes as a single row.
            expectedWidth = bltLeftover;
            expectedHeight = 1;
        }

        EXPECT_EQ(expectedWidth, bltCmd->getDestinationX2CoordinateRight());
        EXPECT_EQ(expectedHeight, bltCmd->getDestinationY2CoordinateBottom());
        EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch());
        EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch());
    }

    ASSERT_NE(cmdList.end(), cmdIterator);
    auto miFlushCmd = genCmdCast<MI_FLUSH_DW *>(*(cmdIterator++));
    ASSERT_NE(nullptr, miFlushCmd);
    EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation());
    EXPECT_EQ(csr.getTagAllocation()->getGpuAddress(), miFlushCmd->getDestinationAddress());
    EXPECT_EQ(newTaskCount, miFlushCmd->getImmediateData());

    ASSERT_NE(cmdList.end(), cmdIterator);
    EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_BATCH_BUFFER_END *>(*(cmdIterator++)));

    // padding
    while (cmdIterator != cmdList.end()) {
        EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_NOOP *>(*(cmdIterator++)));
    }
}

// A blit must make the buffer allocation, the command stream allocation, the tag
// allocation and an allocation wrapping the host pointer resident, and must call
// makeSurfacePackNonResident exactly once.
HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocationsResident) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // Fatal checks: *buffer is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled);

    csr.blitFromHostPtr(*buffer, hostPtr, 1);

    EXPECT_TRUE(csr.isMadeResident(buffer->getGraphicsAllocation()));
    EXPECT_TRUE(csr.isMadeResident(csr.commandStream.getGraphicsAllocation()));
    EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation()));
    EXPECT_EQ(1u, csr.makeSurfacePackNonResidentCalled);

    // Look for a resident allocation whose underlying buffer is the host pointer.
    bool hostPtrAllocationFound = false;
    for (auto &allocation : csr.makeResidentAllocations) {
        if (allocation.first->getUnderlyingBuffer() == hostPtr) {
            hostPtrAllocationFound = true;
            break;
        }
    }
    EXPECT_TRUE(hostPtrAllocationFound);
}

// A blit must flush a batch buffer that starts at the pre-existing stream offset,
// has no chaining, default (MEDIUM) throttle, and waits for the new task count.
HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.recordFlusheBatchBuffer = true;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // Fatal checks: *buffer is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    // Pre-consume a few bytes so the flushed start offset is distinguishable from zero.
    auto &commandStream = csr.getCS(MemoryConstants::pageSize);
    size_t commandStreamOffset = 4;
    commandStream.getSpace(commandStreamOffset);

    uint32_t newTaskCount = 17;
    csr.taskCount = newTaskCount - 1;

    csr.blitFromHostPtr(*buffer, hostPtr, 1);

    EXPECT_EQ(commandStream.getGraphicsAllocation(), csr.latestFlushedBatchBuffer.commandBufferAllocation);
    EXPECT_EQ(commandStreamOffset, csr.latestFlushedBatchBuffer.startOffset);
    EXPECT_EQ(0u, csr.latestFlushedBatchBuffer.chainedBatchBufferStartOffset);
    EXPECT_EQ(nullptr, csr.latestFlushedBatchBuffer.chainedBatchBuffer);
    EXPECT_FALSE(csr.latestFlushedBatchBuffer.requiresCoherency);
    EXPECT_FALSE(csr.latestFlushedBatchBuffer.low_priority);
    EXPECT_EQ(QueueThrottle::MEDIUM, csr.latestFlushedBatchBuffer.throttle);
    EXPECT_EQ(commandStream.getUsed(), csr.latestFlushedBatchBuffer.usedSize);
    EXPECT_EQ(&commandStream, csr.latestFlushedBatchBuffer.stream);

    EXPECT_EQ(newTaskCount, csr.latestWaitForCompletionWithTimeoutTaskCount.load());
}

// blitFromHostPtr() must call waitForTaskCountWithKmdNotifyFallback exactly once with
// the CSR's task count and flush stamp, and with both boolean options false.
HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
    // CSR that records the arguments of the wait call instead of waiting.
    class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
      public:
        using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;

        void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
                                                   bool useQuickKmdSleep, bool forcePowerSavingMode) override {
            waitForTaskCountWithKmdNotifyFallbackCalled++;
            taskCountToWaitPassed = taskCountToWait;
            flushStampToWaitPassed = flushStampToWait;
            useQuickKmdSleepPassed = useQuickKmdSleep;
            forcePowerSavingModePassed = forcePowerSavingMode;
        }

        uint32_t taskCountToWaitPassed = 0;
        FlushStamp flushStampToWaitPassed = 0;
        bool useQuickKmdSleepPassed = false;
        bool forcePowerSavingModePassed = false;
        uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
    };

    auto myMockCsr = std::make_unique<::testing::NiceMock<MyMockCsr>>(*pDevice->getExecutionEnvironment());
    // Reuse the OS context that the fixture attached to the device CSR.
    auto &bcsOsContext = pDevice->getUltCommandStreamReceiver<FamilyType>().getOsContext();
    myMockCsr->initializeTagAllocation();
    myMockCsr->setupContext(bcsOsContext);

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // Fatal checks: *buffer is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    myMockCsr->blitFromHostPtr(*buffer, hostPtr, 1);

    EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
    EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
    EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed);
    EXPECT_FALSE(myMockCsr->useQuickKmdSleepPassed);
    EXPECT_FALSE(myMockCsr->forcePowerSavingModePassed);
}