/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// NOTE(review): the reviewed copy of this file had lost every angle-bracketed
// token (template arguments and the targets of the two system includes).
// Template arguments below were reconstructed from the aliases and declared
// types visible in each test; the StackVec capacities and the system include
// list are best guesses. Confirm against the repository copy before merging.

#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/gen_common/reg_configs_common.h"
#include "shared/source/helpers/flat_batch_buffer_helper_hw.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/utilities/stackvec.h"

#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"

#include "test.h"

#include "reg_configs_common.h"

// NOTE(review): original system include targets unknown; these are the
// standard headers the visible code relies on (fixed-width integers,
// std::unique_ptr) - confirm.
#include <cstdint>
#include <memory>

using PreambleTest = ::testing::Test;

using namespace NEO;

// With preemption disabled, the additional preamble command size must equal
// the preemption helper's required preamble size, and both must be zero.
HWTEST_F(PreambleTest, givenDisabledPreemptioWhenPreambleAdditionalCommandsSizeIsQueriedThenZeroIsReturned) {
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mockDevice->setPreemptionMode(PreemptionMode::Disabled);

    auto cmdSize = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
    EXPECT_EQ(PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice), cmdSize);
    EXPECT_EQ(0u, cmdSize);
}

// With mid-thread preemption, the additional preamble size must be exactly one
// GPGPU_CSR_BASE_ADDRESS command. The body only runs when the device's default
// preemption mode is MidThread; otherwise the test passes without checking.
HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidthreadPreemptionWhenPreambleAdditionalCommandsSizeIsQueriedThenSizeForPreemptionPreambleIsReturned) {
    using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS;
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) {
        mockDevice->setPreemptionMode(PreemptionMode::MidThread);

        auto cmdSize = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
        EXPECT_EQ(PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice), cmdSize);
        EXPECT_EQ(sizeof(GPGPU_CSR_BASE_ADDRESS), cmdSize);
    }
}

// Checks that STATE_SIP needs no space when preemption is disabled and, for
// mid-thread preemption, that the preamble stream carries a
// GPGPU_CSR_BASE_ADDRESS pointing at the CSR surface while the preemption
// stream carries a STATE_SIP command.
HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidThreadPreemptionWhenPreambleIsProgrammedThenStateSipAndCsrBaseAddressCmdsAreAdded) {
    using STATE_SIP = typename FamilyType::STATE_SIP;
    using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS;
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    mockDevice->setPreemptionMode(PreemptionMode::Disabled);
    auto cmdSizePreemptionDisabled = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*mockDevice);
    EXPECT_EQ(0u, cmdSizePreemptionDisabled);

    if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) {
        mockDevice->setPreemptionMode(PreemptionMode::MidThread);
        auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*mockDevice);
        EXPECT_LT(cmdSizePreemptionDisabled, cmdSizePreemptionMidThread);

        StackVec<char, 8192> preambleBuffer(8192);
        LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size());

        StackVec<char, 4096> preemptionBuffer;
        preemptionBuffer.resize(cmdSizePreemptionMidThread);
        LinearStream preemptionStream(&*preemptionBuffer.begin(), preemptionBuffer.size());

        // The mock CSR surface is never dereferenced here; only its address is
        // compared against the programmed command.
        uintptr_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte;
        MockGraphicsAllocation csrSurface(reinterpret_cast<void *>(minCsrAlignment), 1024);

        PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
                                                    ThreadArbitrationPolicy::RoundRobin, &csrSurface);

        PreemptionHelper::programStateSip<FamilyType>(preemptionStream, *mockDevice);

        HardwareParse hwParserPreamble;
        hwParserPreamble.parseCommands<FamilyType>(preambleStream, 0);

        auto csrCmd = hwParserPreamble.getCommand<GPGPU_CSR_BASE_ADDRESS>();
        // Fatal assertion: csrCmd is dereferenced on the next line, so a
        // missing command must stop the test instead of crashing it.
        ASSERT_NE(nullptr, csrCmd);
        EXPECT_EQ(csrSurface.getGpuAddress(), csrCmd->getGpgpuCsrBaseAddress());

        HardwareParse hwParserPreemption;
        hwParserPreemption.parseCommands<FamilyType>(preemptionStream, 0);

        auto stateSipCmd = hwParserPreemption.getCommand<STATE_SIP>();
        EXPECT_NE(nullptr, stateSipCmd);
    }
}

// Active kernel debugging must reserve space for two MI_LOAD_REGISTER_IMM commands.
HWTEST_F(PreambleTest, givenActiveKernelDebuggingWhenPreambleKernelDebuggingCommandsSizeIsQueriedThenCorrectSizeIsReturned) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    auto size = PreambleHelper<FamilyType>::getKernelDebuggingCommandsSize(true);
    auto sizeExpected = 2 * sizeof(MI_LOAD_REGISTER_IMM);
    EXPECT_EQ(sizeExpected, size);
}

// Inactive kernel debugging must not reserve any command space.
HWTEST_F(PreambleTest, givenInactiveKernelDebuggingWhenPreambleKernelDebuggingCommandsSizeIsQueriedThenZeroIsReturned) {
    auto size = PreambleHelper<FamilyType>::getKernelDebuggingCommandsSize(false);
    EXPECT_EQ(0u, size);
}

// programKernelDebugging must emit exactly two MI_LOAD_REGISTER_IMM commands:
// first the debug-mode register, then the TD_CTL register, each with the
// offset/value reported by UnitTestHelper.
HWTEST_F(PreambleTest, whenKernelDebuggingCommandsAreProgrammedThenCorrectCommandsArePlacedIntoStream) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    auto bufferSize = PreambleHelper<FamilyType>::getKernelDebuggingCommandsSize(true);
    auto buffer = std::unique_ptr<char[]>(new char[bufferSize]);

    LinearStream stream(buffer.get(), bufferSize);
    PreambleHelper<FamilyType>::programKernelDebugging(&stream);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream);
    auto cmdList = hwParser.getCommandsList<MI_LOAD_REGISTER_IMM>();

    // Fatal: the iterator below is advanced and dereferenced twice.
    ASSERT_EQ(2u, cmdList.size());

    auto it = cmdList.begin();

    MI_LOAD_REGISTER_IMM *pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*it);
    EXPECT_EQ(UnitTestHelper<FamilyType>::getDebugModeRegisterOffset(), pCmd->getRegisterOffset());
    EXPECT_EQ(UnitTestHelper<FamilyType>::getDebugModeRegisterValue(), pCmd->getDataDword());
    ++it;

    pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*it);
    EXPECT_EQ(UnitTestHelper<FamilyType>::getTdCtlRegisterOffset(), pCmd->getRegisterOffset());
    EXPECT_EQ(UnitTestHelper<FamilyType>::getTdCtlRegisterValue(), pCmd->getDataDword());
}

// Programs the preamble twice (debugger inactive, then active) and expects the
// second stream to contain exactly two more MI_LOAD_REGISTER_IMM commands.
HWTEST_F(PreambleTest, givenKernelDebuggingActiveWhenPreambleIsProgrammedThenProgramKernelDebuggingIsCalled) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    mockDevice->setPreemptionMode(PreemptionMode::Disabled);
    mockDevice->setDebuggerActive(false);

    StackVec<char, 8192> preambleBuffer(8192);
    LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size());

    PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
                                                ThreadArbitrationPolicy::RoundRobin, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(preambleStream);
    auto cmdList = hwParser.getCommandsList<MI_LOAD_REGISTER_IMM>();

    auto miLoadRegImmCountWithoutDebugging = cmdList.size();

    mockDevice->setDebuggerActive(true);
    auto preemptionAllocation = mockDevice->getGpgpuCommandStreamReceiver().getPreemptionAllocation();

    // Reuse the same stream object on a fresh buffer for the second pass.
    StackVec<char, 8192> preambleBuffer2(8192);
    preambleStream.replaceBuffer(&*preambleBuffer2.begin(), preambleBuffer2.size());
    PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
                                                ThreadArbitrationPolicy::RoundRobin, preemptionAllocation);
    HardwareParse hwParser2;
    hwParser2.parseCommands<FamilyType>(preambleStream);
    cmdList = hwParser2.getCommandsList<MI_LOAD_REGISTER_IMM>();

    auto miLoadRegImmCountWithDebugging = cmdList.size();
    // Fatal: guards the unsigned subtraction below against wrapping.
    ASSERT_LT(miLoadRegImmCountWithoutDebugging, miLoadRegImmCountWithDebugging);
    EXPECT_EQ(2u, miLoadRegImmCountWithDebugging - miLoadRegImmCountWithoutDebugging);
}

// Turning the debugger on must grow the additional command size by exactly two
// MI_LOAD_REGISTER_IMM commands when mid-thread preemption is selected.
HWTEST_F(PreambleTest, givenKernelDebuggingActiveAndMidThreadPreemptionWhenGetAdditionalCommandsSizeIsCalledThen2MiLoadRegisterImmCmdsAreAdded) {
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mockDevice->setPreemptionMode(PreemptionMode::MidThread);

    mockDevice->setDebuggerActive(false);
    size_t withoutDebugging = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
    mockDevice->setDebuggerActive(true);
    size_t withDebugging = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
    EXPECT_LT(withoutDebugging, withDebugging);

    size_t diff = withDebugging - withoutDebugging;
    size_t sizeExpected = 2 * sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM);
    EXPECT_EQ(sizeExpected, diff);
}

// Without debug overrides, the VFE thread limit is EUCount * threads-per-EU,
// where threads-per-EU includes capabilityTable.extraQuantityThreadsPerEU.
HWTEST_F(PreambleTest, givenDefaultPreambleWhenGetThreadsMaxNumberIsCalledThenMaximumNumberOfThreadsIsReturned) {
    const HardwareInfo &hwInfo = *defaultHwInfo;
    uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU;
    uint32_t value = HwHelper::getMaxThreadsForVfe(hwInfo);

    uint32_t expected = hwInfo.gtSystemInfo.EUCount * threadsPerEU;
    EXPECT_EQ(expected, value);
}

// With MaxHwThreadsPercent = 80, the limit is 80% of the default, truncated to
// an integer. The debug flag is restored by DebugManagerStateRestore.
HWTEST_F(PreambleTest, givenMaxHwThreadsPercentDebugVariableWhenGetThreadsMaxNumberIsCalledThenMaximumNumberOfThreadsIsCappedToRequestedNumber) {
    const HardwareInfo &hwInfo = *defaultHwInfo;
    uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU;
    DebugManagerStateRestore debugManagerStateRestore;
    DebugManager.flags.MaxHwThreadsPercent.set(80);
    uint32_t value = HwHelper::getMaxThreadsForVfe(hwInfo);

    uint32_t expected = int(hwInfo.gtSystemInfo.EUCount * threadsPerEU * 80 / 100.0f);
    EXPECT_EQ(expected, value);
}

// With MinHwThreadsUnoccupied = 2, the limit is the default minus two threads.
HWTEST_F(PreambleTest, givenMinHwThreadsUnoccupiedDebugVariableWhenGetThreadsMaxNumberIsCalledThenMaximumNumberOfThreadsIsCappedToMatchRequestedNumber) {
    const HardwareInfo &hwInfo = *defaultHwInfo;
    uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU;
    DebugManagerStateRestore debugManagerStateRestore;
    DebugManager.flags.MinHwThreadsUnoccupied.set(2);
    uint32_t value = HwHelper::getMaxThreadsForVfe(hwInfo);

    uint32_t expected = hwInfo.gtSystemInfo.EUCount * threadsPerEU - 2;
    EXPECT_EQ(expected, value);
}

// The pointer returned by getSpaceForVfeState must lie inside the used part of
// the stream, and programVfeState must fill in the thread count, URB entry
// count and scratch base (0xC0DEC0DE is expected to read back as 0xC0DEC000).
HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenProgramVFEStateIsCalledThenCorrectVfeStateAddressIsReturned) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    char buffer[64];
    MockGraphicsAllocation graphicsAllocation(buffer, sizeof(buffer));
    LinearStream preambleStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize());
    uint64_t addressToPatch = 0xC0DEC0DE;
    uint64_t expectedAddress = 0xC0DEC000;

    auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::RenderCompute);
    StreamProperties emptyProperties{};
    PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 1024u, addressToPatch, 10u, emptyProperties);
    EXPECT_GE(reinterpret_cast<uintptr_t>(pVfeCmd), reinterpret_cast<uintptr_t>(preambleStream.getCpuBase()));
    EXPECT_LT(reinterpret_cast<uintptr_t>(pVfeCmd), reinterpret_cast<uintptr_t>(preambleStream.getCpuBase()) + preambleStream.getUsed());

    auto &vfeCmd = *reinterpret_cast<MEDIA_VFE_STATE *>(pVfeCmd);
    EXPECT_EQ(10u, vfeCmd.getMaximumNumberOfThreads());
    EXPECT_EQ(1u, vfeCmd.getNumberOfUrbEntries());
    EXPECT_EQ(expectedAddress, vfeCmd.getScratchSpaceBasePointer());
    EXPECT_EQ(0u, vfeCmd.getScratchSpaceBasePointerHigh());
}

// The scratch-space offset returned for a programmed VFE state must be
// non-zero and, relative to the stream base, land on the command's
// SCRATCHSPACEBASEPOINTER_BYTEOFFSET field.
HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenGetScratchSpaceAddressOffsetForVfeStateIsCalledThenCorrectOffsetIsReturned) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    char buffer[64];
    MockGraphicsAllocation graphicsAllocation(buffer, sizeof(buffer));
    LinearStream preambleStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize());
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    FlatBatchBufferHelperHw<FamilyType> helper(*mockDevice->getExecutionEnvironment());
    uint64_t addressToPatch = 0xC0DEC0DE;

    auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&preambleStream, mockDevice->getHardwareInfo(), EngineGroupType::RenderCompute);
    StreamProperties emptyProperties{};
    PreambleHelper<FamilyType>::programVfeState(pVfeCmd, mockDevice->getHardwareInfo(), 1024u, addressToPatch, 10u, emptyProperties);

    auto offset = PreambleHelper<FamilyType>::getScratchSpaceAddressOffsetForVfeState(&preambleStream, pVfeCmd);
    EXPECT_NE(0u, offset);
    EXPECT_EQ(MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET + reinterpret_cast<uintptr_t>(pVfeCmd),
              offset + reinterpret_cast<uintptr_t>(preambleStream.getCpuBase()));
}

// isSystolicModeConfigurable is expected to report false for the default hardware info.
HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenIsSystolicModeConfigurableThenReturnFalse) {
    auto result = PreambleHelper<FamilyType>::isSystolicModeConfigurable(*defaultHwInfo);
    EXPECT_FALSE(result);
}

// appendProgramPipelineSelect must leave the PIPELINE_SELECT mask bits untouched.
HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenAppendProgramPipelineSelectThenNothingChanged) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    PIPELINE_SELECT cmd = FamilyType::cmdInitPipelineSelect;
    cmd.setMaskBits(pipelineSelectEnablePipelineSelectMaskBits);
    PreambleHelper<FamilyType>::appendProgramPipelineSelect(&cmd, true, *defaultHwInfo);
    EXPECT_EQ(pipelineSelectEnablePipelineSelectMaskBits, cmd.getMaskBits());
}

// With ForceSemaphoreDelayBetweenWaits set to 10, programSemaphoreDelay must
// emit a single MI_LOAD_REGISTER_IMM that writes 10 to register offset 0x224c.
HWTEST_F(PreambleTest, givenSetForceSemaphoreDelayBetweenWaitsWhenProgramSemaphoreDelayThenSemaWaitPollRegisterIsProgrammed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    DebugManagerStateRestore debugManagerStateRestore;
    uint32_t newDelay = 10u;
    DebugManager.flags.ForceSemaphoreDelayBetweenWaits.set(newDelay);

    auto bufferSize = PreambleHelper<FamilyType>::getSemaphoreDelayCommandSize();
    EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), bufferSize);
    auto buffer = std::unique_ptr<char[]>(new char[bufferSize]);

    LinearStream stream(buffer.get(), bufferSize);
    PreambleHelper<FamilyType>::programSemaphoreDelay(&stream);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream);
    auto cmdList = hwParser.getCommandsList<MI_LOAD_REGISTER_IMM>();
    // Fatal: the first list element is dereferenced below.
    ASSERT_EQ(1u, cmdList.size());

    auto it = cmdList.begin();

    MI_LOAD_REGISTER_IMM *pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*it);
    EXPECT_EQ(static_cast<uint32_t>(0x224c), pCmd->getRegisterOffset());
    EXPECT_EQ(newDelay, pCmd->getDataDword());
}

// With ForceSemaphoreDelayBetweenWaits set to -1, the reported command size is
// still one MI_LOAD_REGISTER_IMM, but programSemaphoreDelay must emit nothing.
HWTEST_F(PreambleTest, givenNotSetForceSemaphoreDelayBetweenWaitsWhenProgramSemaphoreDelayThenSemaWaitPollRegisterIsNotProgrammed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    DebugManagerStateRestore debugManagerStateRestore;
    DebugManager.flags.ForceSemaphoreDelayBetweenWaits.set(-1);

    auto bufferSize = PreambleHelper<FamilyType>::getSemaphoreDelayCommandSize();
    EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), bufferSize);
    auto buffer = std::unique_ptr<char[]>(new char[bufferSize]);

    LinearStream stream(buffer.get(), bufferSize);
    PreambleHelper<FamilyType>::programSemaphoreDelay(&stream);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(stream);
    auto cmdList = hwParser.getCommandsList<MI_LOAD_REGISTER_IMM>();
    ASSERT_EQ(0u, cmdList.size());
}