/*
 * Copyright (C) 2017-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/built_ins/built_ins.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/helpers/hw_helper.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/fixtures/preemption_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"

// Gen10 preemption unit tests: STATE_SIP programming, re-programming of the
// 0x2580 register across consecutive enqueues, the preemption mode handed to
// the command stream receiver on flush, preemption workaround commands and the
// interface-descriptor thread-preemption bit.
//
// NOTE(review): the two `template <>` specializations below name no template
// arguments, and the aliases STATE_SIP, MI_LOAD_REGISTER_IMM and
// GPGPU_CSR_BASE_ADDRESS are declared but never referenced. That suggests
// explicit template argument lists were lost from this file at some point --
// confirm against version control.

namespace NEO {

// Searches the parsed command list in [cmdList.begin(), itorWalker) and, when
// the search stops before itorWalker, caches the hit in cmdGpgpuCsrBaseAddress.
// cmdGpgpuCsrBaseAddress is left untouched when nothing is found.
template <>
void HardwareParse::findCsrBaseAddress() {
    // presumably the command type the search is meant to look for -- TODO confirm
    using GPGPU_CSR_BASE_ADDRESS = typename GEN10::GPGPU_CSR_BASE_ADDRESS;
    itorGpgpuCsrBaseAddress = find(cmdList.begin(), itorWalker);
    if (itorGpgpuCsrBaseAddress != itorWalker) {
        cmdGpgpuCsrBaseAddress = *itorGpgpuCsrBaseAddress;
    }
}
} // namespace NEO

using namespace NEO;

// Gen10-named aliases of the shared preemption fixtures.
using Gen10PreemptionTests = DevicePreemptionTests;
using Gen10PreemptionEnqueueKernelTest = PreemptionEnqueueKernelTest;
using Gen10MidThreadPreemptionEnqueueKernelTest = MidThreadPreemptionEnqueueKernelTest;
using Gen10ThreadGroupPreemptionEnqueueKernelTest = ThreadGroupPreemptionEnqueueKernelTest;

// Supplies the register values the tests expect for each preemption mode,
// together with the register address (0x2580). The default register value is
// the one mapped to MidBatch.
template <>
PreemptionTestHwDetails GetPreemptionTestHwDetails() {
    PreemptionTestHwDetails ret;
    ret.modeToRegValueMap[PreemptionMode::ThreadGroup] = DwordBuilder::build(1, true) | DwordBuilder::build(2, true, false);
    ret.modeToRegValueMap[PreemptionMode::MidBatch] = DwordBuilder::build(2, true) | DwordBuilder::build(1, true, false);
    ret.modeToRegValueMap[PreemptionMode::MidThread] = DwordBuilder::build(2, true, false) | DwordBuilder::build(1, true, false);
    ret.defaultRegValue = ret.modeToRegValueMap[PreemptionMode::MidBatch];
    ret.regAddress = 0x2580u;
    return ret;
}

// With ThreadGroup preemption selected, no STATE_SIP space is requested and
// programStateSip writes nothing into the (zero-sized) stream.
GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsNotAvailableThenDoesNotProgramStateSip) {
    device->setPreemptionMode(PreemptionMode::ThreadGroup);

    size_t requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*device);
    EXPECT_EQ(0U, requiredSize);

    LinearStream cmdStream{nullptr, 0};
    PreemptionHelper::programStateSip(cmdStream, *device);
    EXPECT_EQ(0U, cmdStream.getUsed());
}

// With MidThread preemption selected, exactly sizeof(STATE_SIP) bytes are
// requested and the parsed command's system instruction pointer matches the
// GPU address of the Csr SIP kernel allocation.
GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsAvailableThenStateSipIsProgrammed) {
    using STATE_SIP = typename FamilyType::STATE_SIP;

    device->setPreemptionMode(PreemptionMode::MidThread);
    executionEnvironment->DisableMidThreadPreemption = 0;

    size_t requiredCmdStreamSize = PreemptionHelper::getRequiredStateSipCmdSize(*device);
    size_t expectedPreambleSize = sizeof(STATE_SIP);
    EXPECT_EQ(expectedPreambleSize, requiredCmdStreamSize);

    StackVec streamStorage(requiredCmdStreamSize);
    ASSERT_LE(requiredCmdStreamSize, streamStorage.size());

    LinearStream cmdStream{streamStorage.begin(), streamStorage.size()};
    PreemptionHelper::programStateSip(cmdStream, *device);

    HardwareParse hwParsePreamble;
    hwParsePreamble.parseCommands(cmdStream);

    auto stateSipCmd = hwParsePreamble.getCommand();
    // Fatal check: stateSipCmd is dereferenced right below.
    ASSERT_NE(nullptr, stateSipCmd);
    EXPECT_EQ(device->getExecutionEnvironment()->getBuiltIns()->getSipKernel(SipKernelType::Csr, *device).getSipAllocation()->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer());
}

// ThreadGroup preemption: the first enqueue emits exactly one load-register
// command targeting 0x2580; a second enqueue with the same request emits none.
GEN10TEST_F(Gen10ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogram) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);

    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    csr.setMediaVFEStateDirty(false);

    // This fixture is expected to have no preemption CSR allocation.
    auto csrSurface = csr.getPreemptionCsrAllocation();
    EXPECT_EQ(nullptr, csrSurface);

    HardwareParse hwParser;
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    MockKernelWithInternals mockKernel(*pDevice);

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands(csr.commandStream);
    hwParser.findHardwareCommands();
    // Remember where the first enqueue ended so the second parse starts there.
    auto offset = csr.commandStream.getUsed();

    bool foundOne = false;
    for (auto it : hwParser.lriList) {
        auto cmd = genCmdCast(it);
        if (cmd->getRegisterOffset() == 0x2580u) {
            // A second hit would mean the register was programmed twice.
            EXPECT_FALSE(foundOne);
            foundOne = true;
        }
    }
    EXPECT_TRUE(foundOne);

    hwParser.cmdList.clear();
    hwParser.lriList.clear();

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands(csr.commandStream, offset);
    hwParser.findHardwareCommands();

    for (auto it : hwParser.lriList) {
        auto cmd = genCmdCast(it);
        EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u);
    }
}

// A non-blocked enqueue followed by flush reaches the CSR once and carries the
// device's ThreadGroup preemption mode in the dispatch flags.
GEN10TEST_F(Gen10PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledThenPassDevicePreemptionMode) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);

    // Nothing in this test deletes mockCsr; presumably the device takes
    // ownership in resetCommandStreamReceiver -- TODO confirm.
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pDevice);
    EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(*pDevice, mockKernel.mockKernel));

    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->flush();

    EXPECT_EQ(1, mockCsr->flushCalledCount);
    EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode);
}

// Same as above, but the enqueue waits on a user event: no CSR flush is seen
// until the event is set to CL_COMPLETE, after which exactly one flush carries
// the ThreadGroup preemption mode.
GEN10TEST_F(Gen10PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledAndBlockedThenPassDevicePreemptionMode) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);

    // Nothing in this test deletes mockCsr; presumably the device takes
    // ownership in resetCommandStreamReceiver -- TODO confirm.
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pDevice);
    EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(*pDevice, mockKernel.mockKernel));

    UserEvent userEventObj;
    cl_event userEvent = &userEventObj;
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &userEvent, nullptr);
    pCmdQ->flush();
    EXPECT_EQ(0, mockCsr->flushCalledCount);

    userEventObj.setStatus(CL_COMPLETE);
    pCmdQ->flush();
    EXPECT_EQ(1, mockCsr->flushCalledCount);
    EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode);
}

// MidThread fixture: the first enqueue emits exactly one load-register command
// for 0x2580 and a CSR base address command pointing at the preemption CSR
// allocation; the second enqueue emits neither.
GEN10TEST_F(Gen10MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThread) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    csr.setMediaVFEStateDirty(false);

    // Fatal check: csrSurface is dereferenced further down.
    auto csrSurface = csr.getPreemptionCsrAllocation();
    ASSERT_NE(nullptr, csrSurface);

    HardwareParse hwParser;
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    MockKernelWithInternals mockKernel(*pDevice);

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands(csr.commandStream);
    hwParser.findHardwareCommands();
    // Remember where the first enqueue ended so the second parse starts there.
    auto offset = csr.commandStream.getUsed();

    bool foundOneLri = false;
    for (auto it : hwParser.lriList) {
        auto cmdLri = genCmdCast(it);
        if (cmdLri->getRegisterOffset() == 0x2580u) {
            // A second hit would mean the register was programmed twice.
            EXPECT_FALSE(foundOneLri);
            foundOneLri = true;
        }
    }
    EXPECT_TRUE(foundOneLri);

    hwParser.findCsrBaseAddress();
    ASSERT_NE(nullptr, hwParser.cmdGpgpuCsrBaseAddress);
    auto cmdCsr = genCmdCast(hwParser.cmdGpgpuCsrBaseAddress);
    ASSERT_NE(nullptr, cmdCsr);
    EXPECT_EQ(csrSurface->getGpuAddressToPatch(), cmdCsr->getGpgpuCsrBaseAddress());

    // Reset parser state, including the cached CSR base address command, so the
    // second pass only reflects the second enqueue.
    hwParser.cmdList.clear();
    hwParser.lriList.clear();
    hwParser.cmdGpgpuCsrBaseAddress = nullptr;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands(csr.commandStream, offset);
    hwParser.findHardwareCommands();

    for (auto it : hwParser.lriList) {
        auto cmd = genCmdCast(it);
        EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u);
    }

    hwParser.findCsrBaseAddress();
    EXPECT_EQ(nullptr, hwParser.cmdGpgpuCsrBaseAddress);
}

// With preemption disabled on the device, the flush reaches the CSR once and
// carries PreemptionMode::Disabled in the dispatch flags.
GEN10TEST_F(Gen10PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKernelCalledThenPassDisabledPreemptionMode) {
    pDevice->setPreemptionMode(PreemptionMode::Disabled);
    WhitelistedRegisters regs = {};
    pDevice->setForceWhitelistedRegs(true, &regs);

    // Nothing in this test deletes mockCsr; presumably the device takes
    // ownership in resetCommandStreamReceiver -- TODO confirm.
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pDevice);
    EXPECT_EQ(PreemptionMode::Disabled, PreemptionHelper::taskPreemptionMode(*pDevice, mockKernel.mockKernel));

    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->flush();

    EXPECT_EQ(1, mockCsr->flushCalledCount);
    EXPECT_EQ(PreemptionMode::Disabled, mockCsr->passedDispatchFlags.preemptionMode);
}

// The preemption workaround is expected to need no command-stream space.
GEN10TEST_F(Gen10PreemptionTests, getRequiredCmdQSize) {
    size_t expectedSize = 0;
    EXPECT_EQ(expectedSize, PreemptionHelper::getPreemptionWaCsSize(*device));
}

// Neither the begin nor the end workaround hook is expected to leave anything
// in the command stream: getUsed() is compared against 0 after each call.
GEN10TEST_F(Gen10PreemptionTests, applyPreemptionWaCmds) {
    size_t usedSize = 0;
    auto &cmdStream = cmdQ->getCS(0);

    PreemptionHelper::applyPreemptionWaCmdsBegin(&cmdStream, *device);
    EXPECT_EQ(usedSize, cmdStream.getUsed());

    PreemptionHelper::applyPreemptionWaCmdsEnd(&cmdStream, *device);
    EXPECT_EQ(usedSize, cmdStream.getUsed());
}

// MidThread mode must flip the descriptor's thread-preemption-disable field
// from ENABLE (set up front) to DISABLE.
GEN10TEST_F(Gen10PreemptionTests, givenInterfaceDescriptorDataWhenMidThreadPreemptionModeThenSetDisableThreadPreemptionBitToDisable) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    INTERFACE_DESCRIPTOR_DATA iddArg;
    iddArg = FamilyType::cmdInitInterfaceDescriptorData;

    // Start from the opposite value so the assertion proves the helper wrote it.
    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE);

    PreemptionHelper::programInterfaceDescriptorDataPreemption(&iddArg, PreemptionMode::MidThread);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE, iddArg.getThreadPreemptionDisable());
}

// Disabled, MidBatch and ThreadGroup modes must each flip the descriptor's
// thread-preemption-disable field from DISABLE (reset before every call) to
// ENABLE.
GEN10TEST_F(Gen10PreemptionTests, givenInterfaceDescriptorDataWhenNoMidThreadPreemptionModeThenSetDisableThreadPreemptionBitToEnable) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    INTERFACE_DESCRIPTOR_DATA iddArg;
    iddArg = FamilyType::cmdInitInterfaceDescriptorData;

    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE);
    PreemptionHelper::programInterfaceDescriptorDataPreemption(&iddArg, PreemptionMode::Disabled);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable());

    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE);
    PreemptionHelper::programInterfaceDescriptorDataPreemption(&iddArg, PreemptionMode::MidBatch);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable());

    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE);
    PreemptionHelper::programInterfaceDescriptorDataPreemption(&iddArg, PreemptionMode::ThreadGroup);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable());
}

// On a freshly created mock device in MidThread mode, the programmed command's
// system instruction pointer equals the GPU address of the SIP kernel
// allocation for the SIP type chosen by SipKernel::getSipKernelType.
GEN10TEST_F(Gen10PreemptionTests, givenMidThreadPreemptionModeWhenStateSipIsProgrammedThenSipEqualsSipAllocationGpuAddressToPatch) {
    using STATE_SIP = typename FamilyType::STATE_SIP;

    auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr));
    mockDevice->setPreemptionMode(PreemptionMode::MidThread);

    auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredStateSipCmdSize(*mockDevice);

    StackVec preemptionBuffer;
    preemptionBuffer.resize(cmdSizePreemptionMidThread);
    LinearStream preemptionStream(&*preemptionBuffer.begin(), preemptionBuffer.size());

    PreemptionHelper::programStateSip(preemptionStream, *mockDevice);

    HardwareParse hwParserOnlyPreemption;
    hwParserOnlyPreemption.parseCommands(preemptionStream, 0);

    auto cmd = hwParserOnlyPreemption.getCommand();
    // Fatal check (was EXPECT_NE): cmd is dereferenced right below, so a missing
    // command must stop the test instead of crashing it.
    ASSERT_NE(nullptr, cmd);

    auto sipType = SipKernel::getSipKernelType(mockDevice->getHardwareInfo().pPlatform->eRenderCoreFamily, mockDevice->isSourceLevelDebuggerActive());
    EXPECT_EQ(mockDevice->getExecutionEnvironment()->getBuiltIns()->getSipKernel(sipType, *mockDevice).getSipAllocation()->getGpuAddressToPatch(), cmd->getSystemInstructionPointer());
}