/*
 * Copyright (C) 2017-2018 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/built_ins/built_ins.h"
#include "runtime/command_stream/preemption.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/fixtures/preemption_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"

// NOTE(review): the explicit template argument lists in this part of the file
// (<CNLFamily>, <FamilyType>, <GPGPU_CSR_BASE_ADDRESS *>, <char, 8192>, ...) were
// missing and have been reconstructed. Those naming a command type follow from
// the local aliases; the family/size arguments should be confirmed against the
// helper declarations.

namespace OCLRT {

// Gen10 specialization of the parser helper: looks for a GPGPU_CSR_BASE_ADDRESS
// command between the start of the parsed command list and the walker, and
// stores it in cmdGpgpuCsrBaseAddress when one is found.
template <>
void HardwareParse::findCsrBaseAddress<CNLFamily>() {
    typedef typename GEN10::GPGPU_CSR_BASE_ADDRESS GPGPU_CSR_BASE_ADDRESS;
    itorGpgpuCsrBaseAddress = find<GPGPU_CSR_BASE_ADDRESS *>(cmdList.begin(), itorWalker);
    if (itorGpgpuCsrBaseAddress != itorWalker) {
        cmdGpgpuCsrBaseAddress = *itorGpgpuCsrBaseAddress;
    }
}
} // namespace OCLRT

using namespace OCLRT;

// Gen10-named aliases of the shared preemption fixtures.
using Gen10PreemptionTests = DevicePreemptionTests;
using Gen10PreemptionEnqueueKernelTest = PreemptionEnqueueKernelTest;
using Gen10MidThreadPreemptionEnqueueKernelTest = MidThreadPreemptionEnqueueKernelTest;
using Gen10ThreadGroupPreemptionEnqueueKernelTest = ThreadGroupPreemptionEnqueueKernelTest;

// Gen10 expectations used by the shared preemption tests: the register address
// (0x2580), the value expected for each preemption mode, and the default value
// (the MidBatch one).
template <>
PreemptionTestHwDetails GetPreemptionTestHwDetails<CNLFamily>() {
    PreemptionTestHwDetails ret;
    ret.modeToRegValueMap[PreemptionMode::ThreadGroup] = DwordBuilder::build(1, true) | DwordBuilder::build(2, true, false);
    ret.modeToRegValueMap[PreemptionMode::MidBatch] = DwordBuilder::build(2, true) | DwordBuilder::build(1, true, false);
    ret.modeToRegValueMap[PreemptionMode::MidThread] = DwordBuilder::build(2, true, false) | DwordBuilder::build(1, true, false);
    ret.defaultRegValue = ret.modeToRegValueMap[PreemptionMode::MidBatch];
    ret.regAddress = 0x2580u;
    return ret;
}

// With ThreadGroup preemption the preamble needs no space and programming it
// writes nothing into the stream.
GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsNotAvailableThenDoesNotProgramPreamble) {
    device->setPreemptionMode(PreemptionMode::ThreadGroup);

    size_t requiredSize = PreemptionHelper::getRequiredPreambleSize<FamilyType>(*device);
    EXPECT_EQ(0U, requiredSize);

    LinearStream cmdStream{nullptr, 0};
    PreemptionHelper::programPreamble<FamilyType>(cmdStream, *device, nullptr);
    EXPECT_EQ(0U, cmdStream.getUsed());
}

// With MidThread preemption the preamble consists of GPGPU_CSR_BASE_ADDRESS
// followed by STATE_SIP, pointing at the CSR surface and the SIP kernel.
GEN10TEST_F(Gen10PreemptionTests, whenMidThreadPreemptionIsAvailableThenProgramsPreamble) {
    using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS;
    using STATE_SIP = typename FamilyType::STATE_SIP;

    device->setPreemptionMode(PreemptionMode::MidThread);
    executionEnvironment->DisableMidThreadPreemption = 0;

    size_t minCsrSize = device->getHardwareInfo().pSysInfo->CsrSizeInMb * MemoryConstants::megaByte;
    uint64_t minCsrAlignment = 2 * 256 * MemoryConstants::kiloByte;
    // The mock allocation is built from the alignment value used as its pointer.
    MockGraphicsAllocation csrSurface((void *)minCsrAlignment, minCsrSize);

    // verify preamble programming
    size_t requiredPreambleSize = PreemptionHelper::getRequiredPreambleSize<FamilyType>(*device);
    size_t expectedPreambleSize = sizeof(GPGPU_CSR_BASE_ADDRESS) + sizeof(STATE_SIP);
    EXPECT_EQ(expectedPreambleSize, requiredPreambleSize);

    StackVec<char, 8192> preambleStorage(requiredPreambleSize);
    ASSERT_LE(requiredPreambleSize, preambleStorage.size());
    LinearStream preambleCmdStream{preambleStorage.begin(), preambleStorage.size()};

    PreemptionHelper::programPreamble<FamilyType>(preambleCmdStream, *device, &csrSurface);

    HardwareParse hwParsePreamble;
    hwParsePreamble.parseCommands<FamilyType>(preambleCmdStream);

    auto csrBaseAddressCmd = hwParsePreamble.getCommand<GPGPU_CSR_BASE_ADDRESS>();
    ASSERT_NE(nullptr, csrBaseAddressCmd);
    EXPECT_EQ(csrSurface.getGpuAddressToPatch(), csrBaseAddressCmd->getGpgpuCsrBaseAddress());

    auto stateSipCmd = hwParsePreamble.getCommand<STATE_SIP>();
    ASSERT_NE(nullptr, stateSipCmd);
    EXPECT_EQ(device->getExecutionEnvironment()->getBuiltIns()->getSipKernel(SipKernelType::Csr, *device).getSipAllocation()->getGpuAddressToPatch(),
              stateSipCmd->getSystemInstructionPointer());
}

// Two identical ThreadGroup enqueues: the first one programs register 0x2580
// exactly once, the second one must not program it again.
GEN10TEST_F(Gen10ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogram) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    csr.overrideMediaVFEStateDirty(false);
    auto csrSurface = csr.getPreemptionCsrAllocation();
    EXPECT_EQ(nullptr, csrSurface);

    HardwareParse hwParser;
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    MockKernelWithInternals mockKernel(*pDevice);

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands<FamilyType>(csr.commandStream);
    hwParser.findHardwareCommands<FamilyType>();
    // Remember where the first submission ended so the second parse starts there.
    auto offset = csr.commandStream.getUsed();

    bool foundOne = false;
    for (auto it : hwParser.lriList) {
        auto cmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_IMM *>(it);
        if (cmd->getRegisterOffset() == 0x2580u) {
            EXPECT_FALSE(foundOne); // a second hit would mean duplicate programming
            foundOne = true;
        }
    }
    EXPECT_TRUE(foundOne);

    hwParser.cmdList.clear();
    hwParser.lriList.clear();

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
    hwParser.findHardwareCommands<FamilyType>();

    for (auto it : hwParser.lriList) {
        auto cmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_IMM *>(it);
        EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u);
    }
}

// A non-blocked enqueue followed by flush() reaches the CSR once and carries
// the device's ThreadGroup preemption mode in the dispatch flags.
GEN10TEST_F(Gen10PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledThenPassDevicePreemptionMode) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);

    auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo(), *pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pDevice);
    EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(*pDevice, mockKernel.mockKernel));

    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->flush();

    EXPECT_EQ(1, mockCsr->flushCalledCount);
    EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode);
}
// NOTE(review): the explicit template argument lists in the tests below
// (<FamilyType>, <MI_LOAD_REGISTER_IMM *>, <GPGPU_CSR_BASE_ADDRESS *>) were
// missing and have been reconstructed. Those naming a command type follow from
// the typedefs declared in the test; the <FamilyType> ones should be confirmed
// against the helper declarations.

// An enqueue blocked on a user event is not flushed until the event completes;
// once it is, the CSR sees one flush carrying the ThreadGroup preemption mode.
GEN10TEST_F(Gen10PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledAndBlockedThenPassDevicePreemptionMode) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    WhitelistedRegisters regs = {};
    regs.csChicken1_0x2580 = true;
    pDevice->setForceWhitelistedRegs(true, &regs);

    auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo(), *pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pDevice);
    EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(*pDevice, mockKernel.mockKernel));

    UserEvent userEventObj;
    cl_event userEvent = &userEventObj;
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &userEvent, nullptr);
    pCmdQ->flush();
    EXPECT_EQ(0, mockCsr->flushCalledCount); // still waiting on the user event

    userEventObj.setStatus(CL_COMPLETE);
    pCmdQ->flush();
    EXPECT_EQ(1, mockCsr->flushCalledCount);
    EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode);
}

// Two identical enqueues on the MidThread fixture: the first one programs
// register 0x2580 exactly once and emits a GPGPU_CSR_BASE_ADDRESS that points
// at the CSR surface; the second one must emit neither.
GEN10TEST_F(Gen10MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThread) {
    typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
    typedef typename FamilyType::GPGPU_CSR_BASE_ADDRESS GPGPU_CSR_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    csr.overrideMediaVFEStateDirty(false);
    auto csrSurface = csr.getPreemptionCsrAllocation();
    ASSERT_NE(nullptr, csrSurface);

    HardwareParse hwParser;
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    MockKernelWithInternals mockKernel(*pDevice);

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands<FamilyType>(csr.commandStream);
    hwParser.findHardwareCommands<FamilyType>();
    // Remember where the first submission ended so the second parse starts there.
    auto offset = csr.commandStream.getUsed();

    bool foundOneLri = false;
    for (auto it : hwParser.lriList) {
        auto cmdLri = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
        if (cmdLri->getRegisterOffset() == 0x2580u) {
            EXPECT_FALSE(foundOneLri); // a second hit would mean duplicate programming
            foundOneLri = true;
        }
    }
    EXPECT_TRUE(foundOneLri);

    hwParser.findCsrBaseAddress<FamilyType>();
    ASSERT_NE(nullptr, hwParser.cmdGpgpuCsrBaseAddress);
    auto cmdCsr = genCmdCast<GPGPU_CSR_BASE_ADDRESS *>(hwParser.cmdGpgpuCsrBaseAddress);
    ASSERT_NE(nullptr, cmdCsr);
    EXPECT_EQ(csrSurface->getGpuAddressToPatch(), cmdCsr->getGpgpuCsrBaseAddress());

    // Reset the parser state that the second pass re-populates.
    hwParser.cmdList.clear();
    hwParser.lriList.clear();
    hwParser.cmdGpgpuCsrBaseAddress = nullptr;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
    hwParser.findHardwareCommands<FamilyType>();

    for (auto it : hwParser.lriList) {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
        EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u);
    }

    hwParser.findCsrBaseAddress<FamilyType>();
    EXPECT_EQ(nullptr, hwParser.cmdGpgpuCsrBaseAddress);
}

// With preemption disabled on the device, the flushed dispatch flags carry
// PreemptionMode::Disabled.
GEN10TEST_F(Gen10PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKernelCalledThenPassDisabledPreemptionMode) {
    pDevice->setPreemptionMode(PreemptionMode::Disabled);
    WhitelistedRegisters regs = {};
    pDevice->setForceWhitelistedRegs(true, &regs);

    auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo(), *pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pDevice);
    EXPECT_EQ(PreemptionMode::Disabled, PreemptionHelper::taskPreemptionMode(*pDevice, mockKernel.mockKernel));

    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->flush();

    EXPECT_EQ(1, mockCsr->flushCalledCount);
    EXPECT_EQ(PreemptionMode::Disabled, mockCsr->passedDispatchFlags.preemptionMode);
}

// The preemption workaround is expected to need no command-stream space here.
GEN10TEST_F(Gen10PreemptionTests, getRequiredCmdQSize) {
    size_t expectedSize = 0;
    EXPECT_EQ(expectedSize, PreemptionHelper::getPreemptionWaCsSize<FamilyType>(*device));
}

// Neither the begin nor the end workaround hook is expected to write commands.
GEN10TEST_F(Gen10PreemptionTests, applyPreemptionWaCmds) {
    size_t usedSize = 0;
    auto &cmdStream = cmdQ->getCS(0);

    PreemptionHelper::applyPreemptionWaCmdsBegin<FamilyType>(&cmdStream, *device);
    EXPECT_EQ(usedSize, cmdStream.getUsed());
    PreemptionHelper::applyPreemptionWaCmdsEnd<FamilyType>(&cmdStream, *device);
    EXPECT_EQ(usedSize, cmdStream.getUsed());
}

// MidThread mode must flip the descriptor's ThreadPreemptionDisable field from
// ENABLE to DISABLE.
GEN10TEST_F(Gen10PreemptionTests, givenInterfaceDescriptorDataWhenMidThreadPreemptionModeThenSetDisableThreadPreemptionBitToDisable) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    INTERFACE_DESCRIPTOR_DATA iddArg;
    iddArg = FamilyType::cmdInitInterfaceDescriptorData;

    // Start from the opposite value so the assertion proves the helper wrote it.
    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE);

    PreemptionHelper::programInterfaceDescriptorDataPreemption<FamilyType>(&iddArg, PreemptionMode::MidThread);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE, iddArg.getThreadPreemptionDisable());
}

// Every mode other than MidThread (Disabled, MidBatch, ThreadGroup) must set
// the descriptor's ThreadPreemptionDisable field to ENABLE.
GEN10TEST_F(Gen10PreemptionTests, givenInterfaceDescriptorDataWhenNoMidThreadPreemptionModeThenSetDisableThreadPreemptionBitToEnable) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    INTERFACE_DESCRIPTOR_DATA iddArg;
    iddArg = FamilyType::cmdInitInterfaceDescriptorData;

    // The field is reset to DISABLE before each call so every mode is checked
    // independently.
    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE);
    PreemptionHelper::programInterfaceDescriptorDataPreemption<FamilyType>(&iddArg, PreemptionMode::Disabled);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable());

    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE);
    PreemptionHelper::programInterfaceDescriptorDataPreemption<FamilyType>(&iddArg, PreemptionMode::MidBatch);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable());

    iddArg.setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE);
    PreemptionHelper::programInterfaceDescriptorDataPreemption<FamilyType>(&iddArg, PreemptionMode::ThreadGroup);
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE, iddArg.getThreadPreemptionDisable());
}