/*
 * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "runtime/command_queue/enqueue_kernel.h"
#include "runtime/event/perf_counter.h"
#include "runtime/kernel/kernel.h"
#include "runtime/sampler/sampler.h"
#include "unit_tests/fixtures/execution_model_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_mdi.h"

#include <memory>
#include <string>

using namespace OCLRT;

// NOTE(review): this file arrived with every angle-bracketed template argument
// list stripped (e.g. `castToObject>(pDevQueue)`), which is not valid C++. The
// argument lists below were reconstructed from context; confirm each one against
// the declarations in the included runtime/unit_tests headers.

// Program binary and kernel names the parameterized tests are instantiated with.
static const char *binaryFile = "simple_block_kernel";
static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};

using ParentKernelDispatchTest = ExecutionModelKernelTest;

// With blockQueue == false, the command queue's DYNAMIC_STATE heap usage must stay
// at zero while the device queue's DYNAMIC_STATE heap usage changes.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) {
    // Body runs only when the reported CL version string contains "OpenCL 2.".
    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") == std::string::npos) {
        return;
    }

    DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    pKernel->createReflectionSurface();

    size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE).getUsed();
    EXPECT_EQ(0u, dshUsedBefore);

    size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *pKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  false); // queue is not blocked

    size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE).getUsed();
    EXPECT_EQ(0u, dshUsedAfter);

    size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
    EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter);
}

// The heap returned by getIndirectHeap() for the dispatch must be the device
// queue's DYNAMIC_STATE heap.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) {
    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") == std::string::npos) {
        return;
    }

    DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);

    MockMultiDispatchInfo multiDispatchInfo(pKernel);
    auto ish = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
    auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);

    EXPECT_EQ(ishOfDevQueue, ish);
}

// The heap returned by getIndirectHeap() for the indirect-object request must be
// the device queue's DYNAMIC_STATE heap.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) {
    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") == std::string::npos) {
        return;
    }

    DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);

    MockMultiDispatchInfo multiDispatchInfo(pKernel);
    auto ioh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
    auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);

    EXPECT_EQ(dshOfDevQueue, ioh);
}

// With blockQueue == false, the command queue's INDIRECT_OBJECT heap must remain
// unused after the dispatch.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) {
    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") == std::string::npos) {
        return;
    }

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *pKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  false); // queue is not blocked

    auto iohUsed = ioh.getUsed();
    EXPECT_EQ(0u, iohUsed);
}

// With blockQueue == false, the command queue's SURFACE_STATE heap must be able to
// hold the kernel's own SSH plus the size required for the execution model.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) {
    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") == std::string::npos) {
        return;
    }

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *pKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  false); // queue is not blocked

    auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE);

    EXPECT_LE(pKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize, ssh.getMaxAvailableSpace());

    size_t minRequiredSize = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
    size_t minRequiredSizeForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*pKernel);

    EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
}

// With blockQueue == true, dispatchWalker must hand back a KernelOperation whose
// SSH usage matches the parent's binding table and surface states, and stays
// below the total that includes the execution model.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") == std::string::npos) {
        return;
    }

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *pKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  true); // blockQueue

    // Take ownership so the operation is released on every exit path.
    std::unique_ptr<KernelOperation> blockedCommands(blockedCommandsData);
    ASSERT_NE(nullptr, blockedCommands.get());

    size_t minRequiredSize = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
    size_t minRequiredSizeForEM = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*pKernel);

    size_t sshUsed = blockedCommands->ssh->getUsed();

    size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) +
                             pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE);

    // The exact size is only checked when the kernel needs SSH for buffers or has image arguments.
    if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
        EXPECT_EQ(expectedSizeSSH, sshUsed);
    }

    EXPECT_GE(minRequiredSize, sshUsed);
    // Total SSH size including EM must be greater than the SSH allocated
    EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
}

INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest,
                        ParentKernelDispatchTest,
                        ::testing::Combine(
                            ::testing::Values(binaryFile),
                            ::testing::ValuesIn(KernelNames)));

using ParentKernelCommandStreamFixture = ParentKernelCommandQueueFixture;

// Fills a MultiDispatchInfo with enough parent-kernel dispatches that the
// page-aligned remainder is smaller than one scheduler dispatch, then checks
// the acquired command stream is still larger than the page-aligned kernel total.
HWTEST_F(ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhenCommandStreamIsAcquiredThenSizeAccountsForSchedulerDispatch) {
    if (device->getSupportedClVersion() < 20) {
        return;
    }

    // Declared first so the kernel outlives the dispatch infos that point at it.
    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*device));

    DispatchInfo dispatchInfo(mockParentKernel.get(), 1, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{0, 0, 0});
    MultiDispatchInfo multiDispatchInfo;

    size_t size = EnqueueOperation<FamilyType, CL_COMMAND_NDRANGE_KERNEL>::getSizeRequiredCS(false, false, *pCmdQ, mockParentKernel.get());
    size_t numOfKernels = MemoryConstants::pageSize / size;

    size_t rest = MemoryConstants::pageSize - (numOfKernels * size);

    SchedulerKernel &scheduler = BuiltIns::getInstance().getSchedulerKernel(*mockParentKernel->getContext());
    size_t schedulerSize = EnqueueOperation<FamilyType, CL_COMMAND_NDRANGE_KERNEL>::getSizeRequiredCS(false, false, *pCmdQ, &scheduler);

    // Add kernels until the slack left in the last page cannot fit the scheduler.
    while (rest >= schedulerSize) {
        numOfKernels++;
        rest = alignUp(numOfKernels * size, MemoryConstants::pageSize) - numOfKernels * size;
    }

    for (size_t i = 0; i < numOfKernels; i++) {
        multiDispatchInfo.push(dispatchInfo);
    }

    size_t totalKernelSize = alignUp(numOfKernels * size, MemoryConstants::pageSize);

    LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, false, false, multiDispatchInfo);

    EXPECT_LT(totalKernelSize, commandStream.getMaxAvailableSpace());
}

// GoogleTest fixture that forwards SetUp/TearDown to ExecutionModelSchedulerTest.
class MockParentKernelDispatch : public ExecutionModelSchedulerTest,
                                 public testing::Test {
    void SetUp() override {
        ExecutionModelSchedulerTest::SetUp();
    }

    void TearDown() override {
        ExecutionModelSchedulerTest::TearDown();
    }
};

// With blockQueue == true, the returned KernelOperation must use the same heap
// object for dsh and ioh.
HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatchedThenDshHeapForIndirectObjectHeapIsUsed) {
    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    // Owned by unique_ptr: a failing ASSERT_* returns early and would otherwise leak it.
    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *mockParentKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  true); // blockQueue

    // Declared after the kernel so it is destroyed before it, as in the original delete order.
    std::unique_ptr<KernelOperation> blockedCommands(blockedCommandsData);
    ASSERT_NE(nullptr, blockedCommands.get());

    EXPECT_EQ(blockedCommands->dsh.get(), blockedCommands->ioh.get());
}

// After a non-blocked dispatch, the MEDIA_INTERFACE_DESCRIPTOR_LOAD command found
// before the walker must start at DeviceQueue::colorCalcStateSize and span exactly
// one INTERFACE_DESCRIPTOR_DATA.
HWTEST_F(MockParentKernelDispatch, GivenParentKernelWhenDispatchedThenMediaInterfaceDescriptorLoadIsCorrectlyProgrammed) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    // Owned by unique_ptr: a failing ASSERT_* returns early and would otherwise leak it.
    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *mockParentKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  false); // queue is not blocked

    LinearStream *commandStream = &pCmdQ->getCS(0);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*commandStream, 0);
    hwParser.findHardwareCommands<FamilyType>();

    ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);

    auto pCmd = static_cast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(
        hwParser.getCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD>(hwParser.cmdList.begin(), hwParser.itorWalker));

    ASSERT_NE(nullptr, pCmd);

    uint32_t offsetInterfaceDescriptorData = DeviceQueue::colorCalcStateSize;
    uint32_t sizeInterfaceDescriptorData = sizeof(INTERFACE_DESCRIPTOR_DATA);

    EXPECT_EQ(offsetInterfaceDescriptorData, pCmd->getInterfaceDescriptorDataStartAddress());
    EXPECT_EQ(sizeInterfaceDescriptorData, pCmd->getInterfaceDescriptorTotalLength());
}

// Consumes 20 bytes of the command queue's SSH before dispatching and expects the
// heap to report zero usage afterwards.
HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedThenNewSSHIsAllocated) {
    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    // Owned by unique_ptr: a failing ASSERT_* returns early and would otherwise leak it.
    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
    ssh.getSpace(20);
    EXPECT_EQ(20u, ssh.getUsed());

    // This test assumes the parent is not using SSH. Storing the allocation on the
    // reuse list and then obtaining a new one from that list returns the same
    // allocation, so the heap buffer does not differ. If the parent is not using
    // SSH, the heap obtained has zero usage and the same buffer.
    ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *mockParentKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  false); // queue is not blocked

    EXPECT_EQ(0u, ssh.getUsed());
}

// Starts from an unused SSH and expects the heap's CPU base pointer to be the same
// before and after the dispatch.
HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatchedThenExistingSSHIsUsed) {
    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

    KernelOperation *blockedCommandsData = nullptr;
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
    EXPECT_EQ(0u, ssh.getUsed());

    auto *bufferMemory = ssh.getCpuBase();

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                  *mockParentKernel,
                                                  1,
                                                  globalOffsets,
                                                  workItems,
                                                  nullptr,
                                                  0,
                                                  nullptr,
                                                  &blockedCommandsData,
                                                  nullptr,
                                                  nullptr,
                                                  pDevice->getPreemptionMode(),
                                                  false); // queue is not blocked

    EXPECT_EQ(bufferMemory, ssh.getCpuBase());
}