415 lines
20 KiB
C++
415 lines
20 KiB
C++
/*
|
|
* Copyright (c) 2017 - 2018, Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "runtime/command_queue/enqueue_kernel.h"
#include "runtime/event/perf_counter.h"
#include "runtime/kernel/kernel.h"
#include "runtime/sampler/sampler.h"
#include "unit_tests/fixtures/execution_model_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_mdi.h"

#include <memory>
|
|
|
|
using namespace OCLRT;
|
|
|
|
// First element of the parameter tuple used to instantiate ParentKernelDispatchTest.
static const char *binaryFile{"simple_block_kernel"};
|
|
// Kernel names; ParentKernelDispatchTest is instantiated once per entry.
static const char *KernelNames[]{"kernel_reflection", "simple_block_kernel"};
|
|
|
|
// The parameterized dispatch tests below reuse the ExecutionModelKernelTest fixture under this name.
using ParentKernelDispatchTest = ExecutionModelKernelTest;
|
|
|
|
// Dispatches the parent kernel on a non-blocked queue and expects the command
// queue's DSH to stay at zero usage while the device queue's DSH usage changes.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        // Test body only runs when the version string contains "OpenCL 2.".
        return;
    }

    DeviceQueueHw<FamilyType> *deviceQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);

    KernelOperation *blockedCommands = nullptr;
    const size_t offsets[3] = {0, 0, 0};
    const size_t globalSize[3] = {1, 1, 1};

    pKernel->createReflectionSurface();

    // Command queue DSH must be empty before the dispatch...
    size_t cmdQueueDshBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE).getUsed();
    EXPECT_EQ(0u, cmdQueueDshBefore);

    size_t deviceQueueDshBefore = deviceQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, *pKernel, 1, offsets, globalSize,
                                                  nullptr, 0, nullptr, &blockedCommands,
                                                  nullptr, nullptr, pDevice->getPreemptionMode(),
                                                  false); // blockQueue

    // ...and still empty afterwards.
    size_t cmdQueueDshAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE).getUsed();
    EXPECT_EQ(0u, cmdQueueDshAfter);

    size_t deviceQueueDshAfter = deviceQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
    EXPECT_NE(deviceQueueDshBefore, deviceQueueDshAfter);
}
|
|
|
|
// Requesting the DYNAMIC_STATE heap for a parent-kernel dispatch must yield the
// very same heap object the device queue exposes as its DYNAMIC_STATE heap.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    DeviceQueueHw<FamilyType> *deviceQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);

    MockMultiDispatchInfo multiDispatchInfo(pKernel);
    auto requestedDsh = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
    auto deviceQueueDsh = deviceQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);

    EXPECT_EQ(deviceQueueDsh, requestedDsh);
}
|
|
|
|
// Requesting the INDIRECT_OBJECT heap for a parent-kernel dispatch must yield
// the device queue's DYNAMIC_STATE heap (the two pointers are compared).
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    DeviceQueueHw<FamilyType> *deviceQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);

    MockMultiDispatchInfo multiDispatchInfo(pKernel);
    auto requestedIoh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
    auto deviceQueueDsh = deviceQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);

    EXPECT_EQ(deviceQueueDsh, requestedIoh);
}
|
|
|
|
// After a non-blocked parent-kernel dispatch, the command queue's own
// INDIRECT_OBJECT heap is expected to report zero bytes used.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    KernelOperation *blockedCommands = nullptr;
    const size_t offsets[3] = {0, 0, 0};
    const size_t globalSize[3] = {1, 1, 1};

    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    // Grab the heap reference before dispatching so the same object is inspected afterwards.
    auto &cmdQueueIoh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, *pKernel, 1, offsets, globalSize,
                                                  nullptr, 0, nullptr, &blockedCommands,
                                                  nullptr, nullptr, pDevice->getPreemptionMode(),
                                                  false); // blockQueue

    auto usedAfterDispatch = cmdQueueIoh.getUsed();
    EXPECT_EQ(0u, usedAfterDispatch);
}
|
|
|
|
// After a non-blocked dispatch, the command queue's SURFACE_STATE heap must be
// large enough for (a) the kernel's own SurfaceStateHeapSize and (b) the total
// SSH size of the dispatch plus the execution-model SSH requirement.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    KernelOperation *blockedCommands = nullptr;
    const size_t offsets[3] = {0, 0, 0};
    const size_t globalSize[3] = {1, 1, 1};

    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, *pKernel, 1, offsets, globalSize,
                                                  nullptr, 0, nullptr, &blockedCommands,
                                                  nullptr, nullptr, pDevice->getPreemptionMode(),
                                                  false); // blockQueue

    auto &surfaceStateHeap = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE);

    EXPECT_LE(pKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize, surfaceStateHeap.getMaxAvailableSpace());

    size_t dispatchSshSize = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
    size_t executionModelSshSize = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*pKernel);

    EXPECT_LE(dispatchSshSize + executionModelSshSize, surfaceStateHeap.getMaxAvailableSpace());
}
|
|
|
|
// Dispatches the parent kernel with the queue blocked and inspects the SSH
// stored in the returned KernelOperation: its usage must not exceed the total
// SSH requirement, and must be strictly below that requirement plus the
// execution-model share.
HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    KernelOperation *blockedCommands = nullptr;
    const size_t offsets[3] = {0, 0, 0};
    const size_t globalSize[3] = {1, 1, 1};

    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, *pKernel, 1, offsets, globalSize,
                                                  nullptr, 0, nullptr, &blockedCommands,
                                                  nullptr, nullptr, pDevice->getPreemptionMode(),
                                                  true); // blockQueue
    ASSERT_NE(nullptr, blockedCommands);

    size_t dispatchSshSize = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
    size_t executionModelSshSize = KernelCommandsHelper<FamilyType>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*pKernel);

    size_t blockedSshUsed = blockedCommands->ssh->getUsed();

    // One RENDER_SURFACE_STATE per binding-table state plus the binding table itself.
    size_t surfaceStatesSize = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE);
    size_t bindingTableSize = pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE);
    size_t expectedSshSize = surfaceStatesSize + bindingTableSize;

    const bool hasImageArgs = pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0;
    if (pKernel->requiresSshForBuffers() || hasImageArgs) {
        EXPECT_EQ(expectedSshSize, blockedSshUsed);
    }

    EXPECT_GE(dispatchSshSize, blockedSshUsed);
    // Total SSH size including EM must be greater than the SSH actually used
    EXPECT_GT(dispatchSshSize + executionModelSshSize, blockedSshUsed);

    delete blockedCommands;
}
|
|
|
|
// Instantiates ParentKernelDispatchTest for every combination of binaryFile
// with each entry of KernelNames.
INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest, ParentKernelDispatchTest,
                        ::testing::Combine(::testing::Values(binaryFile),
                                           ::testing::ValuesIn(KernelNames)));
|
|
|
|
// Command-stream sizing test below reuses ParentKernelCommandQueueFixture under this name.
using ParentKernelCommandStreamFixture = ParentKernelCommandQueueFixture;
|
|
|
|
// Fills a MultiDispatchInfo with enough parent-kernel dispatches that the slack
// left in the page-aligned total is smaller than what the scheduler kernel
// needs, then expects the acquired command stream to offer more space than
// that page-aligned total.
HWTEST_F(ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhenCommandStreamIsAcquiredThenSizeAccountsForSchedulerDispatch) {
    if (device->getSupportedClVersion() < 20) {
        return;
    }

    MockParentKernel *parentKernel = MockParentKernel::create(*device);

    DispatchInfo parentDispatch(parentKernel, 1, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{0, 0, 0});
    MultiDispatchInfo multiDispatchInfo;

    size_t perKernelSize = EnqueueOperation<FamilyType, CL_COMMAND_NDRANGE_KERNEL>::getSizeRequiredCS(false, false, *pCmdQ, parentKernel);
    size_t kernelCount = MemoryConstants::pageSize / perKernelSize;

    // Space left in the first page once it holds as many kernels as fit.
    size_t slack = MemoryConstants::pageSize - (kernelCount * perKernelSize);

    SchedulerKernel &scheduler = BuiltIns::getInstance().getSchedulerKernel(*parentKernel->getContext());
    size_t schedulerSize = EnqueueOperation<FamilyType, CL_COMMAND_NDRANGE_KERNEL>::getSizeRequiredCS(false, false, *pCmdQ, &scheduler);

    // Keep adding kernels until the scheduler no longer fits in the page-aligned slack.
    while (slack >= schedulerSize) {
        ++kernelCount;
        slack = alignUp(kernelCount * perKernelSize, MemoryConstants::pageSize) - kernelCount * perKernelSize;
    }

    for (size_t pushed = 0; pushed < kernelCount; ++pushed) {
        multiDispatchInfo.push(parentDispatch);
    }

    size_t alignedKernelsSize = alignUp(kernelCount * perKernelSize, MemoryConstants::pageSize);

    LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, false, false, multiDispatchInfo);

    EXPECT_LT(alignedKernelsSize, commandStream.getMaxAvailableSpace());

    delete parentKernel;
}
|
|
|
|
class MockParentKernelDispatch : public ExecutionModelSchedulerTest,
|
|
public testing::Test {
|
|
void SetUp() override {
|
|
ExecutionModelSchedulerTest::SetUp();
|
|
}
|
|
|
|
void TearDown() override {
|
|
ExecutionModelSchedulerTest::TearDown();
|
|
}
|
|
};
|
|
|
|
// Dispatches a mock parent kernel with the queue blocked and expects the
// returned KernelOperation to hold the same heap object for both DSH and IOH.
HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatchedThenDshHeapForIndirectObjectHeapIsUsed) {

    if (pDevice->getSupportedClVersion() >= 20) {
        // Owned via unique_ptr so the kernel is released even when ASSERT_NE
        // below fails and returns from the test body early.
        std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

        KernelOperation *blockedCommandsData = nullptr;
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                      *mockParentKernel,
                                                      1,
                                                      globalOffsets,
                                                      workItems,
                                                      nullptr,
                                                      0,
                                                      nullptr,
                                                      &blockedCommandsData,
                                                      nullptr,
                                                      nullptr,
                                                      pDevice->getPreemptionMode(),
                                                      true); // blockQueue

        // Take ownership right after the dispatch. Declared after the kernel,
        // so it is destroyed first, matching the original delete order.
        std::unique_ptr<KernelOperation> blockedCommandsOwner(blockedCommandsData);

        ASSERT_NE(nullptr, blockedCommandsData);

        EXPECT_EQ(blockedCommandsData->dsh.get(), blockedCommandsData->ioh.get());
    }
}
|
|
|
|
// Dispatches a mock parent kernel on a non-blocked queue, parses the command
// stream and checks the MEDIA_INTERFACE_DESCRIPTOR_LOAD found before the
// walker: its start address must equal DeviceQueue::colorCalcStateSize and its
// total length must equal sizeof(INTERFACE_DESCRIPTOR_DATA).
HWTEST_F(MockParentKernelDispatch, GivenParentKernelWhenDispatchedThenMediaInterfaceDescriptorLoadIsCorrectlyProgrammed) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    if (pDevice->getSupportedClVersion() >= 20) {
        // Owned via unique_ptr so the kernel is released even when one of the
        // ASSERT_NE checks below fails and returns from the test body early.
        std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

        KernelOperation *blockedCommandsData = nullptr;
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                      *mockParentKernel,
                                                      1,
                                                      globalOffsets,
                                                      workItems,
                                                      nullptr,
                                                      0,
                                                      nullptr,
                                                      &blockedCommandsData,
                                                      nullptr,
                                                      nullptr,
                                                      pDevice->getPreemptionMode(),
                                                      false); // blockQueue

        LinearStream *commandStream = &pCmdQ->getCS(0);

        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(*commandStream, 0);
        hwParser.findHardwareCommands<FamilyType>();

        ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);

        // Only commands preceding the walker are searched.
        auto pCmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)hwParser.getCommand<MEDIA_INTERFACE_DESCRIPTOR_LOAD>(hwParser.cmdList.begin(), hwParser.itorWalker);

        ASSERT_NE(nullptr, pCmd);

        uint32_t offsetInterfaceDescriptorData = DeviceQueue::colorCalcStateSize;
        uint32_t sizeInterfaceDescriptorData = sizeof(INTERFACE_DESCRIPTOR_DATA);

        EXPECT_EQ(offsetInterfaceDescriptorData, pCmd->getInterfaceDescriptorDataStartAddress());
        EXPECT_EQ(sizeInterfaceDescriptorData, pCmd->getInterfaceDescriptorTotalLength());
    }
}
|
|
|
|
// Consumes 20 bytes of the command queue's SSH, dispatches a mock parent kernel
// on a non-blocked queue, and expects the SSH to report zero usage afterwards.
HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedThenNewSSHIsAllocated) {

    if (pDevice->getSupportedClVersion() >= 20) {
        // Owned via unique_ptr so the kernel is released even when ASSERT_EQ
        // below fails and returns from the test body early.
        std::unique_ptr<MockParentKernel> mockParentKernel(MockParentKernel::create(*pDevice));

        KernelOperation *blockedCommandsData = nullptr;
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

        auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);

        ssh.getSpace(20);

        EXPECT_EQ(20u, ssh.getUsed());

        // Assuming the parent is not using SSH. This is because storing the allocation on the
        // reuse list and allocating a new one by obtaining it from the reuse list returns the
        // same allocation, so the heap buffer does not differ.
        // If the parent is not using SSH, then the heap obtained has zero usage and the same buffer.
        ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize);

        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
                                                      *mockParentKernel,
                                                      1,
                                                      globalOffsets,
                                                      workItems,
                                                      nullptr,
                                                      0,
                                                      nullptr,
                                                      &blockedCommandsData,
                                                      nullptr,
                                                      nullptr,
                                                      pDevice->getPreemptionMode(),
                                                      false); // blockQueue

        EXPECT_EQ(0u, ssh.getUsed());
    }
}
|
|
|
|
// With an untouched SSH on the command queue, dispatching a mock parent kernel
// on a non-blocked queue must leave the heap's CPU base pointer unchanged.
HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatchedThenExistingSSHIsUsed) {
    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    MockParentKernel *parentKernel = MockParentKernel::create(*pDevice);

    KernelOperation *blockedCommands = nullptr;
    const size_t offsets[3] = {0, 0, 0};
    const size_t globalSize[3] = {1, 1, 1};

    auto &surfaceStateHeap = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
    EXPECT_EQ(0u, surfaceStateHeap.getUsed());

    // Remember the backing buffer so it can be compared after the dispatch.
    auto *cpuBaseBeforeDispatch = surfaceStateHeap.getCpuBase();

    GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, *parentKernel, 1, offsets, globalSize,
                                                  nullptr, 0, nullptr, &blockedCommands,
                                                  nullptr, nullptr, pDevice->getPreemptionMode(),
                                                  false); // blockQueue

    EXPECT_EQ(cpuBaseBeforeDispatch, surfaceStateHeap.getCpuBase());

    delete parentKernel;
}
|