Disable Device Enqueue and Pipes support on TGLLP

Related-To: NEO-4368

Change-Id: I1838bd711b59c1e82c84bd92ee945d7a50e89234
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2020-05-21 17:18:16 +02:00
committed by sys_ocldev
parent e1403ef08d
commit 611198cd98
11 changed files with 470 additions and 453 deletions

View File

@@ -26,6 +26,7 @@ class ExecutionModelSchedulerFixture : public ExecutionModelSchedulerTest,
public:
void SetUp() override {
ExecutionModelSchedulerTest::SetUp();
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);
}
void TearDown() override {
@@ -43,124 +44,122 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
if (pClDevice->areOcl21FeaturesSupported()) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = context->getSchedulerKernel();
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = context->getSchedulerKernel();
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
EXPECT_NE(nullptr, executionModelDsh);
EXPECT_NE(nullptr, executionModelDsh);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
false, false, false, multiDispatchinfo,
nullptr, 0);
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
false, false, false, multiDispatchinfo,
nullptr, 0);
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
pCmdQ->getCS(0),
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
pCmdQ->getCS(0),
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetZ);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
EXPECT_EQ(0u, *scheduler.globalWorkOffsetZ);
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX);
EXPECT_EQ(1u, *scheduler.localWorkSizeY);
EXPECT_EQ(1u, *scheduler.localWorkSizeZ);
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX);
EXPECT_EQ(1u, *scheduler.localWorkSizeY);
EXPECT_EQ(1u, *scheduler.localWorkSizeZ);
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX2);
EXPECT_EQ(1u, *scheduler.localWorkSizeY2);
EXPECT_EQ(1u, *scheduler.localWorkSizeZ2);
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX2);
EXPECT_EQ(1u, *scheduler.localWorkSizeY2);
EXPECT_EQ(1u, *scheduler.localWorkSizeZ2);
if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) {
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX);
}
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY);
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ);
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX);
EXPECT_EQ(0u, *scheduler.numWorkGroupsY);
EXPECT_EQ(0u, *scheduler.numWorkGroupsZ);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);
hwParser.findHardwareCommands<FamilyType>();
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
// Before Walker There must be PC
PIPE_CONTROL *pc = hwParser.getCommand<PIPE_CONTROL>(hwParser.cmdList.begin(), hwParser.itorWalker);
ASSERT_NE(nullptr, pc);
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);
auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad;
uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress();
uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength();
uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize;
uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed);
EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed);
auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker;
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
size_t numWorkgroupsProgrammed[3] = {0, 0, 0};
uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum();
EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup);
numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension();
numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension();
numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension();
EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]);
EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]);
EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]);
typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize();
EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize);
EXPECT_EQ(0u, walker->getThreadGroupIdStartingX());
EXPECT_EQ(0u, walker->getThreadGroupIdStartingY());
EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ());
uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress();
assert(offsetCrossThreadDataProgrammed % 64 == 0);
size_t curbeSize = scheduler.getCurbeSize();
size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account
EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed);
EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset());
auto numChannels = 3;
auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize;
auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws());
auto sizeCrossThreadData = scheduler.getCrossThreadDataSize();
auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength());
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker;
uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress();
EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddressGraphicsaddress472());
if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) {
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX);
}
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY);
EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ);
EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX);
EXPECT_EQ(0u, *scheduler.numWorkGroupsY);
EXPECT_EQ(0u, *scheduler.numWorkGroupsZ);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);
hwParser.findHardwareCommands<FamilyType>();
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
// Before Walker There must be PC
PIPE_CONTROL *pc = hwParser.getCommand<PIPE_CONTROL>(hwParser.cmdList.begin(), hwParser.itorWalker);
ASSERT_NE(nullptr, pc);
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);
auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad;
uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress();
uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength();
uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize;
uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed);
EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed);
auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker;
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
size_t numWorkgroupsProgrammed[3] = {0, 0, 0};
uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum();
EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup);
numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension();
numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension();
numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension();
EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]);
EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]);
EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]);
typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize();
EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize);
EXPECT_EQ(0u, walker->getThreadGroupIdStartingX());
EXPECT_EQ(0u, walker->getThreadGroupIdStartingY());
EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ());
uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress();
assert(offsetCrossThreadDataProgrammed % 64 == 0);
size_t curbeSize = scheduler.getCurbeSize();
size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account
EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed);
EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset());
auto numChannels = 3;
auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize;
auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws());
auto sizeCrossThreadData = scheduler.getCrossThreadDataSize();
auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength());
ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker;
uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress();
EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddressGraphicsaddress472());
}
HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoesNotUseStandardCmdQIOH) {
@@ -172,89 +171,83 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoe
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
if (pClDevice->areOcl21FeaturesSupported()) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = context->getSchedulerKernel();
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = context->getSchedulerKernel();
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo,
nullptr, 0);
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
MultiDispatchInfo multiDispatchinfo(&scheduler);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo,
nullptr, 0);
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
pCmdQ->getCS(0),
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
pCmdQ->getCS(0),
*pDevQueueHw,
pDevice->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
EXPECT_EQ(0u, ioh.getUsed());
}
EXPECT_EQ(0u, ioh.getUsed());
}
// Sets m_SchedulerEarlyReturn to 1 on the mock device queue's IGIL queue, dispatches the
// scheduler kernel and parses the resulting command stream: a walker must be present, but
// no batch-buffer-start may be found after it (itorBBStartAfterWalker == end).
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so most statements
// appear twice. Presumably the copy nested under areOcl21FeaturesSupported() is the pre-change
// version and the copy relying on REQUIRE_DEVICE_ENQUEUE_OR_SKIP is the post-change one -
// confirm against the repository before treating this text as compilable source.
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, dispatchSchedulerWithEarlyReturnSetToFirstInstanceDoesNotPutBBStartCmd) {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
if (device->areOcl21FeaturesSupported()) {
cl_queue_properties properties[3] = {0};
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
cl_queue_properties properties[3] = {0};
MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
auto *igilQueue = mockDevQueue.getIgilQueue();
auto *igilQueue = mockDevQueue.getIgilQueue();
ASSERT_NE(nullptr, igilQueue);
igilQueue->m_controls.m_SchedulerEarlyReturn = 1;
ASSERT_NE(nullptr, igilQueue);
// This is the condition under test: the early-return control is set before the dispatch.
igilQueue->m_controls.m_SchedulerEarlyReturn = 1;
SchedulerKernel &scheduler = context->getSchedulerKernel();
SchedulerKernel &scheduler = context->getSchedulerKernel();
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
false, false, false, multiDispatchinfo,
nullptr, 0);
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
false, false, false, multiDispatchinfo,
nullptr, 0);
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
pCmdQ->getCS(0),
mockDevQueue,
device->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
// The scheduler is dispatched into the command queue's stream, using the mock device queue's DSH.
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
pCmdQ->getCS(0),
mockDevQueue,
device->getPreemptionMode(),
scheduler,
&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
false);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);
hwParser.findHardwareCommands<FamilyType>();
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream, 0);
hwParser.findHardwareCommands<FamilyType>();
EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
}
// Walker is expected; a batch-buffer-start after the walker is not.
EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
}
// Sets the ForceDispatchScheduler debug flag, enqueues parentKernel on a mock command queue
// and expects the first entry of lastEnqueuedKernels to be flagged as the scheduler kernel.
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so statements
// appear twice; presumably the copy under areOcl21FeaturesSupported() is the pre-change
// version - confirm against the repository.
HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, ForceDispatchSchedulerEnqueuesSchedulerKernel) {
if (pClDevice->areOcl21FeaturesSupported()) {
DebugManagerStateRestore dbgRestorer;
// presumably restores the debug flags when it goes out of scope - its definition is not visible here
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.ForceDispatchScheduler.set(true);
DebugManager.flags.ForceDispatchScheduler.set(true);
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
MockCommandQueueHw<FamilyType> *mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);
// NOTE(review): raw new paired with the delete at the end of the test; gws is passed as both
// the global and the local work size.
MockCommandQueueHw<FamilyType> *mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);
mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel);
EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel);
delete mockCmdQ;
}
delete mockCmdQ;
}

View File

@@ -15,6 +15,7 @@
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include <memory>
@@ -76,361 +77,361 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
};
// Acquires the EM critical section on the mock device queue, submits a blocked parent-kernel
// command built on freshly allocated heaps, and expects the mock's criticalSectioncheckCounter
// to equal its maxCounter afterwards.
// NOTE(review): the mock presumably releases the section after maxCounter checks - its
// definition lies outside this view. This span is also a rendered diff with the +/- markers
// stripped, so statements appear twice (pre-change copy under areOcl21FeaturesSupported(),
// post-change copy after REQUIRE_DEVICE_ENQUEUE_OR_SKIP - presumably; confirm in the repository).
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement) {
if (device->areOcl21FeaturesSupported()) {
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*context);
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
mockDevQueue.acquireEMCriticalSection();
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*context);
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
size_t heapSize = 20;
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
// The section is taken before the submit; this is the "locked" precondition of the test.
mockDevQueue.acquireEMCriticalSection();
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
size_t heapSize = 20;
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
dsh->getSpace(mockDevQueue.getDshOffset());
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
// Consume the DSH up to the device queue's DSH offset before handing the heap to the command.
dsh->getSpace(mockDevQueue.getDshOffset());
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
cmdComputeKernel->submit(0, false);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter);
delete cmdComputeKernel;
delete parentKernel;
}
cmdComputeKernel->submit(0, false);
EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter);
// Both objects were created with raw new/create in this test and are released manually.
delete cmdComputeKernel;
delete parentKernel;
}
// Writes a marker value into a DSH allocated from the command queue, hands that DSH to a
// blocked parent-kernel command, submits it, and reads the device queue's own DSH usage
// before and after the submit.
// NOTE(review): the final EXPECT_EQ compares usedDSHAfterSubmit with itself and therefore can
// never fail; see the note at that line.
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so statements
// appear twice (pre-change copy under areOcl21FeaturesSupported(), post-change copy after
// REQUIRE_DEVICE_ENQUEUE_OR_SKIP - presumably; confirm in the repository).
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) {
if (device->areOcl21FeaturesSupported()) {
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*context);
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*context);
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
size_t heapSize = 20;
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
size_t heapSize = 20;
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
// add initial offset of colorCalState
dsh->getSpace(DeviceQueue::colorCalcStateSize);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
uint64_t ValueToFillDsh = 5;
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
// add initial offset of colorCalState
dsh->getSpace(DeviceQueue::colorCalcStateSize);
// Fill Interface Descriptor Data
*dshVal = ValueToFillDsh;
uint64_t ValueToFillDsh = 5;
uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
// Move to parent DSH Offset
size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t);
dsh->getSpace(alignToOffsetDshSize);
// Fill Interface Descriptor Data
*dshVal = ValueToFillDsh;
// Fill with pattern
dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
*dshVal = ValueToFillDsh;
// Move to parent DSH Offset
// (the remaining distance to getDshOffset() after the color-calc area and the first marker)
size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t);
dsh->getSpace(alignToOffsetDshSize);
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
// Fill with pattern
dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
*dshVal = ValueToFillDsh;
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
// Before the submit the device queue's DSH has used exactly colorCalcStateSize bytes.
EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
cmdComputeKernel->submit(0, false);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
//device queue dsh is not changed
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
// NOTE(review): self-comparison - always passes; presumably usedDSHBeforeSubmit was intended
// as the first argument. TODO confirm and fix in the real source file.
EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
cmdComputeKernel->submit(0, false);
delete cmdComputeKernel;
delete parentKernel;
}
//device queue dsh is not changed
size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
// NOTE(review): self-comparison - always passes, so the "dsh is not changed" claim above is
// not actually verified; presumably usedDSHBeforeSubmit was intended as the first argument.
EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
delete cmdComputeKernel;
delete parentKernel;
}
// Submits a blocked parent-kernel command built on freshly allocated heaps and expects the
// mock device queue to report both indirectStateSetup and cleanupSectionAdded as true.
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so statements
// appear twice (pre-change copy under areOcl21FeaturesSupported(), post-change copy after
// REQUIRE_DEVICE_ENQUEUE_OR_SKIP - presumably; confirm in the repository).
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup) {
if (device->areOcl21FeaturesSupported()) {
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*context);
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
size_t heapSize = 20;
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
cl_queue_properties properties[3] = {0};
MockParentKernel *parentKernel = MockParentKernel::create(*context);
MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
// The DSH is sized after the device queue's DSH buffer; IOH and SSH use the small fixed heapSize.
size_t heapSize = 20;
size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
dsh->getSpace(mockDevQueue.getDshOffset());
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
// Consume the DSH up to the device queue's DSH offset before handing the heap to the command.
dsh->getSpace(mockDevQueue.getDshOffset());
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
cmdComputeKernel->submit(0, false);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
EXPECT_TRUE(mockDevQueue.indirectStateSetup);
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
cmdComputeKernel->submit(0, false);
delete cmdComputeKernel;
delete parentKernel;
}
// Both flags are read from the mock after the submit; where they are set is outside this view.
EXPECT_TRUE(mockDevQueue.indirectStateSetup);
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
delete cmdComputeKernel;
delete parentKernel;
}
// Submits a blocked parent-kernel command that carries a profiling timestamp and
// checks that the mock device queue reports a cleanup section and that the
// timestamp recorded there is the tag's CPU-access pointer.
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp) {
    // Must run before anything is allocated so that a skipped test leaks nothing.
    // NOTE(review): macro is defined outside this chunk; presumably it skips the
    // test when the device lacks device-enqueue support - confirm.
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

    cl_queue_properties properties[3] = {0};
    MockParentKernel *parentKernel = MockParentKernel::create(*context);
    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
    parentKernel->createReflectionSurface();
    context->setDefaultDeviceQueue(&mockDevQueue);

    // The DSH is sized after the device queue's DSH buffer; IOH and SSH use a small fixed size.
    size_t heapSize = 20;
    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
    dsh->getSpace(mockDevQueue.getDshOffset());

    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandData->setHeaps(dsh, ioh, ssh);

    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
    PreemptionMode preemptionMode = device->getPreemptionMode();
    std::vector<Surface *> surfaces;
    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

    // Attach the timestamp before submit so the submission path can see it.
    auto timestamp = pCmdQ->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag();
    cmdComputeKernel->timestamp = timestamp;
    cmdComputeKernel->submit(0, false);

    EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
    EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tagForCpuAccess);

    delete cmdComputeKernel;
    delete parentKernel;
}
// Submits a blocked parent-kernel command and checks that the mock device queue
// reports the scheduler as dispatched.
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched) {
    // Must run before anything is allocated so that a skipped test leaks nothing.
    // NOTE(review): macro is defined outside this chunk; presumably it skips the
    // test when the device lacks device-enqueue support - confirm.
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

    cl_queue_properties properties[3] = {0};
    MockParentKernel *parentKernel = MockParentKernel::create(*context);
    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
    parentKernel->createReflectionSurface();
    context->setDefaultDeviceQueue(&mockDevQueue);

    // The DSH is sized after the device queue's DSH buffer; IOH and SSH use a small fixed size.
    size_t heapSize = 20;
    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();

    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
    dsh->getSpace(mockDevQueue.getDshOffset());

    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandData->setHeaps(dsh, ioh, ssh);

    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
    PreemptionMode preemptionMode = device->getPreemptionMode();
    std::vector<Surface *> surfaces;
    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

    cmdComputeKernel->submit(0, false);

    EXPECT_TRUE(mockDevQueue.schedulerDispatched);

    delete cmdComputeKernel;
    delete parentKernel;
}
// Submits a blocked parent-kernel command on a separate MockCommandQueue whose
// indirect heaps already have some bytes consumed, then checks that submit
// neither released those heaps nor changed their used size.
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
    // Must run before anything is allocated so that a skipped test leaks nothing.
    // NOTE(review): macro is defined outside this chunk; presumably it skips the
    // test when the device lacks device-enqueue support - confirm.
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

    cl_queue_properties properties[3] = {0};
    MockParentKernel *parentKernel = MockParentKernel::create(*context);
    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
    parentKernel->createReflectionSurface();
    context->setDefaultDeviceQueue(&mockDevQueue);

    MockCommandQueue cmdQ(context, device, properties);

    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

    // Heaps handed to the blocked command are allocated through the fixture's pCmdQ,
    // independently of the heaps owned by cmdQ.
    size_t heapSize = 20;

    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);

    dsh->getSpace(mockDevQueue.getDshOffset());

    auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
    auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
    auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);

    // Consume a known number of bytes in each of cmdQ's heaps; the assertions
    // below compare getUsed() against this value after submit.
    size_t usedSize = 4u;

    queueSsh.getSpace(usedSize);
    queueDsh.getSpace(usedSize);
    queueIoh.getSpace(usedSize);

    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandData->setHeaps(dsh, ioh, ssh);

    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
    PreemptionMode preemptionMode = device->getPreemptionMode();
    std::vector<Surface *> surfaces;
    auto *cmdComputeKernel = new CommandComputeKernel(cmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

    cmdComputeKernel->submit(0, false);

    EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
    EXPECT_EQ(usedSize, queueDsh.getUsed());
    EXPECT_EQ(usedSize, queueIoh.getUsed());
    EXPECT_EQ(usedSize, queueSsh.getUsed());

    delete cmdComputeKernel;
    delete parentKernel;
}
// Submits a blocked parent-kernel command whose SSH has not been used yet and
// checks that the CPU base of pCmdQ's surface-state heap is the same before and
// after the submit.
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed) {
    // Must run before anything is allocated so that a skipped test leaks nothing.
    // NOTE(review): macro is defined outside this chunk; presumably it skips the
    // test when the device lacks device-enqueue support - confirm.
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

    cl_queue_properties properties[3] = {0};
    MockParentKernel *parentKernel = MockParentKernel::create(*context);
    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
    parentKernel->createReflectionSurface();
    context->setDefaultDeviceQueue(&mockDevQueue);

    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

    size_t heapSize = 20;

    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
    size_t sshSize = 1000;
    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
    dsh->getSpace(mockDevQueue.getDshOffset());

    // Precondition named in the test title: nothing has been taken from the SSH yet.
    EXPECT_EQ(0u, ssh->getUsed());

    pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize);

    // Remember the queue's SSH base so it can be compared after submit.
    void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();

    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandData->setHeaps(dsh, ioh, ssh);

    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
    PreemptionMode preemptionMode = device->getPreemptionMode();
    std::vector<Surface *> surfaces;
    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

    cmdComputeKernel->submit(0, false);

    void *newSshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();

    EXPECT_EQ(sshBuffer, newSshBuffer);

    delete cmdComputeKernel;
    delete parentKernel;
}
// Calls dispatchWalker for a parent kernel with blocked-commands data and checks
// the heaps stored in that data: the DSH's maximum available space equals the
// device queue's DSH buffer size, DSH and IOH are the same heap, and every heap
// has a graphics allocation.
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) {
    // Must run before anything is allocated so that a skipped test leaks nothing.
    // NOTE(review): macro is defined outside this chunk; presumably it skips the
    // test when the device lacks device-enqueue support - confirm.
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

    cl_queue_properties properties[3] = {0};
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*context));

    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
    parentKernel->createReflectionSurface();
    context->setDefaultDeviceQueue(&mockDevQueue);

    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    // One-dimensional dispatch of a single work item with zero offsets.
    DispatchInfo dispatchInfo(parentKernel.get(), 1, workItems, nullptr, globalOffsets);
    MultiDispatchInfo multiDispatchInfo(parentKernel.get());
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    EXPECT_NE(nullptr, blockedCommandsData);
    EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
    EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);

    EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
    EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
    EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
    EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
}