Disable Device Enqueue and Pipes support on TGLLP

Related-To: NEO-4368
Change-Id: I1838bd711b59c1e82c84bd92ee945d7a50e89234
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
2026-01-11 00:10:58 +08:00 · 2020-05-21 17:18:16 +02:00
parent e1403ef08d
commit 611198cd98
11 changed files with 470 additions and 453 deletions
--- a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp
+++ b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp
@@ -26,6 +26,7 @@ class ExecutionModelSchedulerFixture : public ExecutionModelSchedulerTest,
  public:
    void SetUp() override {
        ExecutionModelSchedulerTest::SetUp();
+        REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pClDevice);
    }

    void TearDown() override {
@@ -43,124 +44,122 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

-    if (pClDevice->areOcl21FeaturesSupported()) {
-        DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
-        SchedulerKernel &scheduler = context->getSchedulerKernel();
+    DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
+    SchedulerKernel &scheduler = context->getSchedulerKernel();

-        auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
-        auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
-        void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
+    auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
+    auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
+    void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();

-        EXPECT_NE(nullptr, executionModelDsh);
+    EXPECT_NE(nullptr, executionModelDsh);

-        size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
-        // Setup heaps in pCmdQ
-        MultiDispatchInfo multiDispatchinfo(&scheduler);
-        LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
-                                                                                              false, false, false, multiDispatchinfo,
-                                                                                              nullptr, 0);
-        pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
+    size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    // Setup heaps in pCmdQ
+    MultiDispatchInfo multiDispatchinfo(&scheduler);
+    LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
+                                                                                          false, false, false, multiDispatchinfo,
+                                                                                          nullptr, 0);
+    pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);

-        GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
-            pCmdQ->getCS(0),
-            *pDevQueueHw,
-            pDevice->getPreemptionMode(),
-            scheduler,
-            &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
-            pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-            false);
+    GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
+        pCmdQ->getCS(0),
+        *pDevQueueHw,
+        pDevice->getPreemptionMode(),
+        scheduler,
+        &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
+        pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
+        false);

-        EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
-        EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
-        EXPECT_EQ(0u, *scheduler.globalWorkOffsetZ);
+    EXPECT_EQ(0u, *scheduler.globalWorkOffsetX);
+    EXPECT_EQ(0u, *scheduler.globalWorkOffsetY);
+    EXPECT_EQ(0u, *scheduler.globalWorkOffsetZ);

-        EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX);
-        EXPECT_EQ(1u, *scheduler.localWorkSizeY);
-        EXPECT_EQ(1u, *scheduler.localWorkSizeZ);
+    EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX);
+    EXPECT_EQ(1u, *scheduler.localWorkSizeY);
+    EXPECT_EQ(1u, *scheduler.localWorkSizeZ);

-        EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX2);
-        EXPECT_EQ(1u, *scheduler.localWorkSizeY2);
-        EXPECT_EQ(1u, *scheduler.localWorkSizeZ2);
+    EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX2);
+    EXPECT_EQ(1u, *scheduler.localWorkSizeY2);
+    EXPECT_EQ(1u, *scheduler.localWorkSizeZ2);

-        if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) {
-            EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX);
-        }
-        EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY);
-        EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ);
-
-        EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX);
-        EXPECT_EQ(0u, *scheduler.numWorkGroupsY);
-        EXPECT_EQ(0u, *scheduler.numWorkGroupsZ);
-
-        HardwareParse hwParser;
-        hwParser.parseCommands<FamilyType>(commandStream, 0);
-        hwParser.findHardwareCommands<FamilyType>();
-
-        ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
-
-        // Before Walker There must be PC
-        PIPE_CONTROL *pc = hwParser.getCommand<PIPE_CONTROL>(hwParser.cmdList.begin(), hwParser.itorWalker);
-        ASSERT_NE(nullptr, pc);
-
-        ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);
-        auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad;
-
-        uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress();
-        uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength();
-
-        uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize;
-        uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
-
-        EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed);
-        EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed);
-
-        auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker;
-
-        size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
-
-        size_t numWorkgroupsProgrammed[3] = {0, 0, 0};
-
-        uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum();
-
-        EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup);
-
-        numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension();
-        numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension();
-        numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension();
-
-        EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]);
-        EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]);
-        EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]);
-
-        typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize();
-        EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize);
-
-        EXPECT_EQ(0u, walker->getThreadGroupIdStartingX());
-        EXPECT_EQ(0u, walker->getThreadGroupIdStartingY());
-        EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ());
-
-        uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress();
-        assert(offsetCrossThreadDataProgrammed % 64 == 0);
-        size_t curbeSize = scheduler.getCurbeSize();
-        size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account
-        EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed);
-
-        EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset());
-
-        auto numChannels = 3;
-        auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize;
-        auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws());
-
-        auto sizeCrossThreadData = scheduler.getCrossThreadDataSize();
-        auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
-        EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength());
-
-        ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
-        auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker;
-
-        uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress();
-        EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddressGraphicsaddress472());
+    if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) {
+        EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX);
    }
+    EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY);
+    EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ);
+
+    EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX);
+    EXPECT_EQ(0u, *scheduler.numWorkGroupsY);
+    EXPECT_EQ(0u, *scheduler.numWorkGroupsZ);
+
+    HardwareParse hwParser;
+    hwParser.parseCommands<FamilyType>(commandStream, 0);
+    hwParser.findHardwareCommands<FamilyType>();
+
+    ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
+
+    // Before Walker There must be PC
+    PIPE_CONTROL *pc = hwParser.getCommand<PIPE_CONTROL>(hwParser.cmdList.begin(), hwParser.itorWalker);
+    ASSERT_NE(nullptr, pc);
+
+    ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad);
+    auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad;
+
+    uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress();
+    uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength();
+
+    uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize;
+    uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
+
+    EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed);
+    EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed);
+
+    auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker;
+
+    size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
+
+    size_t numWorkgroupsProgrammed[3] = {0, 0, 0};
+
+    uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum();
+
+    EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup);
+
+    numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension();
+    numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension();
+    numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension();
+
+    EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]);
+    EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]);
+    EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]);
+
+    typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize();
+    EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize);
+
+    EXPECT_EQ(0u, walker->getThreadGroupIdStartingX());
+    EXPECT_EQ(0u, walker->getThreadGroupIdStartingY());
+    EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ());
+
+    uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress();
+    assert(offsetCrossThreadDataProgrammed % 64 == 0);
+    size_t curbeSize = scheduler.getCurbeSize();
+    size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account
+    EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed);
+
+    EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset());
+
+    auto numChannels = 3;
+    auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize;
+    auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws());
+
+    auto sizeCrossThreadData = scheduler.getCrossThreadDataSize();
+    auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
+    EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength());
+
+    ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
+    auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker;
+
+    uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress();
+    EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddressGraphicsaddress472());
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoesNotUseStandardCmdQIOH) {
@@ -172,89 +171,83 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoe
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

-    if (pClDevice->areOcl21FeaturesSupported()) {
-        DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
-        SchedulerKernel &scheduler = context->getSchedulerKernel();
+    DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
+    SchedulerKernel &scheduler = context->getSchedulerKernel();

-        size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
-        // Setup heaps in pCmdQ
+    size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    // Setup heaps in pCmdQ

-        MultiDispatchInfo multiDispatchinfo(&scheduler);
-        getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo,
-                                                                nullptr, 0);
-        pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
+    MultiDispatchInfo multiDispatchinfo(&scheduler);
+    getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo,
+                                                            nullptr, 0);
+    pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);

-        GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
-            pCmdQ->getCS(0),
-            *pDevQueueHw,
-            pDevice->getPreemptionMode(),
-            scheduler,
-            &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
-            pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-            false);
+    GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
+        pCmdQ->getCS(0),
+        *pDevQueueHw,
+        pDevice->getPreemptionMode(),
+        scheduler,
+        &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
+        pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
+        false);

-        auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
+    auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);

-        EXPECT_EQ(0u, ioh.getUsed());
-    }
+    EXPECT_EQ(0u, ioh.getUsed());
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, dispatchSchedulerWithEarlyReturnSetToFirstInstanceDoesNotPutBBStartCmd) {
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-    if (device->areOcl21FeaturesSupported()) {
+    cl_queue_properties properties[3] = {0};
+    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);

-        cl_queue_properties properties[3] = {0};
-        MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
+    auto *igilQueue = mockDevQueue.getIgilQueue();

-        auto *igilQueue = mockDevQueue.getIgilQueue();
+    ASSERT_NE(nullptr, igilQueue);
+    igilQueue->m_controls.m_SchedulerEarlyReturn = 1;

-        ASSERT_NE(nullptr, igilQueue);
-        igilQueue->m_controls.m_SchedulerEarlyReturn = 1;
+    SchedulerKernel &scheduler = context->getSchedulerKernel();

-        SchedulerKernel &scheduler = context->getSchedulerKernel();
+    size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
+    // Setup heaps in pCmdQ
+    MultiDispatchInfo multiDispatchinfo(&scheduler);
+    LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
+                                                                                          false, false, false, multiDispatchinfo,
+                                                                                          nullptr, 0);
+    pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);

-        size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
-        // Setup heaps in pCmdQ
-        MultiDispatchInfo multiDispatchinfo(&scheduler);
-        LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
-                                                                                              false, false, false, multiDispatchinfo,
-                                                                                              nullptr, 0);
-        pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
+    GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
+        pCmdQ->getCS(0),
+        mockDevQueue,
+        device->getPreemptionMode(),
+        scheduler,
+        &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
+        mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
+        false);

-        GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
-            pCmdQ->getCS(0),
-            mockDevQueue,
-            device->getPreemptionMode(),
-            scheduler,
-            &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
-            mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-            false);
+    HardwareParse hwParser;
+    hwParser.parseCommands<FamilyType>(commandStream, 0);
+    hwParser.findHardwareCommands<FamilyType>();

-        HardwareParse hwParser;
-        hwParser.parseCommands<FamilyType>(commandStream, 0);
-        hwParser.findHardwareCommands<FamilyType>();
-
-        EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
-        EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
-    }
+    EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker);
+    EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker);
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, ForceDispatchSchedulerEnqueuesSchedulerKernel) {

-    if (pClDevice->areOcl21FeaturesSupported()) {
-        DebugManagerStateRestore dbgRestorer;
+    DebugManagerStateRestore dbgRestorer;

-        DebugManager.flags.ForceDispatchScheduler.set(true);
+    DebugManager.flags.ForceDispatchScheduler.set(true);

-        size_t offset[3] = {0, 0, 0};
-        size_t gws[3] = {1, 1, 1};
+    size_t offset[3] = {0, 0, 0};
+    size_t gws[3] = {1, 1, 1};

-        MockCommandQueueHw<FamilyType> *mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);
+    MockCommandQueueHw<FamilyType> *mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);

-        mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
+    mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);

-        EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel);
+    EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel);

-        delete mockCmdQ;
-    }
+    delete mockCmdQ;
 }
--- a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp
+++ b/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp
@@ -15,6 +15,7 @@
 #include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
 #include "opencl/test/unit_test/mocks/mock_command_queue.h"
 #include "opencl/test/unit_test/mocks/mock_device_queue.h"
+#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"

 #include <memory>

@@ -76,361 +77,361 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
 };

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        mockDevQueue.acquireEMCriticalSection();
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        size_t heapSize = 20;
-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    mockDevQueue.acquireEMCriticalSection();

-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
+    size_t heapSize = 20;
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();

-        dsh->getSpace(mockDevQueue.getDshOffset());
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    dsh->getSpace(mockDevQueue.getDshOffset());

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter);
-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    cmdComputeKernel->submit(0, false);
+
+    EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter);
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        size_t heapSize = 20;
-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);

-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
+    size_t heapSize = 20;
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();

-        // add initial offset of colorCalState
-        dsh->getSpace(DeviceQueue::colorCalcStateSize);
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);

-        uint64_t ValueToFillDsh = 5;
-        uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
+    // add initial offset of colorCalState
+    dsh->getSpace(DeviceQueue::colorCalcStateSize);

-        // Fill Interface Descriptor Data
-        *dshVal = ValueToFillDsh;
+    uint64_t ValueToFillDsh = 5;
+    uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));

-        // Move to parent DSH Offset
-        size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t);
-        dsh->getSpace(alignToOffsetDshSize);
+    // Fill Interface Descriptor Data
+    *dshVal = ValueToFillDsh;

-        // Fill with pattern
-        dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
-        *dshVal = ValueToFillDsh;
+    // Move to parent DSH Offset
+    size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t);
+    dsh->getSpace(alignToOffsetDshSize);

-        size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();
+    // Fill with pattern
+    dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t));
+    *dshVal = ValueToFillDsh;

-        uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
-        EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
+    size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed();

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize;
+    EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        //device queue dsh is not changed
-        size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
-        EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit);
+    cmdComputeKernel->submit(0, false);

-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    // device queue dsh is not changed: its usage after submit must equal the usage captured before submit
+    size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed();
+    EXPECT_EQ(usedDSHBeforeSubmit, usedDSHAfterSubmit);
+
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        size_t heapSize = 20;
-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
+    size_t heapSize = 20;
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();

-        dsh->getSpace(mockDevQueue.getDshOffset());
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    dsh->getSpace(mockDevQueue.getDshOffset());

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        EXPECT_TRUE(mockDevQueue.indirectStateSetup);
-        EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
+    cmdComputeKernel->submit(0, false);

-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    EXPECT_TRUE(mockDevQueue.indirectStateSetup);
+    EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
+
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        size_t heapSize = 20;
-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
-        dsh->getSpace(mockDevQueue.getDshOffset());
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    size_t heapSize = 20;
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
+    dsh->getSpace(mockDevQueue.getDshOffset());

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        auto timestamp = pCmdQ->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag();
-        cmdComputeKernel->timestamp = timestamp;
-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
-        EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tagForCpuAccess);
+    auto timestamp = pCmdQ->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag();
+    cmdComputeKernel->timestamp = timestamp;
+    cmdComputeKernel->submit(0, false);

-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
+    EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tagForCpuAccess);
+
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        size_t heapSize = 20;
-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHwWithCriticalSectionRelease<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
-        dsh->getSpace(mockDevQueue.getDshOffset());
+    size_t heapSize = 20;
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
+    dsh->getSpace(mockDevQueue.getDshOffset());

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        EXPECT_TRUE(mockDevQueue.schedulerDispatched);
+    cmdComputeKernel->submit(0, false);

-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    EXPECT_TRUE(mockDevQueue.schedulerDispatched);
+
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        MockCommandQueue cmdQ(context, device, properties);
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    MockCommandQueue cmdQ(context, device, properties);

-        size_t heapSize = 20;
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);
+    size_t heapSize = 20;

-        dsh->getSpace(mockDevQueue.getDshOffset());
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh);

-        auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
-        auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
-        auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);
+    dsh->getSpace(mockDevQueue.getDshOffset());

-        size_t usedSize = 4u;
+    auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100);
+    auto &queueDsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100);
+    auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100);

-        queueSsh.getSpace(usedSize);
-        queueDsh.getSpace(usedSize);
-        queueIoh.getSpace(usedSize);
+    size_t usedSize = 4u;

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    queueSsh.getSpace(usedSize);
+    queueDsh.getSpace(usedSize);
+    queueIoh.getSpace(usedSize);

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(cmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(cmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
-        EXPECT_EQ(usedSize, queueDsh.getUsed());
-        EXPECT_EQ(usedSize, queueIoh.getUsed());
-        EXPECT_EQ(usedSize, queueSsh.getUsed());
+    cmdComputeKernel->submit(0, false);

-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
+    EXPECT_EQ(usedSize, queueDsh.getUsed());
+    EXPECT_EQ(usedSize, queueIoh.getUsed());
+    EXPECT_EQ(usedSize, queueSsh.getUsed());
+
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        MockParentKernel *parentKernel = MockParentKernel::create(*context);
-        MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
+    cl_queue_properties properties[3] = {0};
+    MockParentKernel *parentKernel = MockParentKernel::create(*context);
+    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        size_t heapSize = 20;
+    size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);

-        size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
-        size_t sshSize = 1000;
-        IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-        pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
-        pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
-        dsh->getSpace(mockDevQueue.getDshOffset());
+    size_t heapSize = 20;

-        EXPECT_EQ(0u, ssh->getUsed());
+    size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize();
+    size_t sshSize = 1000;
+    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
+    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh);
+    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
+    dsh->getSpace(mockDevQueue.getDshOffset());

-        pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize);
+    EXPECT_EQ(0u, ssh->getUsed());

-        void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();
+    pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize);

-        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
-                                                                    *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-        blockedCommandData->setHeaps(dsh, ioh, ssh);
+    void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();

-        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
-        PreemptionMode preemptionMode = device->getPreemptionMode();
-        std::vector<Surface *> surfaces;
-        auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
+    auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
+    auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
+                                                                *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandData->setHeaps(dsh, ioh, ssh);

-        cmdComputeKernel->submit(0, false);
+    blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    std::vector<Surface *> surfaces;
+    auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);

-        void *newSshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();
+    cmdComputeKernel->submit(0, false);

-        EXPECT_EQ(sshBuffer, newSshBuffer);
+    void *newSshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();

-        delete cmdComputeKernel;
-        delete parentKernel;
-    }
+    EXPECT_EQ(sshBuffer, newSshBuffer);
+
+    delete cmdComputeKernel;
+    delete parentKernel;
 }

 HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) {
-    if (device->areOcl21FeaturesSupported()) {
-        cl_queue_properties properties[3] = {0};
-        std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*context));
+    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);

-        MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
-        parentKernel->createReflectionSurface();
-        context->setDefaultDeviceQueue(&mockDevQueue);
+    cl_queue_properties properties[3] = {0};
+    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*context));

-        auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
-        const size_t globalOffsets[3] = {0, 0, 0};
-        const size_t workItems[3] = {1, 1, 1};
+    MockDeviceQueueHw<FamilyType> mockDevQueue(context, device, properties[0]);
+    parentKernel->createReflectionSurface();
+    context->setDefaultDeviceQueue(&mockDevQueue);

-        DispatchInfo dispatchInfo(parentKernel.get(), 1, workItems, nullptr, globalOffsets);
-        MultiDispatchInfo multiDispatchInfo(parentKernel.get());
-        multiDispatchInfo.push(dispatchInfo);
-        HardwareInterface<FamilyType>::dispatchWalker(
-            *pCmdQ,
-            multiDispatchInfo,
-            CsrDependencies(),
-            blockedCommandsData.get(),
-            nullptr,
-            nullptr,
-            nullptr,
-            nullptr,
-            CL_COMMAND_NDRANGE_KERNEL);
+    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
+    const size_t globalOffsets[3] = {0, 0, 0};
+    const size_t workItems[3] = {1, 1, 1};

-        EXPECT_NE(nullptr, blockedCommandsData);
-        EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
-        EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);
+    DispatchInfo dispatchInfo(parentKernel.get(), 1, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo(parentKernel.get());
+    multiDispatchInfo.push(dispatchInfo);
+    HardwareInterface<FamilyType>::dispatchWalker(
+        *pCmdQ,
+        multiDispatchInfo,
+        CsrDependencies(),
+        blockedCommandsData.get(),
+        nullptr,
+        nullptr,
+        nullptr,
+        nullptr,
+        CL_COMMAND_NDRANGE_KERNEL);

-        EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
-        EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
-        EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
-        EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
-    }
+    EXPECT_NE(nullptr, blockedCommandsData);
+    EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
+    EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh);
+
+    EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation());
+    EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
+    EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
+    EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
 }