compute-runtime/unit_tests/built_ins/scheduler_source_tests.cpp

/*
 * Copyright (c) 2017, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "gtest/gtest.h"
#include "test.h"

#include "hw_cmds.h"

#include "runtime/device_queue/device_queue_hw.h"

#include "unit_tests/fixtures/device_host_queue_fixture.h"
#include "unit_tests/fixtures/execution_model_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_device_queue.h"
// Keep this include after execution_model_fixture.h otherwise there is high chance of conflict with macros
#include "runtime/builtin_kernels_simulation/opencl_c.h"
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"

extern PRODUCT_FAMILY defaultProductFamily;

using namespace OCLRT;
using namespace BuiltinKernelsSimulation;

class SchedulerSourceTest : public testing::Test {
  public:
    void SetUp() override {
        pDevice = DeviceHelper<>::create();
    }
    void TearDown() override {
        delete pDevice;
    }

    Device *pDevice;
    MockContext context;
};

HWTEST_F(SchedulerSourceTest, PatchGpgpuWalker) {

    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    size_t msfOffset = 0;
    size_t miArbCheckOffset = 0;
    size_t miAtomicOffset = 0;
    size_t mediaIDLoadOffset = 0;
    size_t miLoadRegOffset = 0;
    size_t pipeControlOffset = 0;
    size_t gpgpuOffset = 0;
    size_t msfOffset2 = 0;
    size_t miArbCheckOffset2 = 0;

    size_t msfOffsetAfter = 0;
    size_t miArbCheckOffsetAfter = 0;
    size_t miAtomicOffsetAfter = 0;
    size_t mediaIDLoadOffsetAfter = 0;
    size_t miLoadRegOffsetAfter = 0;
    size_t pipeControlOffsetAfter = 0;
    size_t gpgpuOffsetAfter = 0;
    size_t msfOffsetAfter2 = 0;
    size_t miArbCheckOffsetAfter2 = 0;

    auto pDevQueueHw = new MockDeviceQueueHw<FamilyType>(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);

    // Prepopulate SLB with commands
    pDevQueueHw->buildSlbDummyCommands();
    LinearStream *slb = pDevQueueHw->getSlbCS();
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*slb, 0);

    // Parse commands and save offsets of first enqueue space
    auto itorMediaStateFlush = find<MEDIA_STATE_FLUSH *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush;

    EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf);

    auto itorArbCheck = find<MI_ARB_CHECK *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr;

    auto itorMiAtomic = find<MI_ATOMIC *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr;

    auto itorIDLoad = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr;

    auto itorMiLoadReg = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr;

    auto itorPipeControl = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr;

    auto itorWalker = find<GPGPU_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    auto *walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr;

    auto itorMediaStateFlush2 = find<MEDIA_STATE_FLUSH *>(itorWalker, hwParser.cmdList.end());
    auto *msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr;

    auto itorArbCheck2 = find<MI_ARB_CHECK *>(itorWalker, hwParser.cmdList.end());
    auto *arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck2 : nullptr;

    if (msf)
        msfOffset = ptrDiff(msf, slb->getCpuBase());

    if (arbCheck)
        miArbCheckOffset = ptrDiff(arbCheck, slb->getCpuBase());

    if (miAtomic)
        miAtomicOffset = ptrDiff(miAtomic, slb->getCpuBase());

    if (idLoad)
        mediaIDLoadOffset = ptrDiff(idLoad, slb->getCpuBase());

    if (miLoadReg)
        miLoadRegOffset = ptrDiff(miLoadReg, slb->getCpuBase());

    if (pipeControl)
        pipeControlOffset = ptrDiff(pipeControl, slb->getCpuBase());

    if (walker)
        gpgpuOffset = ptrDiff(walker, slb->getCpuBase());

    if (msf2)
        msfOffset2 = ptrDiff(msf2, slb->getCpuBase());

    if (arbCheck2)
        miArbCheckOffset2 = ptrDiff(arbCheck2, slb->getCpuBase());

    uint32_t *slbBuffer = (uint32_t *)slb->getCpuBase();
    uint32_t secondLevelBatchOffset = 0;
    uint32_t InterfaceDescriptorOffset = 3;
    uint32_t SIMDSize = 16;
    uint32_t TotalLocalWorkSize = 24;
    uint3 DimSize = {6, 4, 1};
    uint3 StartPoint = {4, 4, 0};
    uint32_t NumberOfHWThreadsPerWG = 3;
    uint32_t IndirectPayloadSize = 10;
    uint32_t IOHoffset = 256;

    SchedulerSimulation<FamilyType>::patchGpGpuWalker(secondLevelBatchOffset, slbBuffer, InterfaceDescriptorOffset, SIMDSize, TotalLocalWorkSize, DimSize, StartPoint, NumberOfHWThreadsPerWG, IndirectPayloadSize, IOHoffset);

    size_t commandsSize = pDevQueueHw->getMinimumSlbSize() + pDevQueueHw->getWaCommandsSize();

    // Parse again
    LinearStream slbTested(slbBuffer, commandsSize);
    hwParser.cmdList.clear();
    slbTested.getSpace(commandsSize);
    hwParser.parseCommands<FamilyType>(slbTested, 0);

    itorMediaStateFlush = find<MEDIA_STATE_FLUSH *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush;

    EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf);

    itorArbCheck = find<MI_ARB_CHECK *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr;

    itorMiAtomic = find<MI_ATOMIC *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr;

    itorIDLoad = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr;

    itorMiLoadReg = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr;

    itorPipeControl = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr;

    itorWalker = find<GPGPU_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr;

    itorMediaStateFlush2 = find<MEDIA_STATE_FLUSH *>(itorWalker, hwParser.cmdList.end());
    msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr;

    itorArbCheck2 = find<MI_ARB_CHECK *>(itorWalker, hwParser.cmdList.end());
    arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck2 : nullptr;

    if (msf)
        msfOffsetAfter = ptrDiff(msf, slbTested.getCpuBase());

    if (arbCheck)
        miArbCheckOffsetAfter = ptrDiff(arbCheck, slbTested.getCpuBase());

    if (miAtomic)
        miAtomicOffsetAfter = ptrDiff(miAtomic, slbTested.getCpuBase());

    if (idLoad)
        mediaIDLoadOffsetAfter = ptrDiff(idLoad, slbTested.getCpuBase());

    if (miLoadReg)
        miLoadRegOffsetAfter = ptrDiff(miLoadReg, slbTested.getCpuBase());

    if (pipeControl)
        pipeControlOffsetAfter = ptrDiff(pipeControl, slbTested.getCpuBase());

    if (walker)
        gpgpuOffsetAfter = ptrDiff(walker, slbTested.getCpuBase());

    if (msf2)
        msfOffsetAfter2 = ptrDiff(msf2, slbTested.getCpuBase());

    if (arbCheck2)
        miArbCheckOffsetAfter2 = ptrDiff(arbCheck2, slbTested.getCpuBase());

    EXPECT_EQ(msfOffset, msfOffsetAfter);
    EXPECT_EQ(miArbCheckOffset, miArbCheckOffsetAfter);
    EXPECT_EQ(miAtomicOffset, miAtomicOffsetAfter);
    EXPECT_EQ(mediaIDLoadOffset, mediaIDLoadOffsetAfter);
    EXPECT_EQ(miLoadRegOffset, miLoadRegOffsetAfter);
    EXPECT_EQ(pipeControlOffset, pipeControlOffsetAfter);
    EXPECT_EQ(gpgpuOffset, gpgpuOffsetAfter);
    EXPECT_EQ(msfOffset2, msfOffsetAfter2);
    EXPECT_EQ(miArbCheckOffset2, miArbCheckOffsetAfter2);

    if (walker) {
        EXPECT_EQ(InterfaceDescriptorOffset, walker->getInterfaceDescriptorOffset());
        EXPECT_EQ(NumberOfHWThreadsPerWG, walker->getThreadWidthCounterMaximum());

        EXPECT_EQ(16u, SIMDSize);
        typename GPGPU_WALKER::SIMD_SIZE simd = GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16;
        EXPECT_EQ(simd, walker->getSimdSize());

        EXPECT_EQ(StartPoint.x, walker->getThreadGroupIdStartingX());
        EXPECT_EQ(StartPoint.y, walker->getThreadGroupIdStartingY());
        //EXPECT_EQ(StartPoint.z, walker->GetThreadGroupIdStartingZ());

        EXPECT_EQ(DimSize.x, walker->getThreadGroupIdXDimension());
        EXPECT_EQ(DimSize.y, walker->getThreadGroupIdYDimension());
        //EXPECT_EQ(DimSize.z, walker->getThreadGroupIdZDimension());

        uint32_t mask = (1 << (TotalLocalWorkSize % SIMDSize)) - 1;
        if (mask == 0)
            mask = ~0;
        uint32_t yMask = 0xffffffff;

        EXPECT_EQ(mask, walker->getRightExecutionMask());
        EXPECT_EQ(yMask, walker->getBottomExecutionMask());

        EXPECT_EQ(IndirectPayloadSize, walker->getIndirectDataLength());

        EXPECT_EQ(IOHoffset, walker->getIndirectDataStartAddress());
    } else {
        EXPECT_TRUE(false) << "GPGPU_WALKER commandnot found, patchGpGpuWalker could have corrupted prepopulated commands\n";
    }

    delete pDevQueueHw;
}