/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/test/unit_test/scheduler/scheduler_source_tests.h"

#include "shared/test/common/cmd_parse/hw_parse.h"

#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/mocks/mock_device_queue.h"
#include "test.h"

#include "gtest/gtest.h"
#include "hw_cmds.h"

#include <memory>

// Keep this include after execution_model_fixture.h otherwise there is high chance of conflict with macros
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"

using namespace NEO;
using namespace BuiltinKernelsSimulation;

// Checks that SchedulerSimulation::patchGpGpuWalker() writes the requested values into the
// GPGPU_WALKER of the first enqueue space of the second level batch (SLB) while leaving the
// position of every other prepopulated command in that space untouched.
//
// The test parses the SLB twice - before and after patching - records the byte offset of each
// command of interest relative to the buffer base, and expects both snapshots to match.
HWCMDTEST_F(IGFX_GEN8_CORE, SchedulerSourceTest, WhenEnqueingThenGpgpuWalkerIsPatchedCorrectly) {
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    // Snapshot of one parse pass. Every offset is relative to the buffer base handed to
    // captureLayout() and stays 0 when the corresponding command was not found.
    struct EnqueueSpaceLayout {
        void *firstMediaStateFlush = nullptr; // first MEDIA_STATE_FLUSH in the list, nullptr if absent
        GPGPU_WALKER *walker = nullptr;       // first GPGPU_WALKER in the list, nullptr if absent
        size_t mediaStateFlush = 0;
        size_t arbCheck = 0;
        size_t miAtomic = 0;
        size_t mediaIdLoad = 0;
        size_t miLoadReg = 0;
        size_t pipeControl = 0;
        size_t walkerOffset = 0;
        size_t mediaStateFlushAfterWalker = 0; // first MEDIA_STATE_FLUSH at or after the walker
        size_t arbCheckAfterWalker = 0;        // first MI_ARB_CHECK at or after the walker
    };

    // Owned by the test; released automatically on every exit path, including fatal assertions.
    auto pDevQueueHw = std::make_unique<MockDeviceQueueHw<FamilyType>>(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]);

    // Prepopulate SLB with commands
    pDevQueueHw->buildSlbDummyCommands();

    LinearStream *slb = pDevQueueHw->getSlbCS();
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*slb, 0);

    // Looks up each command of interest in the currently parsed command list and records its
    // offset from `base`. A command that is missing yields nullptr / offset 0 instead of a
    // dereference of the end iterator.
    auto captureLayout = [&hwParser](void *base) {
        using CmdIterator = decltype(hwParser.cmdList.begin());
        const CmdIterator listEnd = hwParser.cmdList.end();

        const auto commandAt = [&listEnd](CmdIterator itor) -> void * {
            return itor != listEnd ? *itor : nullptr;
        };
        const auto offsetOf = [base](void *cmd) -> size_t {
            return cmd != nullptr ? ptrDiff(cmd, base) : size_t{0};
        };

        // The walker position doubles as the starting point for the "after walker" searches.
        const CmdIterator itorWalker = find<GPGPU_WALKER *>(hwParser.cmdList.begin(), listEnd);

        EnqueueSpaceLayout layout;
        layout.firstMediaStateFlush = commandAt(find<MEDIA_STATE_FLUSH *>(hwParser.cmdList.begin(), listEnd));
        layout.mediaStateFlush = offsetOf(layout.firstMediaStateFlush);
        layout.arbCheck = offsetOf(commandAt(find<MI_ARB_CHECK *>(hwParser.cmdList.begin(), listEnd)));
        layout.miAtomic = offsetOf(commandAt(find<MI_ATOMIC *>(hwParser.cmdList.begin(), listEnd)));
        layout.mediaIdLoad = offsetOf(commandAt(find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(hwParser.cmdList.begin(), listEnd)));
        layout.miLoadReg = offsetOf(commandAt(find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), listEnd)));
        layout.pipeControl = offsetOf(commandAt(find<PIPE_CONTROL *>(hwParser.cmdList.begin(), listEnd)));
        layout.walker = static_cast<GPGPU_WALKER *>(commandAt(itorWalker));
        layout.walkerOffset = offsetOf(layout.walker);
        layout.mediaStateFlushAfterWalker = offsetOf(commandAt(find<MEDIA_STATE_FLUSH *>(itorWalker, listEnd)));
        layout.arbCheckAfterWalker = offsetOf(commandAt(find<MI_ARB_CHECK *>(itorWalker, listEnd)));
        return layout;
    };

    // Parse commands and save offsets of first enqueue space
    const EnqueueSpaceLayout before = captureLayout(slb->getCpuBase());
    // The enqueue space is expected to start with MEDIA_STATE_FLUSH at the very base of the SLB.
    EXPECT_EQ(slb->getCpuBase(), before.firstMediaStateFlush);

    uint32_t *slbBuffer = static_cast<uint32_t *>(slb->getCpuBase());

    // Arguments handed to the scheduler simulation; the walker is verified against them below.
    uint32_t secondLevelBatchOffset = 0;
    uint32_t InterfaceDescriptorOffset = 3;
    uint32_t SIMDSize = 16;
    uint32_t TotalLocalWorkSize = 24;
    uint3 DimSize = {6, 4, 1};
    uint3 StartPoint = {4, 4, 0};
    uint32_t NumberOfHWThreadsPerWG = 3;
    uint32_t IndirectPayloadSize = 10;
    uint32_t IOHoffset = 256;

    SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, slbBuffer, InterfaceDescriptorOffset, SIMDSize,
                                          TotalLocalWorkSize, DimSize, StartPoint, NumberOfHWThreadsPerWG,
                                          IndirectPayloadSize, IOHoffset);

    // Parse again: wrap the same memory in a fresh stream and mark it fully used so the parser
    // walks the whole range.
    const size_t commandsSize = pDevQueueHw->getMinimumSlbSize() + pDevQueueHw->getWaCommandsSize();
    LinearStream slbTested(slbBuffer, commandsSize);
    hwParser.cmdList.clear();
    slbTested.getSpace(commandsSize);
    hwParser.parseCommands<FamilyType>(slbTested, 0);

    const EnqueueSpaceLayout after = captureLayout(slbTested.getCpuBase());
    EXPECT_EQ(slb->getCpuBase(), after.firstMediaStateFlush);

    // Patching must not move any of the prepopulated commands.
    EXPECT_EQ(before.mediaStateFlush, after.mediaStateFlush);
    EXPECT_EQ(before.arbCheck, after.arbCheck);
    EXPECT_EQ(before.miAtomic, after.miAtomic);
    EXPECT_EQ(before.mediaIdLoad, after.mediaIdLoad);
    EXPECT_EQ(before.miLoadReg, after.miLoadReg);
    EXPECT_EQ(before.pipeControl, after.pipeControl);
    EXPECT_EQ(before.walkerOffset, after.walkerOffset);
    EXPECT_EQ(before.mediaStateFlushAfterWalker, after.mediaStateFlushAfterWalker);
    EXPECT_EQ(before.arbCheckAfterWalker, after.arbCheckAfterWalker);

    GPGPU_WALKER *walker = after.walker;
    ASSERT_NE(nullptr, walker) << "GPGPU_WALKER commandnot found, patchGpGpuWalker could have corrupted prepopulated commands\n";

    EXPECT_EQ(InterfaceDescriptorOffset, walker->getInterfaceDescriptorOffset());
    EXPECT_EQ(NumberOfHWThreadsPerWG, walker->getThreadWidthCounterMaximum());

    // The SIMD16 enum expectation below is only valid for SIMDSize == 16.
    EXPECT_EQ(16u, SIMDSize);
    typename GPGPU_WALKER::SIMD_SIZE simd = GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16;
    EXPECT_EQ(simd, walker->getSimdSize());

    // NOTE(review): the Z start / Z dimension checks were already commented out in this test;
    // the reason is not visible from this file.
    EXPECT_EQ(StartPoint.x, walker->getThreadGroupIdStartingX());
    EXPECT_EQ(StartPoint.y, walker->getThreadGroupIdStartingY());
    //EXPECT_EQ(StartPoint.z, walker->GetThreadGroupIdStartingZ());

    EXPECT_EQ(DimSize.x, walker->getThreadGroupIdXDimension());
    EXPECT_EQ(DimSize.y, walker->getThreadGroupIdYDimension());
    //EXPECT_EQ(DimSize.z, walker->getThreadGroupIdZDimension());

    // Expected right mask is built from the work-size remainder (24 % 16 = 8); a zero result
    // falls back to all bits set. Presumably maxNBitValue(n) yields the low n bits - confirm.
    uint32_t mask = static_cast<uint32_t>(maxNBitValue(TotalLocalWorkSize % SIMDSize));
    if (mask == 0) {
        mask = ~0u;
    }
    const uint32_t yMask = 0xffffffff;

    EXPECT_EQ(mask, walker->getRightExecutionMask());
    EXPECT_EQ(yMask, walker->getBottomExecutionMask());

    EXPECT_EQ(IndirectPayloadSize, walker->getIndirectDataLength());
    EXPECT_EQ(IOHoffset, walker->getIndirectDataStartAddress());
}