diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index ddadba5b63..8382de9ac5 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -302,6 +302,7 @@ set (RUNTIME_SRCS_HELPERS helpers/options.h helpers/per_thread_data.cpp helpers/per_thread_data.h + helpers/pipeline_select_helper.h helpers/preamble.h helpers/preamble.inl helpers/ptr_math.h diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 5934a56d2b..5ce203fb12 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -135,7 +135,7 @@ class CommandStreamReceiver { uint32_t lastSentL3Config = 0; int8_t lastSentCoherencyRequest = -1; - bool lastMediaSamplerConfig = false; + int8_t lastMediaSamplerConfig = -1; PreemptionMode lastPreemptionMode = PreemptionMode::Disabled; uint32_t latestSentStatelessMocsConfig; diff --git a/runtime/command_stream/command_stream_receiver_hw.h b/runtime/command_stream/command_stream_receiver_hw.h index 717a96782e..d896e21eb6 100644 --- a/runtime/command_stream/command_stream_receiver_hw.h +++ b/runtime/command_stream/command_stream_receiver_hw.h @@ -70,7 +70,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags, const LinearStream &ih); void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config); - void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags); void programPreamble(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config); virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags); virtual void initPageTableManagerRegisters(LinearStream &csr){}; diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index 9506c00f70..0116e5f890 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -162,7 +162,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast(dispatchFlags.requiresCoherency); csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; - csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != dispatchFlags.mediaSamplerRequired; + csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.mediaSamplerRequired); auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSize(dispatchFlags)); auto commandStreamStartCSR = commandStreamCSR.getUsed(); @@ -171,7 +171,10 @@ CompletionStamp CommandStreamReceiverHw::flushTask( programPreemption(commandStreamCSR, dispatchFlags, ih); programCoherency(commandStreamCSR, dispatchFlags); programL3(commandStreamCSR, dispatchFlags, newL3Config); - programMediaSampler(commandStreamCSR, dispatchFlags); + if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) { + PreambleHelper::programPipelineSelect(&commandStreamCSR, dispatchFlags.mediaSamplerRequired); + this->lastMediaSamplerConfig = dispatchFlags.mediaSamplerRequired; + } programPreamble(commandStreamCSR, dispatchFlags, newL3Config); size_t requiredScratchSizeInBytes = requiredScratchSize * (hwInfo.pSysInfo->MaxSubSlicesSupported * hwInfo.pSysInfo->MaxEuPerSubSlice * hwInfo.pSysInfo->ThreadCount / hwInfo.pSysInfo->EUCount); @@ -300,11 +303,6 @@ CompletionStamp CommandStreamReceiverHw::flushTask( auto bbEndPaddingSize = this->dispatchMode == DispatchMode::ImmediateDispatch ? 0 : sizeof(MI_BATCH_BUFFER_START) - sizeof(MI_BATCH_BUFFER_END); if (submitTask) { - if (csrSizeRequestFlags.mediaSamplerConfigChanged && !dispatchFlags.mediaSamplerRequired) { - PreambleHelper::programPSForMedia(&commandStreamTask, false); - this->lastMediaSamplerConfig = false; - } - this->addBatchBufferEnd(commandStreamTask, &bbEndLocation); this->emitNoop(commandStreamTask, bbEndPaddingSize); this->alignToCacheLine(commandStreamTask); @@ -549,14 +547,6 @@ inline void CommandStreamReceiverHw::programL3(LinearStream &csr, Dis } } -template -inline void CommandStreamReceiverHw::programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags) { - if (csrSizeRequestFlags.mediaSamplerConfigChanged && dispatchFlags.mediaSamplerRequired) { - PreambleHelper::programPSForMedia(&csr, true); - this->lastMediaSamplerConfig = true; - } -} - template inline void CommandStreamReceiverHw::programPreamble(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config) { if (!this->isPreambleSent) { diff --git a/runtime/gen8/preamble.cpp b/runtime/gen8/preamble.cpp index fa7a723d3e..5fe0482bbd 100644 --- a/runtime/gen8/preamble.cpp +++ b/runtime/gen8/preamble.cpp @@ -20,9 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "hw_cmds.h" -#include "runtime/helpers/preamble.h" -#include "runtime/gen8/reg_configs.h" #include "runtime/helpers/preamble.inl" namespace OCLRT { @@ -56,16 +53,12 @@ uint32_t PreambleHelper::getL3Config(const HardwareInfo &hwInfo, bool } template <> -bool PreambleHelper::getMediaSamplerDopClockGateEnable(LinearStream *) { - return false; -} - -template <> -void PreambleHelper::programPSForMedia(LinearStream *, bool) {} - -template <> -uint32_t PreambleHelper::getPipelineSelectMaskBits() { - return 0; +void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) { + typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT; + auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT)); + *pCmd = PIPELINE_SELECT::sInit(); + pCmd->setMaskBits(pipelineSelectEnablePipelineSelectMaskBits); + pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); } // Explicitly instantiate PreambleHelper for BDW device family diff --git a/runtime/gen8/reg_configs.h b/runtime/gen8/reg_configs.h index 879756098b..f9b2034809 100644 --- a/runtime/gen8/reg_configs.h +++ b/runtime/gen8/reg_configs.h @@ -25,6 +25,7 @@ namespace OCLRT { +struct BDWFamily; template <> struct L3CNTLREGConfig { static const uint32_t valueForSLM = 0x60000121u; @@ -35,5 +36,4 @@ template <> struct L3CNTLRegisterOffset { static const uint32_t registerOffset = 0x7034; }; - } diff --git a/runtime/gen9/preamble.cpp b/runtime/gen9/preamble.cpp index 51e4646fb2..ee643da518 100644 --- a/runtime/gen9/preamble.cpp +++ b/runtime/gen9/preamble.cpp @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "hw_cmds.h" -#include "runtime/helpers/preamble.h" #include "runtime/helpers/preamble.inl" namespace OCLRT { @@ -44,23 +42,16 @@ uint32_t PreambleHelper::getL3Config(const HardwareInfo &hwInfo, bool } template <> -bool PreambleHelper::getMediaSamplerDopClockGateEnable(LinearStream *pCommandStream) { +void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; - auto pCmd = reinterpret_cast(pCommandStream); - - return pCmd->getMediaSamplerDopClockGateEnable(); -} - -template <> -void PreambleHelper::programPSForMedia(LinearStream *pCommandStream, bool enable) { - typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; - typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; - auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT)); *pCmd = PIPELINE_SELECT::sInit(); - pCmd->setMaskBits(getPipelineSelectMaskBits()); - pCmd->setMediaSamplerDopClockGateEnable(!enable); + + auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; + pCmd->setMaskBits(mask); + pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); + pCmd->setMediaSamplerDopClockGateEnable(!mediaSamplerRequired); } template <> @@ -74,4 +65,4 @@ void PreambleHelper::setupPipeControlInFrontOfCommand(void *pCmd, con } template struct PreambleHelper; -} // namespace OCLRT +} diff --git a/runtime/gen9/reg_configs.h b/runtime/gen9/reg_configs.h index 2439c74db4..091a36f5d4 100644 --- a/runtime/gen9/reg_configs.h +++ b/runtime/gen9/reg_configs.h @@ -23,7 +23,7 @@ #pragma once #include "runtime/helpers/preamble.h" namespace OCLRT { - +struct SKLFamily; template <> struct L3CNTLREGConfig { static const uint32_t valueForSLM = 0x60000121u; diff --git a/unit_tests/gen9/test_preamble.cpp b/runtime/helpers/pipeline_select_helper.h similarity index 72% rename from unit_tests/gen9/test_preamble.cpp rename to runtime/helpers/pipeline_select_helper.h index a068b48533..c497af7c0a 100644 --- a/unit_tests/gen9/test_preamble.cpp +++ b/runtime/helpers/pipeline_select_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,14 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "runtime/helpers/preamble.h" -#include "runtime/helpers/preamble.inl" -#include "test.h" +#pragma once +#include "stdint.h" -using namespace OCLRT; - -typedef ::testing::Test Gen9MaskBitsTest; - -GEN9TEST_F(Gen9MaskBitsTest, pipelineSelectMaskBitsIsMediaSamplerDopClockGateMaskBits) { - EXPECT_EQ(pipelineSelectMediaSamplerDopClockGateMaskBits, PreambleHelper::getPipelineSelectMaskBits()); +namespace OCLRT { +const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3; +const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10; } diff --git a/runtime/helpers/preamble.h b/runtime/helpers/preamble.h index e76f4b2579..6aee7ff9da 100644 --- a/runtime/helpers/preamble.h +++ b/runtime/helpers/preamble.h @@ -21,12 +21,13 @@ */ #pragma once -#include "runtime/gen_common/hw_cmds.h" +#include "igfxfmid.h" #include "stdint.h" #include "runtime/command_stream/thread_arbitration_policy.h" -namespace OCLRT { +#include "runtime/helpers/pipeline_select_helper.h" +#include -const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10; +namespace OCLRT { struct HardwareInfo; class Device; @@ -38,7 +39,7 @@ struct PreambleHelper { static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; } static void programL3(LinearStream *pCommandStream, uint32_t l3Config); - static void programPipelineSelect(LinearStream *pCommandStream); + static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired); static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, const Device &device, GraphicsAllocation *preemptionCsr); static void setupPipeControlInFrontOfCommand(void *pCmd, const HardwareInfo *hwInfo, bool isVfeCommand); @@ -46,12 +47,9 @@ struct PreambleHelper { static void programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config, uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr); static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM); - static void programPSForMedia(LinearStream *pCommandStream, bool enable); - static bool getMediaSamplerDopClockGateEnable(LinearStream *pCommandStream); static uint32_t getAdditionalCommandsSize(const Device &device); static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo); static uint32_t getUrbEntryAllocationSize(); - static uint32_t getPipelineSelectMaskBits(); }; template diff --git a/runtime/helpers/preamble.inl b/runtime/helpers/preamble.inl index 62c52fedfe..a2650c5578 100644 --- a/runtime/helpers/preamble.inl +++ b/runtime/helpers/preamble.inl @@ -32,15 +32,6 @@ namespace OCLRT { -template -void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream) { - typedef typename GfxFamily::PIPELINE_SELECT PIPELINE_SELECT; - auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT)); - *pCmd = PIPELINE_SELECT::sInit(); - pCmd->setMaskBits(0x3); - pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); -} - template void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; @@ -111,7 +102,6 @@ template void PreambleHelper::programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config, uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr) { programL3(pCommandStream, l3Config); - programPipelineSelect(pCommandStream); programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy); programPreemption(pCommandStream, device, preemptionCsr); programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo()); @@ -127,9 +117,4 @@ uint32_t PreambleHelper::getUrbEntryAllocationSize() { return 0x782; } -template -uint32_t PreambleHelper::getPipelineSelectMaskBits() { - return pipelineSelectMediaSamplerDopClockGateMaskBits; -} - } // namespace OCLRT diff --git a/unit_tests/command_queue/enqueue_copy_buffer_rect_tests.cpp b/unit_tests/command_queue/enqueue_copy_buffer_rect_tests.cpp index ff3e10cbe6..7c7eb4d49f 100644 --- a/unit_tests/command_queue/enqueue_copy_buffer_rect_tests.cpp +++ b/unit_tests/command_queue/enqueue_copy_buffer_rect_tests.cpp @@ -300,18 +300,9 @@ HWTEST_F(EnqueueCopyBufferRectTest, 2D_InterfaceDescriptorData) { } HWTEST_F(EnqueueCopyBufferRectTest, 2D_PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueCopyBufferRect2D(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - ASSERT_NE(nullptr, cmd); - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueCopyBufferRectTest, 2D_MediaVFEState) { @@ -489,17 +480,9 @@ HWTEST_F(EnqueueCopyBufferRectTest, 3D_InterfaceDescriptorData) { } HWTEST_F(EnqueueCopyBufferRectTest, 3D_PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueCopyBufferRect3D(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueCopyBufferRectTest, 3D_MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_copy_buffer_tests.cpp b/unit_tests/command_queue/enqueue_copy_buffer_tests.cpp index a71c6fecf0..a959c01f32 100644 --- a/unit_tests/command_queue/enqueue_copy_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_copy_buffer_tests.cpp @@ -269,18 +269,9 @@ HWTEST_F(EnqueueCopyBufferTest, InterfaceDescriptorData) { } HWTEST_F(EnqueueCopyBufferTest, PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueCopyBuffer(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - ASSERT_NE(nullptr, cmd); - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3u); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueCopyBufferTest, MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_copy_buffer_to_image_tests.cpp b/unit_tests/command_queue/enqueue_copy_buffer_to_image_tests.cpp index df7443c1ec..786ce1e1fa 100644 --- a/unit_tests/command_queue/enqueue_copy_buffer_to_image_tests.cpp +++ b/unit_tests/command_queue/enqueue_copy_buffer_to_image_tests.cpp @@ -233,17 +233,9 @@ HWTEST_F(EnqueueCopyBufferToImageTest, surfaceState) { } HWTEST_F(EnqueueCopyBufferToImageTest, pipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueCopyBufferToImage(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueCopyBufferToImageTest, mediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_copy_image_tests.cpp b/unit_tests/command_queue/enqueue_copy_image_tests.cpp index 06887a03e7..cdbc7a645d 100644 --- a/unit_tests/command_queue/enqueue_copy_image_tests.cpp +++ b/unit_tests/command_queue/enqueue_copy_image_tests.cpp @@ -242,17 +242,9 @@ HWTEST_F(EnqueueCopyImageTest, surfaceState) { } HWTEST_F(EnqueueCopyImageTest, pipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueCopyImage(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueCopyImageTest, mediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp b/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp index bee087776b..a21c9d3118 100644 --- a/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp @@ -234,17 +234,9 @@ HWTEST_F(EnqueueCopyImageToBufferTest, surfaceState) { } HWTEST_F(EnqueueCopyImageToBufferTest, pipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueCopyImageToBuffer(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueCopyImageToBufferTest, mediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_fill_buffer_tests.cpp b/unit_tests/command_queue/enqueue_fill_buffer_tests.cpp index 7f1910e015..7e038b37f7 100644 --- a/unit_tests/command_queue/enqueue_fill_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_fill_buffer_tests.cpp @@ -319,18 +319,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, InterfaceDescriptorData) { } HWTEST_F(EnqueueFillBufferCmdTests, PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueFillBuffer(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - ASSERT_NE(nullptr, cmd); - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueFillBufferCmdTests, MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_fill_image_tests.cpp b/unit_tests/command_queue/enqueue_fill_image_tests.cpp index efca07262e..c7583e6876 100644 --- a/unit_tests/command_queue/enqueue_fill_image_tests.cpp +++ b/unit_tests/command_queue/enqueue_fill_image_tests.cpp @@ -238,17 +238,9 @@ HWTEST_F(EnqueueFillImageTest, surfaceState) { } HWTEST_F(EnqueueFillImageTest, pipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueFillImage(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueFillImageTest, mediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_kernel_tests.cpp b/unit_tests/command_queue/enqueue_kernel_tests.cpp index 3390e0eba1..8583b42b0c 100644 --- a/unit_tests/command_queue/enqueue_kernel_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_tests.cpp @@ -558,19 +558,9 @@ HWTEST_P(EnqueueWorkItemTests, InterfaceDescriptorData) { } HWTEST_P(EnqueueWorkItemTests, PipelineSelect) { - typedef typename FamilyType::PARSE PARSE; - typedef typename PARSE::PIPELINE_SELECT PIPELINE_SELECT; - enqueueKernel(); - - ASSERT_NE(cmdList.end(), itorPipelineSelect); - auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_P(EnqueueWorkItemTests, MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp b/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp index 6536961ba2..2d3c62d5d2 100644 --- a/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp @@ -117,16 +117,9 @@ HWTEST_F(TwoIOQsTwoDependentWalkers, shouldHaveTwoWalkers) { } HWTEST_F(TwoIOQsTwoDependentWalkers, shouldHaveOnePS) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - parseWalkers(); - auto itorCmd1 = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorCmd1); - auto itorCmd2 = itorCmd1; - ++itorCmd2; - itorCmd2 = find(itorCmd2, cmdList.end()); - - EXPECT_EQ(cmdList.end(), itorCmd2); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(TwoIOQsTwoDependentWalkers, shouldHaveOneVFEState) { diff --git a/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp b/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp index be917d4851..c6089414b6 100644 --- a/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp @@ -121,16 +121,9 @@ HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveTwoWalkers) { } HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveOnePS) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - parseWalkers(); - auto itorCmd1 = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorCmd1); - auto itorCmd2 = itorCmd1; - ++itorCmd2; - itorCmd2 = find(itorCmd2, cmdList.end()); - - EXPECT_EQ(cmdList.end(), itorCmd2); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveOneVFEState) { diff --git a/unit_tests/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp b/unit_tests/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp index b5661f03a7..8b43adf47f 100644 --- a/unit_tests/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp @@ -36,13 +36,8 @@ HWTEST_F(IOQWithTwoWalkers, shouldHaveTwoWalkers) { HWTEST_F(IOQWithTwoWalkers, shouldHaveOnePS) { enqueueTwoKernels(); - - auto itorCmd1 = find(cmdList.begin(), cmdList.end()); - auto itorCmd2 = itorCmd1; - ++itorCmd2; - itorCmd2 = find(itorCmd2, cmdList.end()); - - EXPECT_EQ(cmdList.end(), itorCmd2); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(IOQWithTwoWalkers, shouldHaveOneVFEState) { diff --git a/unit_tests/command_queue/enqueue_kernel_two_walker_ooq_tests.cpp b/unit_tests/command_queue/enqueue_kernel_two_walker_ooq_tests.cpp index 0c10660c90..cabdfb9946 100644 --- a/unit_tests/command_queue/enqueue_kernel_two_walker_ooq_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_two_walker_ooq_tests.cpp @@ -40,13 +40,8 @@ HWTEST_F(OOQWithTwoWalkers, shouldHaveTwoWalkers) { HWTEST_F(OOQWithTwoWalkers, shouldHaveOnePS) { enqueueTwoKernels(); - - auto itorCmd1 = find(cmdList.begin(), cmdList.end()); - auto itorCmd2 = itorCmd1; - ++itorCmd2; - - itorCmd2 = find(itorCmd2, cmdList.end()); - EXPECT_EQ(cmdList.end(), itorCmd2); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(OOQWithTwoWalkers, shouldHaveOneVFEState) { diff --git a/unit_tests/command_queue/enqueue_media_kernel.cpp b/unit_tests/command_queue/enqueue_media_kernel.cpp index 2362c1b705..de00dca0fb 100644 --- a/unit_tests/command_queue/enqueue_media_kernel.cpp +++ b/unit_tests/command_queue/enqueue_media_kernel.cpp @@ -20,173 +20,26 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "unit_tests/fixtures/hello_world_fixture.h" -#include "unit_tests/command_queue/enqueue_fixture.h" -#include "unit_tests/helpers/hw_parse.h" -#include "runtime/helpers/preamble.h" +#include "unit_tests/fixtures/media_kernel_fixture.h" +#include "test.h" using namespace OCLRT; -template -struct MediaKernelCommandQueueTestFactory : public HelloWorldFixture, - public HardwareParse, - public ::testing::Test { - typedef HelloWorldFixture Parent; +typedef MediaKernelFixture MediaKernelTest; - using Parent::pCmdQ; - using Parent::pCS; - using Parent::pKernel; - using Parent::pCmdBuffer; - using Parent::pContext; - using Parent::pDevice; - using Parent::pProgram; - using Parent::retVal; - - MediaKernelCommandQueueTestFactory() {} - - template - void enqueueVmeThenRegularKernel() { - auto retVal = EnqueueKernelHelper<>::enqueueKernel( - pCmdQ, - pVmeKernel); - ASSERT_EQ(CL_SUCCESS, retVal); - - // We have to parse after each enqueue* because - // the CSR CS may insert commands in between - parseCommands(*pCmdQ); - - retVal = EnqueueKernelHelper<>::enqueueKernel( - pCmdQ, - pKernel); - ASSERT_EQ(CL_SUCCESS, retVal); - - parseCommands(*pCmdQ); - - itorWalker1 = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker1); - - itorWalker2 = itorWalker1; - ++itorWalker2; - itorWalker2 = find(itorWalker2, cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker2); - } - - template - void enqueueVmeKernel() { - auto retVal = EnqueueKernelHelper<>::enqueueKernel( - pCmdQ, - pVmeKernel); - ASSERT_EQ(CL_SUCCESS, retVal); - - parseCommands(*pCmdQ); - - itorWalker1 = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker1); - } - - void SetUp() override { - Parent::kernelFilename = "vme_kernels"; - Parent::kernelName = "non_vme_kernel"; - - Parent::SetUp(); - HardwareParse::SetUp(); - - ASSERT_NE(nullptr, pKernel); - ASSERT_EQ(false, pKernel->isVmeKernel()); - - cl_int retVal; - - // create the VME kernel - pVmeKernel = Kernel::create( - pProgram, - *pProgram->getKernelInfo("device_side_block_motion_estimate_intel"), - &retVal); - - ASSERT_NE(nullptr, pVmeKernel); - ASSERT_EQ(true, pVmeKernel->isVmeKernel()); - } - - void TearDown() override { - delete pVmeKernel; - pVmeKernel = nullptr; - - HardwareParse::TearDown(); - Parent::TearDown(); - } - - GenCmdList::iterator itorWalker1; - GenCmdList::iterator itorWalker2; - - Kernel *pVmeKernel = nullptr; -}; - -typedef MediaKernelCommandQueueTestFactory MediaKernelCommandQueueTest; - -TEST_F(MediaKernelCommandQueueTest, VmeKernelProperlyIdentifiesItself) { +TEST_F(MediaKernelTest, VmeKernelProperlyIdentifiesItself) { ASSERT_NE(true, pKernel->isVmeKernel()); ASSERT_EQ(true, pVmeKernel->isVmeKernel()); } -// Test for the "enabling" pipeline select preceeding the VME kernel -HWTEST_F(MediaKernelCommandQueueTest, - HavePipelineSelectToDisableMediaSamplerDopClockGate) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - - if (::renderCoreFamily == IGFX_GEN8_CORE) { - return; - } - +HWTEST_F(MediaKernelTest, EnqueueVmeKernelUsesSinglePipelineSelect) { enqueueVmeKernel(); - - auto itorCmd1 = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itorCmd1); - - auto pPreceedingPipelineSelect = genCmdCast(*itorCmd1); - - bool samplerClockGateEnable = PreambleHelper::getMediaSamplerDopClockGateEnable( - reinterpret_cast(pPreceedingPipelineSelect)); - - EXPECT_EQ(PreambleHelper::getPipelineSelectMaskBits(), pPreceedingPipelineSelect->getMaskBits()); - EXPECT_EQ(false, samplerClockGateEnable); - EXPECT_EQ(0u, pPreceedingPipelineSelect->getPipelineSelection()); + auto numCommands = getCommandsList().size(); + EXPECT_EQ(1u, numCommands); } -// Test for the "restoring" pipeline select following the VME kernel (that -// disables the media sampler) -HWTEST_F(MediaKernelCommandQueueTest, - HavePipelineSelectToEnableMediaSamplerDopClockGate) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - - if (::renderCoreFamily == IGFX_GEN8_CORE) { - return; - } - - enqueueVmeThenRegularKernel(); - - auto itorCmd1 = find( - cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itorCmd1); - - auto itorCmd2 = itorCmd1; - ++itorCmd2; - - itorCmd2 = find( - itorCmd2, cmdList.end()); - EXPECT_NE(cmdList.end(), itorCmd2); - - auto itorCmd3 = itorCmd2; - ++itorCmd3; - - itorCmd3 = find( - itorCmd3, cmdList.end()); - EXPECT_NE(cmdList.end(), itorCmd3); - - auto pFollowingPipelineSelect = genCmdCast(*itorCmd3); - - bool samplerClockGateEnable = PreambleHelper::getMediaSamplerDopClockGateEnable( - reinterpret_cast(pFollowingPipelineSelect)); - - EXPECT_EQ(PreambleHelper::getPipelineSelectMaskBits(), pFollowingPipelineSelect->getMaskBits()); - EXPECT_EQ(true, samplerClockGateEnable); - EXPECT_EQ(0u, pFollowingPipelineSelect->getPipelineSelection()); -} +HWTEST_F(MediaKernelTest, EnqueueRegularKernelUsesSinglePipelineSelect) { + enqueueRegularKernel(); + auto numCommands = getCommandsList().size(); + EXPECT_EQ(1u, numCommands); +} \ No newline at end of file diff --git a/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp b/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp index 3b9ef8ec9b..bada75902f 100644 --- a/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -337,18 +337,9 @@ HWTEST_F(EnqueueReadBufferRectTest, 2D_InterfaceDescriptorData) { } HWTEST_F(EnqueueReadBufferRectTest, 2D_PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueReadBufferRect2D(); - - ASSERT_NE(cmdList.end(), itorPipelineSelect); - auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueReadBufferRectTest, 2D_MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_read_buffer_tests.cpp b/unit_tests/command_queue/enqueue_read_buffer_tests.cpp index 9db5173d52..272d2a7d49 100644 --- a/unit_tests/command_queue/enqueue_read_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_buffer_tests.cpp @@ -304,19 +304,10 @@ HWTEST_F(EnqueueReadBufferTypeTest, InterfaceDescriptorData) { } HWTEST_F(EnqueueReadBufferTypeTest, PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); - - ASSERT_NE(cmdList.end(), itorPipelineSelect); - auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueReadBufferTypeTest, MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_read_image_tests.cpp b/unit_tests/command_queue/enqueue_read_image_tests.cpp index f51f33d7b7..3ca2989b65 100644 --- a/unit_tests/command_queue/enqueue_read_image_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_image_tests.cpp @@ -238,17 +238,9 @@ HWTEST_F(EnqueueReadImageTest, surfaceState) { } HWTEST_F(EnqueueReadImageTest, pipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueReadImage(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueReadImageTest, mediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp index 77b08917f9..5b9f9c7b34 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -310,18 +310,9 @@ HWTEST_F(EnqueueWriteBufferRectTest, 2D_InterfaceDescriptorData) { } HWTEST_F(EnqueueWriteBufferRectTest, 2D_PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueWriteBufferRect2D(); - - ASSERT_NE(cmdList.end(), itorPipelineSelect); - auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueWriteBufferRectTest, 2D_MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_write_buffer_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_tests.cpp index 7c76f74e4a..3c54e6e508 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_tests.cpp @@ -299,19 +299,10 @@ HWTEST_F(EnqueueWriteBufferTypeTest, InterfaceDescriptorData) { } HWTEST_F(EnqueueWriteBufferTypeTest, PipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); - - ASSERT_NE(cmdList.end(), itorPipelineSelect); - auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueWriteBufferTypeTest, MediaVFEState) { diff --git a/unit_tests/command_queue/enqueue_write_image_tests.cpp b/unit_tests/command_queue/enqueue_write_image_tests.cpp index 518d37476b..639dad2adf 100644 --- a/unit_tests/command_queue/enqueue_write_image_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_image_tests.cpp @@ -240,17 +240,9 @@ HWTEST_F(EnqueueWriteImageTest, surfaceState) { } HWTEST_F(EnqueueWriteImageTest, pipelineSelect) { - typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; - enqueueWriteImage(); - - auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect; - - // Verify we have a valid length - EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection()); - - // Specifying GPGPU mode requires setting equivalent mask bits. - EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3); + int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); + EXPECT_EQ(1, numCommands); } HWTEST_F(EnqueueWriteImageTest, mediaVFEState) { diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index 6c8348ea06..c530b57efb 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -174,6 +174,7 @@ struct UltCommandStreamReceiverTest commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; commandStreamReceiver.lastSentCoherencyRequest = 0; + commandStreamReceiver.lastMediaSamplerConfig = 0; } template @@ -440,6 +441,41 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, preambleShouldBeSentIfNeverSent) { EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementChanged) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = false; + commandStreamReceiver.lastMediaSamplerConfig = -1; + flushTask(commandStreamReceiver); + parseCommands(commandStreamReceiver.commandStream, 0); + EXPECT_NE(nullptr, getCommand()); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementNotChanged) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = false; + commandStreamReceiver.lastMediaSamplerConfig = 0; + flushTask(commandStreamReceiver); + parseCommands(commandStreamReceiver.commandStream, 0); + EXPECT_NE(nullptr, getCommand()); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldNotBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.lastMediaSamplerConfig = 0; + flushTask(commandStreamReceiver); + parseCommands(commandStreamReceiver.commandStream, 0); + EXPECT_EQ(nullptr, getCommand()); +} +HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.lastMediaSamplerConfig = 1; + flushTask(commandStreamReceiver); + parseCommands(commandStreamReceiver.commandStream, 0); + EXPECT_NE(nullptr, getCommand()); +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfNeverSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; @@ -1761,6 +1797,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAnd // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; auto &csrCS = commandStreamReceiver.getCS(); diff --git a/unit_tests/fixtures/CMakeLists.txt b/unit_tests/fixtures/CMakeLists.txt index 84362dc159..0f9253c666 100644 --- a/unit_tests/fixtures/CMakeLists.txt +++ b/unit_tests/fixtures/CMakeLists.txt @@ -41,6 +41,7 @@ set(IGDRCL_SRCS_tests_fixtures "${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/media_kernel_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/memory_allocator_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.cpp" diff --git a/unit_tests/fixtures/media_kernel_fixture.h b/unit_tests/fixtures/media_kernel_fixture.h new file mode 100644 index 0000000000..10ae67a487 --- /dev/null +++ b/unit_tests/fixtures/media_kernel_fixture.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#include "unit_tests/fixtures/hello_world_fixture.h" +#include "unit_tests/command_queue/enqueue_fixture.h" +#include "unit_tests/helpers/hw_parse.h" + +namespace OCLRT { + +template +struct MediaKernelFixture : public HelloWorldFixture, + public HardwareParse, + public ::testing::Test { + typedef HelloWorldFixture Parent; + + using Parent::pCS; + using Parent::pCmdBuffer; + using Parent::pCmdQ; + using Parent::pContext; + using Parent::pDevice; + using Parent::pKernel; + using Parent::pProgram; + using Parent::retVal; + + MediaKernelFixture() {} + + template + void enqueueRegularKernel() { + auto retVal = EnqueueKernelHelper<>::enqueueKernel( + pCmdQ, + pKernel); + ASSERT_EQ(CL_SUCCESS, retVal); + + parseCommands(*pCmdQ); + + itorWalker1 = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorWalker1); + } + + template + void enqueueVmeKernel() { + auto retVal = EnqueueKernelHelper<>::enqueueKernel( + pCmdQ, + pVmeKernel); + ASSERT_EQ(CL_SUCCESS, retVal); + + parseCommands(*pCmdQ); + + itorWalker1 = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorWalker1); + } + + void SetUp() override { + Parent::kernelFilename = "vme_kernels"; + Parent::kernelName = "non_vme_kernel"; + + Parent::SetUp(); + HardwareParse::SetUp(); + + ASSERT_NE(nullptr, pKernel); + ASSERT_EQ(false, pKernel->isVmeKernel()); + + cl_int retVal; + + // create the VME kernel + pVmeKernel = Kernel::create( + pProgram, + *pProgram->getKernelInfo("device_side_block_motion_estimate_intel"), + &retVal); + + ASSERT_NE(nullptr, pVmeKernel); + ASSERT_EQ(true, pVmeKernel->isVmeKernel()); + } + + void TearDown() override { + delete pVmeKernel; + pVmeKernel = nullptr; + + HardwareParse::TearDown(); + Parent::TearDown(); + } + + GenCmdList::iterator itorWalker1; + GenCmdList::iterator itorWalker2; + + Kernel *pVmeKernel = nullptr; +}; +} \ No newline at end of file diff --git a/unit_tests/gen8/enqueue_media_kernel.cpp b/unit_tests/gen8/enqueue_media_kernel.cpp index 92d7885331..06ea434c23 100644 --- a/unit_tests/gen8/enqueue_media_kernel.cpp +++ b/unit_tests/gen8/enqueue_media_kernel.cpp @@ -20,123 +20,19 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "unit_tests/fixtures/hello_world_fixture.h" -#include "unit_tests/command_queue/enqueue_fixture.h" -#include "unit_tests/helpers/hw_parse.h" -#include "runtime/helpers/preamble.h" +#include "unit_tests/fixtures/media_kernel_fixture.h" +#include "test.h" using namespace OCLRT; -template -struct MediaKernelCommandQueueTestFactory : public HelloWorldFixture, - public HardwareParse, - public ::testing::Test { - typedef HelloWorldFixture Parent; +typedef MediaKernelFixture MediaKernelTest; - using Parent::pCS; - using Parent::pCmdBuffer; - using Parent::pCmdQ; - using Parent::pContext; - using Parent::pDevice; - using Parent::pKernel; - using Parent::pProgram; - using Parent::retVal; - - MediaKernelCommandQueueTestFactory() {} - - template - void enqueueVmeThenRegularKernel() { - auto retVal = EnqueueKernelHelper<>::enqueueKernel( - pCmdQ, - pVmeKernel); - ASSERT_EQ(CL_SUCCESS, retVal); - - // We have to parse after each enqueue* because - // the CSR CS may insert commands in between - parseCommands(*pCmdQ); - - retVal = EnqueueKernelHelper<>::enqueueKernel( - pCmdQ, - pKernel); - ASSERT_EQ(CL_SUCCESS, retVal); - - parseCommands(*pCmdQ); - - itorWalker1 = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker1); - - itorWalker2 = itorWalker1; - ++itorWalker2; - itorWalker2 = find(itorWalker2, cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker2); - } - - template - void enqueueVmeKernel() { - auto retVal = EnqueueKernelHelper<>::enqueueKernel( - pCmdQ, - pVmeKernel); - ASSERT_EQ(CL_SUCCESS, retVal); - - parseCommands(*pCmdQ); - - itorWalker1 = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker1); - } - - void SetUp() override { - Parent::kernelFilename = "vme_kernels"; - Parent::kernelName = "non_vme_kernel"; - - Parent::SetUp(); - HardwareParse::SetUp(); - - ASSERT_NE(nullptr, pKernel); - ASSERT_EQ(false, pKernel->isVmeKernel()); - - cl_int retVal; - - // create the VME kernel - pVmeKernel = Kernel::create( - pProgram, - *pProgram->getKernelInfo("device_side_block_motion_estimate_intel"), - &retVal); - - ASSERT_NE(nullptr, pVmeKernel); - ASSERT_EQ(true, pVmeKernel->isVmeKernel()); - } - - void TearDown() override { - delete pVmeKernel; - pVmeKernel = nullptr; - - HardwareParse::TearDown(); - Parent::TearDown(); - } - - GenCmdList::iterator itorWalker1; - GenCmdList::iterator itorWalker2; - - Kernel *pVmeKernel = nullptr; -}; - -typedef MediaKernelCommandQueueTestFactory MediaKernelCommandQueueTest; - -BDWTEST_F(MediaKernelCommandQueueTest, BdwClockGateAlwaysFalse) { - auto samplerClockGateEnable = PreambleHelper::getMediaSamplerDopClockGateEnable( - reinterpret_cast(pCmdQ)); - ASSERT_EQ(false, samplerClockGateEnable); // Always false for BDW -} - -BDWTEST_F(MediaKernelCommandQueueTest, BdwHasSinglePipelineSelect) { +GEN8TEST_F(MediaKernelTest, givenGen8CSRWhenEnqueueVmeKernelThenProgramOnlyPipelineSelection) { + typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); - - auto itorCmd1 = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itorCmd1); - - auto itorCmd2 = itorCmd1; - ++itorCmd2; - - itorCmd2 = find(itorCmd2, cmdList.end()); - ASSERT_EQ(cmdList.end(), itorCmd2); + auto pCmd = getCommand(); + auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits; + auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; + EXPECT_EQ(expectedMask, pCmd->getMaskBits()); + EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); } diff --git a/unit_tests/gen8/test_preamble.cpp b/unit_tests/gen8/test_preamble.cpp index 891341355f..2aedf4a671 100644 --- a/unit_tests/gen8/test_preamble.cpp +++ b/unit_tests/gen8/test_preamble.cpp @@ -111,9 +111,3 @@ BDWTEST_F(PreambleVfeState, basic) { EXPECT_TRUE(pc.getDcFlushEnable()); EXPECT_EQ(1u, pc.getCommandStreamerStallEnable()); } - -typedef ::testing::Test BdwMaskBitsTest; - -BDWTEST_F(BdwMaskBitsTest, noPipelineSelectMaskBits) { - EXPECT_EQ(0u, PreambleHelper::getPipelineSelectMaskBits()); -} \ No newline at end of file diff --git a/unit_tests/gen9/CMakeLists.txt b/unit_tests/gen9/CMakeLists.txt index 243110cccc..e86772f970 100644 --- a/unit_tests/gen9/CMakeLists.txt +++ b/unit_tests/gen9/CMakeLists.txt @@ -23,6 +23,7 @@ set(IGDRCL_SRCS_tests_gen9 ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gen_cmd_parse.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_data_tests_gen9.cpp @@ -30,7 +31,6 @@ set(IGDRCL_SRCS_tests_gen9 ${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preemption.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample.cpp PARENT_SCOPE ) diff --git a/unit_tests/gen9/enqueue_media_kernel.cpp b/unit_tests/gen9/enqueue_media_kernel.cpp new file mode 100644 index 0000000000..827944178f --- /dev/null +++ b/unit_tests/gen9/enqueue_media_kernel.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "unit_tests/fixtures/media_kernel_fixture.h" +#include "runtime/helpers/preamble.inl" +#include "test.h" + +using namespace OCLRT; +typedef MediaKernelFixture MediaKernelTest; + +GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { + typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; + enqueueVmeKernel(); + + auto numCommands = getCommandsList().size(); + EXPECT_EQ(1u, numCommands); + + auto pCmd = getCommand(); + auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; + auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; + EXPECT_EQ(expectedMask, pCmd->getMaskBits()); + EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); + EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); +} + +GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { + typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; + enqueueRegularKernel(); + + auto numCommands = getCommandsList().size(); + EXPECT_EQ(1u, numCommands); + + auto pCmd = getCommand(); + auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; + auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; + EXPECT_EQ(expectedMask, pCmd->getMaskBits()); + EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); + EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); +} + +GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) { + typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; + enqueueVmeKernel(); + auto numCommands = getCommandsList().size(); + EXPECT_EQ(1u, numCommands); +} + +GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) { + typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; + enqueueVmeKernel(); + auto numCommands = getCommandsList().size(); + EXPECT_EQ(1u, numCommands); +} + +GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) { + typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; + enqueueRegularKernel(); + enqueueVmeKernel(); + + auto commands = getCommandsList(); + EXPECT_EQ(2u, commands.size()); + + auto pCmd = static_cast(commands.back()); + + auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; + EXPECT_EQ(expectedMask, pCmd->getMaskBits()); + EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); +} + +GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) { + typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; + enqueueVmeKernel(); + enqueueRegularKernel(); + + auto commands = getCommandsList(); + EXPECT_EQ(2u, commands.size()); + + auto pCmd = static_cast(commands.back()); + + auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; + EXPECT_EQ(expectedMask, pCmd->getMaskBits()); + EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); +} diff --git a/unit_tests/helpers/hw_parse.h b/unit_tests/helpers/hw_parse.h index ac5f03bd41..1b6fdef45f 100644 --- a/unit_tests/helpers/hw_parse.h +++ b/unit_tests/helpers/hw_parse.h @@ -25,6 +25,7 @@ #include "runtime/command_stream/command_stream_receiver.h" #include "runtime/command_stream/linear_stream.h" #include "runtime/helpers/ptr_math.h" +#include "runtime/helpers/pipeline_select_helper.h" #include "runtime/kernel/kernel.h" #include "unit_tests/gen_common/gen_cmd_parse.h" #include "gtest/gtest.h" @@ -242,26 +243,33 @@ struct HardwareParse { template CmdType *getCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { - auto itorCmd = find(cmdList.begin(), cmdList.end()); + auto itorCmd = find(itorStart, itorEnd); return itorCmd != cmdList.end() ? genCmdCast(*itorCmd) : nullptr; } - // pass found iterator to itorStart - template - CmdType *getCommand(GenCmdList::iterator *itorStart, GenCmdList::iterator itorEnd) { - *itorStart = find(*itorStart, itorEnd); - return *itorStart != cmdList.end() - ? genCmdCast(**itorStart) - : nullptr; - } - template CmdType *getCommand() { return getCommand(cmdList.begin(), cmdList.end()); } + template + int getNumberOfPipelineSelectsThatEnablePipelineSelect() { + typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; + int numCommands = 0; + auto itorCmd = find(cmdList.begin(), cmdList.end()); + while (itorCmd != cmdList.end()) { + auto cmd = getCommand(itorCmd, cmdList.end()); + if (cmd->getPipelineSelection() == PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU && + pipelineSelectEnablePipelineSelectMaskBits == (pipelineSelectEnablePipelineSelectMaskBits & cmd->getMaskBits())) { + numCommands++; + } + itorCmd = find(++itorCmd, cmdList.end()); + } + return numCommands; + } + // The starting point of parsing commandBuffers. This is important // because as buffers get reused, we only want to parse the deltas. LinearStream *previousCS; diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index 0be10fb40b..151bb3df3d 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -48,6 +48,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy; using BaseClass::CommandStreamReceiver::latestFlushedTaskCount; using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig; + using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel;