Change pipeline select programming

- Program a single PS (PIPELINE_SELECT) with both the GPGPU selection
  and the media sampler setting
- Program PS only when the media sampler requirement has changed
  or when the preamble has not been sent yet

Change-Id: I85ba3f74087733e79d048e120aeb8b4b04796e00
This commit is contained in:
Mateusz Jablonski 2018-01-17 08:37:47 +01:00 committed by sys_ocldev
parent ae1213a98d
commit 13ac81f465
39 changed files with 361 additions and 561 deletions

View File

@ -302,6 +302,7 @@ set (RUNTIME_SRCS_HELPERS
helpers/options.h
helpers/per_thread_data.cpp
helpers/per_thread_data.h
helpers/pipeline_select_helper.h
helpers/preamble.h
helpers/preamble.inl
helpers/ptr_math.h

View File

@ -135,7 +135,7 @@ class CommandStreamReceiver {
uint32_t lastSentL3Config = 0;
int8_t lastSentCoherencyRequest = -1;
bool lastMediaSamplerConfig = false;
int8_t lastMediaSamplerConfig = -1;
PreemptionMode lastPreemptionMode = PreemptionMode::Disabled;
uint32_t latestSentStatelessMocsConfig;

View File

@ -70,7 +70,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
protected:
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags, const LinearStream &ih);
void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
void programPreamble(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){};

View File

@ -162,7 +162,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast<int8_t>(dispatchFlags.requiresCoherency);
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != dispatchFlags.mediaSamplerRequired;
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSize(dispatchFlags));
auto commandStreamStartCSR = commandStreamCSR.getUsed();
@ -171,7 +171,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programPreemption(commandStreamCSR, dispatchFlags, ih);
programCoherency(commandStreamCSR, dispatchFlags);
programL3(commandStreamCSR, dispatchFlags, newL3Config);
programMediaSampler(commandStreamCSR, dispatchFlags);
if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) {
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStreamCSR, dispatchFlags.mediaSamplerRequired);
this->lastMediaSamplerConfig = dispatchFlags.mediaSamplerRequired;
}
programPreamble(commandStreamCSR, dispatchFlags, newL3Config);
size_t requiredScratchSizeInBytes = requiredScratchSize * (hwInfo.pSysInfo->MaxSubSlicesSupported * hwInfo.pSysInfo->MaxEuPerSubSlice * hwInfo.pSysInfo->ThreadCount / hwInfo.pSysInfo->EUCount);
@ -300,11 +303,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto bbEndPaddingSize = this->dispatchMode == DispatchMode::ImmediateDispatch ? 0 : sizeof(MI_BATCH_BUFFER_START) - sizeof(MI_BATCH_BUFFER_END);
if (submitTask) {
if (csrSizeRequestFlags.mediaSamplerConfigChanged && !dispatchFlags.mediaSamplerRequired) {
PreambleHelper<GfxFamily>::programPSForMedia(&commandStreamTask, false);
this->lastMediaSamplerConfig = false;
}
this->addBatchBufferEnd(commandStreamTask, &bbEndLocation);
this->emitNoop(commandStreamTask, bbEndPaddingSize);
this->alignToCacheLine(commandStreamTask);
@ -549,14 +547,6 @@ inline void CommandStreamReceiverHw<GfxFamily>::programL3(LinearStream &csr, Dis
}
}
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags) {
if (csrSizeRequestFlags.mediaSamplerConfigChanged && dispatchFlags.mediaSamplerRequired) {
PreambleHelper<GfxFamily>::programPSForMedia(&csr, true);
this->lastMediaSamplerConfig = true;
}
}
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config) {
if (!this->isPreambleSent) {

View File

@ -20,9 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "hw_cmds.h"
#include "runtime/helpers/preamble.h"
#include "runtime/gen8/reg_configs.h"
#include "runtime/helpers/preamble.inl"
namespace OCLRT {
@ -56,16 +53,12 @@ uint32_t PreambleHelper<BDWFamily>::getL3Config(const HardwareInfo &hwInfo, bool
}
template <>
bool PreambleHelper<BDWFamily>::getMediaSamplerDopClockGateEnable(LinearStream *) {
return false;
}
template <>
void PreambleHelper<BDWFamily>::programPSForMedia(LinearStream *, bool) {}
template <>
uint32_t PreambleHelper<BDWFamily>::getPipelineSelectMaskBits() {
return 0;
void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT;
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
*pCmd = PIPELINE_SELECT::sInit();
pCmd->setMaskBits(pipelineSelectEnablePipelineSelectMaskBits);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
}
// Explicitly instantiate PreambleHelper for BDW device family

View File

@ -25,6 +25,7 @@
namespace OCLRT {
struct BDWFamily;
template <>
struct L3CNTLREGConfig<IGFX_BROADWELL> {
static const uint32_t valueForSLM = 0x60000121u;
@ -35,5 +36,4 @@ template <>
struct L3CNTLRegisterOffset<BDWFamily> {
static const uint32_t registerOffset = 0x7034;
};
}

View File

@ -20,8 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "hw_cmds.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/preamble.inl"
namespace OCLRT {
@ -44,23 +42,16 @@ uint32_t PreambleHelper<SKLFamily>::getL3Config(const HardwareInfo &hwInfo, bool
}
template <>
bool PreambleHelper<SKLFamily>::getMediaSamplerDopClockGateEnable(LinearStream *pCommandStream) {
void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;
auto pCmd = reinterpret_cast<PIPELINE_SELECT *>(pCommandStream);
return pCmd->getMediaSamplerDopClockGateEnable();
}
template <>
void PreambleHelper<SKLFamily>::programPSForMedia(LinearStream *pCommandStream, bool enable) {
typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;
typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL;
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
*pCmd = PIPELINE_SELECT::sInit();
pCmd->setMaskBits(getPipelineSelectMaskBits());
pCmd->setMediaSamplerDopClockGateEnable(!enable);
auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
pCmd->setMaskBits(mask);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
pCmd->setMediaSamplerDopClockGateEnable(!mediaSamplerRequired);
}
template <>
@ -74,4 +65,4 @@ void PreambleHelper<SKLFamily>::setupPipeControlInFrontOfCommand(void *pCmd, con
}
template struct PreambleHelper<SKLFamily>;
} // namespace OCLRT
}

View File

@ -23,7 +23,7 @@
#pragma once
#include "runtime/helpers/preamble.h"
namespace OCLRT {
struct SKLFamily;
template <>
struct L3CNTLREGConfig<IGFX_SKYLAKE> {
static const uint32_t valueForSLM = 0x60000121u;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -20,14 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/preamble.inl"
#include "test.h"
#pragma once
#include "stdint.h"
using namespace OCLRT;
typedef ::testing::Test Gen9MaskBitsTest;
GEN9TEST_F(Gen9MaskBitsTest, pipelineSelectMaskBitsIsMediaSamplerDopClockGateMaskBits) {
EXPECT_EQ(pipelineSelectMediaSamplerDopClockGateMaskBits, PreambleHelper<SKLFamily>::getPipelineSelectMaskBits());
namespace OCLRT {
const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3;
const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10;
}

View File

@ -21,12 +21,13 @@
*/
#pragma once
#include "runtime/gen_common/hw_cmds.h"
#include "igfxfmid.h"
#include "stdint.h"
#include "runtime/command_stream/thread_arbitration_policy.h"
namespace OCLRT {
#include "runtime/helpers/pipeline_select_helper.h"
#include <cstddef>
const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10;
namespace OCLRT {
struct HardwareInfo;
class Device;
@ -38,7 +39,7 @@ struct PreambleHelper {
static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; }
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
static void programPipelineSelect(LinearStream *pCommandStream);
static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired);
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
static void programPreemption(LinearStream *pCommandStream, const Device &device, GraphicsAllocation *preemptionCsr);
static void setupPipeControlInFrontOfCommand(void *pCmd, const HardwareInfo *hwInfo, bool isVfeCommand);
@ -46,12 +47,9 @@ struct PreambleHelper {
static void programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr);
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
static void programPSForMedia(LinearStream *pCommandStream, bool enable);
static bool getMediaSamplerDopClockGateEnable(LinearStream *pCommandStream);
static uint32_t getAdditionalCommandsSize(const Device &device);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
static uint32_t getPipelineSelectMaskBits();
};
template <PRODUCT_FAMILY ProductFamily>

View File

@ -32,15 +32,6 @@
namespace OCLRT {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPipelineSelect(LinearStream *pCommandStream) {
typedef typename GfxFamily::PIPELINE_SELECT PIPELINE_SELECT;
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
*pCmd = PIPELINE_SELECT::sInit();
pCmd->setMaskBits(0x3);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
@ -111,7 +102,6 @@ template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr) {
programL3(pCommandStream, l3Config);
programPipelineSelect(pCommandStream);
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
programPreemption(pCommandStream, device, preemptionCsr);
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
@ -127,9 +117,4 @@ uint32_t PreambleHelper<GfxFamily>::getUrbEntryAllocationSize() {
return 0x782;
}
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getPipelineSelectMaskBits() {
return pipelineSelectMediaSamplerDopClockGateMaskBits;
}
} // namespace OCLRT

View File

@ -300,18 +300,9 @@ HWTEST_F(EnqueueCopyBufferRectTest, 2D_InterfaceDescriptorData) {
}
HWTEST_F(EnqueueCopyBufferRectTest, 2D_PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueCopyBufferRect2D<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
ASSERT_NE(nullptr, cmd);
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueCopyBufferRectTest, 2D_MediaVFEState) {
@ -489,17 +480,9 @@ HWTEST_F(EnqueueCopyBufferRectTest, 3D_InterfaceDescriptorData) {
}
HWTEST_F(EnqueueCopyBufferRectTest, 3D_PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueCopyBufferRect3D<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueCopyBufferRectTest, 3D_MediaVFEState) {

View File

@ -269,18 +269,9 @@ HWTEST_F(EnqueueCopyBufferTest, InterfaceDescriptorData) {
}
HWTEST_F(EnqueueCopyBufferTest, PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueCopyBuffer<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
ASSERT_NE(nullptr, cmd);
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3u);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueCopyBufferTest, MediaVFEState) {

View File

@ -233,17 +233,9 @@ HWTEST_F(EnqueueCopyBufferToImageTest, surfaceState) {
}
HWTEST_F(EnqueueCopyBufferToImageTest, pipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueCopyBufferToImage<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueCopyBufferToImageTest, mediaVFEState) {

View File

@ -242,17 +242,9 @@ HWTEST_F(EnqueueCopyImageTest, surfaceState) {
}
HWTEST_F(EnqueueCopyImageTest, pipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueCopyImage<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueCopyImageTest, mediaVFEState) {

View File

@ -234,17 +234,9 @@ HWTEST_F(EnqueueCopyImageToBufferTest, surfaceState) {
}
HWTEST_F(EnqueueCopyImageToBufferTest, pipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueCopyImageToBuffer<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueCopyImageToBufferTest, mediaVFEState) {

View File

@ -319,18 +319,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, InterfaceDescriptorData) {
}
HWTEST_F(EnqueueFillBufferCmdTests, PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueFillBuffer<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
ASSERT_NE(nullptr, cmd);
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueFillBufferCmdTests, MediaVFEState) {

View File

@ -238,17 +238,9 @@ HWTEST_F(EnqueueFillImageTest, surfaceState) {
}
HWTEST_F(EnqueueFillImageTest, pipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueFillImage<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueFillImageTest, mediaVFEState) {

View File

@ -558,19 +558,9 @@ HWTEST_P(EnqueueWorkItemTests, InterfaceDescriptorData) {
}
HWTEST_P(EnqueueWorkItemTests, PipelineSelect) {
typedef typename FamilyType::PARSE PARSE;
typedef typename PARSE::PIPELINE_SELECT PIPELINE_SELECT;
enqueueKernel<FamilyType>();
ASSERT_NE(cmdList.end(), itorPipelineSelect);
auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_P(EnqueueWorkItemTests, MediaVFEState) {

View File

@ -117,16 +117,9 @@ HWTEST_F(TwoIOQsTwoDependentWalkers, shouldHaveTwoWalkers) {
}
HWTEST_F(TwoIOQsTwoDependentWalkers, shouldHaveOnePS) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
parseWalkers<FamilyType>();
auto itorCmd1 = find<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorCmd1);
auto itorCmd2 = itorCmd1;
++itorCmd2;
itorCmd2 = find<PIPELINE_SELECT *>(itorCmd2, cmdList.end());
EXPECT_EQ(cmdList.end(), itorCmd2);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(TwoIOQsTwoDependentWalkers, shouldHaveOneVFEState) {

View File

@ -121,16 +121,9 @@ HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveTwoWalkers) {
}
HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveOnePS) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
parseWalkers<FamilyType>();
auto itorCmd1 = find<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorCmd1);
auto itorCmd2 = itorCmd1;
++itorCmd2;
itorCmd2 = find<PIPELINE_SELECT *>(itorCmd2, cmdList.end());
EXPECT_EQ(cmdList.end(), itorCmd2);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveOneVFEState) {

View File

@ -36,13 +36,8 @@ HWTEST_F(IOQWithTwoWalkers, shouldHaveTwoWalkers) {
HWTEST_F(IOQWithTwoWalkers, shouldHaveOnePS) {
enqueueTwoKernels<FamilyType>();
auto itorCmd1 = find<typename FamilyType::PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
auto itorCmd2 = itorCmd1;
++itorCmd2;
itorCmd2 = find<typename FamilyType::PIPELINE_SELECT *>(itorCmd2, cmdList.end());
EXPECT_EQ(cmdList.end(), itorCmd2);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(IOQWithTwoWalkers, shouldHaveOneVFEState) {

View File

@ -40,13 +40,8 @@ HWTEST_F(OOQWithTwoWalkers, shouldHaveTwoWalkers) {
HWTEST_F(OOQWithTwoWalkers, shouldHaveOnePS) {
enqueueTwoKernels<FamilyType>();
auto itorCmd1 = find<typename FamilyType::PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
auto itorCmd2 = itorCmd1;
++itorCmd2;
itorCmd2 = find<typename FamilyType::PIPELINE_SELECT *>(itorCmd2, cmdList.end());
EXPECT_EQ(cmdList.end(), itorCmd2);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(OOQWithTwoWalkers, shouldHaveOneVFEState) {

View File

@ -20,173 +20,26 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "unit_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "runtime/helpers/preamble.h"
#include "unit_tests/fixtures/media_kernel_fixture.h"
#include "test.h"
using namespace OCLRT;
template <typename FactoryType>
struct MediaKernelCommandQueueTestFactory : public HelloWorldFixture<FactoryType>,
public HardwareParse,
public ::testing::Test {
typedef HelloWorldFixture<FactoryType> Parent;
typedef MediaKernelFixture<HelloWorldFixtureFactory> MediaKernelTest;
using Parent::pCmdQ;
using Parent::pCS;
using Parent::pKernel;
using Parent::pCmdBuffer;
using Parent::pContext;
using Parent::pDevice;
using Parent::pProgram;
using Parent::retVal;
MediaKernelCommandQueueTestFactory() {}
template <typename FamilyType>
void enqueueVmeThenRegularKernel() {
auto retVal = EnqueueKernelHelper<>::enqueueKernel(
pCmdQ,
pVmeKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
// We have to parse after each enqueue* because
// the CSR CS may insert commands in between
parseCommands<FamilyType>(*pCmdQ);
retVal = EnqueueKernelHelper<>::enqueueKernel(
pCmdQ,
pKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
parseCommands<FamilyType>(*pCmdQ);
itorWalker1 = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorWalker1);
itorWalker2 = itorWalker1;
++itorWalker2;
itorWalker2 = find<typename FamilyType::GPGPU_WALKER *>(itorWalker2, cmdList.end());
ASSERT_NE(cmdList.end(), itorWalker2);
}
template <typename FamilyType>
void enqueueVmeKernel() {
auto retVal = EnqueueKernelHelper<>::enqueueKernel(
pCmdQ,
pVmeKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
parseCommands<FamilyType>(*pCmdQ);
itorWalker1 = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorWalker1);
}
void SetUp() override {
Parent::kernelFilename = "vme_kernels";
Parent::kernelName = "non_vme_kernel";
Parent::SetUp();
HardwareParse::SetUp();
ASSERT_NE(nullptr, pKernel);
ASSERT_EQ(false, pKernel->isVmeKernel());
cl_int retVal;
// create the VME kernel
pVmeKernel = Kernel::create<MockKernel>(
pProgram,
*pProgram->getKernelInfo("device_side_block_motion_estimate_intel"),
&retVal);
ASSERT_NE(nullptr, pVmeKernel);
ASSERT_EQ(true, pVmeKernel->isVmeKernel());
}
void TearDown() override {
delete pVmeKernel;
pVmeKernel = nullptr;
HardwareParse::TearDown();
Parent::TearDown();
}
GenCmdList::iterator itorWalker1;
GenCmdList::iterator itorWalker2;
Kernel *pVmeKernel = nullptr;
};
typedef MediaKernelCommandQueueTestFactory<HelloWorldFixtureFactory> MediaKernelCommandQueueTest;
TEST_F(MediaKernelCommandQueueTest, VmeKernelProperlyIdentifiesItself) {
TEST_F(MediaKernelTest, VmeKernelProperlyIdentifiesItself) {
ASSERT_NE(true, pKernel->isVmeKernel());
ASSERT_EQ(true, pVmeKernel->isVmeKernel());
}
// Test for the "enabling" pipeline select preceeding the VME kernel
HWTEST_F(MediaKernelCommandQueueTest,
HavePipelineSelectToDisableMediaSamplerDopClockGate) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
if (::renderCoreFamily == IGFX_GEN8_CORE) {
return;
}
HWTEST_F(MediaKernelTest, EnqueueVmeKernelUsesSinglePipelineSelect) {
enqueueVmeKernel<FamilyType>();
auto itorCmd1 = find<typename FamilyType::PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itorCmd1);
auto pPreceedingPipelineSelect = genCmdCast<PIPELINE_SELECT *>(*itorCmd1);
bool samplerClockGateEnable = PreambleHelper<FamilyType>::getMediaSamplerDopClockGateEnable(
reinterpret_cast<LinearStream *>(pPreceedingPipelineSelect));
EXPECT_EQ(PreambleHelper<FamilyType>::getPipelineSelectMaskBits(), pPreceedingPipelineSelect->getMaskBits());
EXPECT_EQ(false, samplerClockGateEnable);
EXPECT_EQ(0u, pPreceedingPipelineSelect->getPipelineSelection());
auto numCommands = getCommandsList<typename FamilyType::PIPELINE_SELECT>().size();
EXPECT_EQ(1u, numCommands);
}
// Test for the "restoring" pipeline select following the VME kernel (that
// disables the media sampler)
HWTEST_F(MediaKernelCommandQueueTest,
HavePipelineSelectToEnableMediaSamplerDopClockGate) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
if (::renderCoreFamily == IGFX_GEN8_CORE) {
return;
}
enqueueVmeThenRegularKernel<FamilyType>();
auto itorCmd1 = find<typename FamilyType::PIPELINE_SELECT *>(
cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itorCmd1);
auto itorCmd2 = itorCmd1;
++itorCmd2;
itorCmd2 = find<typename FamilyType::PIPELINE_SELECT *>(
itorCmd2, cmdList.end());
EXPECT_NE(cmdList.end(), itorCmd2);
auto itorCmd3 = itorCmd2;
++itorCmd3;
itorCmd3 = find<typename FamilyType::PIPELINE_SELECT *>(
itorCmd3, cmdList.end());
EXPECT_NE(cmdList.end(), itorCmd3);
auto pFollowingPipelineSelect = genCmdCast<PIPELINE_SELECT *>(*itorCmd3);
bool samplerClockGateEnable = PreambleHelper<FamilyType>::getMediaSamplerDopClockGateEnable(
reinterpret_cast<LinearStream *>(pFollowingPipelineSelect));
EXPECT_EQ(PreambleHelper<FamilyType>::getPipelineSelectMaskBits(), pFollowingPipelineSelect->getMaskBits());
EXPECT_EQ(true, samplerClockGateEnable);
EXPECT_EQ(0u, pFollowingPipelineSelect->getPipelineSelection());
}
// Calls the fixture's enqueueRegularKernel helper and expects the list returned
// by getCommandsList for PIPELINE_SELECT to contain exactly one entry.
HWTEST_F(MediaKernelTest, EnqueueRegularKernelUsesSinglePipelineSelect) {
enqueueRegularKernel<FamilyType>();
auto numCommands = getCommandsList<typename FamilyType::PIPELINE_SELECT>().size();
EXPECT_EQ(1u, numCommands);
}

View File

@ -337,18 +337,9 @@ HWTEST_F(EnqueueReadBufferRectTest, 2D_InterfaceDescriptorData) {
}
HWTEST_F(EnqueueReadBufferRectTest, 2D_PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueReadBufferRect2D<FamilyType>();
ASSERT_NE(cmdList.end(), itorPipelineSelect);
auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueReadBufferRectTest, 2D_MediaVFEState) {

View File

@ -304,19 +304,10 @@ HWTEST_F(EnqueueReadBufferTypeTest, InterfaceDescriptorData) {
}
HWTEST_F(EnqueueReadBufferTypeTest, PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
srcBuffer->forceDisallowCPUCopy = true;
enqueueReadBuffer<FamilyType>();
ASSERT_NE(cmdList.end(), itorPipelineSelect);
auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueReadBufferTypeTest, MediaVFEState) {

View File

@ -238,17 +238,9 @@ HWTEST_F(EnqueueReadImageTest, surfaceState) {
}
HWTEST_F(EnqueueReadImageTest, pipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueReadImage<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueReadImageTest, mediaVFEState) {

View File

@ -310,18 +310,9 @@ HWTEST_F(EnqueueWriteBufferRectTest, 2D_InterfaceDescriptorData) {
}
HWTEST_F(EnqueueWriteBufferRectTest, 2D_PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueWriteBufferRect2D<FamilyType>();
ASSERT_NE(cmdList.end(), itorPipelineSelect);
auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueWriteBufferRectTest, 2D_MediaVFEState) {

View File

@ -299,19 +299,10 @@ HWTEST_F(EnqueueWriteBufferTypeTest, InterfaceDescriptorData) {
}
HWTEST_F(EnqueueWriteBufferTypeTest, PipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
srcBuffer->forceDisallowCPUCopy = true;
enqueueWriteBuffer<FamilyType>();
ASSERT_NE(cmdList.end(), itorPipelineSelect);
auto *cmd = (PIPELINE_SELECT *)*itorPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueWriteBufferTypeTest, MediaVFEState) {

View File

@ -240,17 +240,9 @@ HWTEST_F(EnqueueWriteImageTest, surfaceState) {
}
HWTEST_F(EnqueueWriteImageTest, pipelineSelect) {
typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
enqueueWriteImage<FamilyType>();
auto *cmd = (PIPELINE_SELECT *)cmdPipelineSelect;
// Verify we have a valid length
EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, cmd->getPipelineSelection());
// Specifying GPGPU mode requires setting equivalent mask bits.
EXPECT_EQ(0x3u, cmd->getMaskBits() & 0x3);
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect<FamilyType>();
EXPECT_EQ(1, numCommands);
}
HWTEST_F(EnqueueWriteImageTest, mediaVFEState) {

View File

@ -174,6 +174,7 @@ struct UltCommandStreamReceiverTest
commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
commandStreamReceiver.lastSentCoherencyRequest = 0;
commandStreamReceiver.lastMediaSamplerConfig = 0;
}
template <typename GfxFamily>
@ -440,6 +441,41 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, preambleShouldBeSentIfNeverSent) {
EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
}
// Preamble not sent and cached media sampler config set to -1: after flushTask,
// a PIPELINE_SELECT is expected among the commands parsed from the CSR stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementChanged) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = false;
// -1 cannot equal a bool cast to int8_t, so the config always compares as changed.
commandStreamReceiver.lastMediaSamplerConfig = -1;
flushTask(commandStreamReceiver);
parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
EXPECT_NE(nullptr, getCommand<typename FamilyType::PIPELINE_SELECT>());
}
// Preamble not sent and cached media sampler config set to 0: after flushTask,
// a PIPELINE_SELECT is still expected among the commands parsed from the CSR stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementNotChanged) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = false;
// presumably matches the media sampler requirement used by the flushTask helper - TODO confirm
commandStreamReceiver.lastMediaSamplerConfig = 0;
flushTask(commandStreamReceiver);
parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
EXPECT_NE(nullptr, getCommand<typename FamilyType::PIPELINE_SELECT>());
}
// Preamble already sent and cached media sampler config set to 0: after flushTask,
// no PIPELINE_SELECT is expected among the commands parsed from the CSR stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldNotBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
// presumably matches the media sampler requirement used by the flushTask helper - TODO confirm
commandStreamReceiver.lastMediaSamplerConfig = 0;
flushTask(commandStreamReceiver);
parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
EXPECT_EQ(nullptr, getCommand<typename FamilyType::PIPELINE_SELECT>());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) {
    // Preamble already sent, media sampler configuration recorded as 1:
    // the flush is expected to emit a PIPELINE_SELECT.
    // NOTE(review): the test name says the requirement "DoesntChanged", yet the
    // sibling test that expects no PIPELINE_SELECT presets 0 here, so this case
    // presumably covers a *changed* requirement - consider renaming.
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.lastMediaSamplerConfig = 1;
    csr.isPreambleSent = true;

    flushTask(csr);
    parseCommands<FamilyType>(csr.commandStream, 0);

    auto *pipelineSelect = getCommand<PIPELINE_SELECT>();
    EXPECT_NE(nullptr, pipelineSelect);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfNeverSent) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
@ -1761,6 +1797,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAnd
// Force a PIPE_CONTROL through a taskLevel transition
taskLevel = commandStreamReceiver.peekTaskLevel() + 1;
commandStreamReceiver.isPreambleSent = true;
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
auto &csrCS = commandStreamReceiver.getCS();

View File

@ -41,6 +41,7 @@ set(IGDRCL_SRCS_tests_fixtures
"${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.h"
"${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/media_kernel_fixture.h"
"${CMAKE_CURRENT_SOURCE_DIR}/memory_allocator_fixture.h"
"${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.h"
"${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.cpp"

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "unit_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
namespace OCLRT {

// Test fixture that builds the "vme_kernels" program and exposes two kernels
// from it: the regular "non_vme_kernel" (pKernel, created by the parent
// fixture) and the VME kernel "device_side_block_motion_estimate_intel"
// (pVmeKernel, created and owned by this fixture).
template <typename FactoryType>
struct MediaKernelFixture : public HelloWorldFixture<FactoryType>,
                            public HardwareParse,
                            public ::testing::Test {
    typedef HelloWorldFixture<FactoryType> Parent;

    using Parent::pCS;
    using Parent::pCmdBuffer;
    using Parent::pCmdQ;
    using Parent::pContext;
    using Parent::pDevice;
    using Parent::pKernel;
    using Parent::pProgram;
    using Parent::retVal;

    MediaKernelFixture() {}

    // Enqueues the regular (non-VME) kernel, parses the command queue and
    // stores the first GPGPU_WALKER of the parsed list in itorWalker1.
    template <typename FamilyType>
    void enqueueRegularKernel() {
        enqueueKernelAndFindWalker<FamilyType>(pKernel);
    }

    // Enqueues the VME kernel, parses the command queue and stores the first
    // GPGPU_WALKER of the parsed list in itorWalker1.
    template <typename FamilyType>
    void enqueueVmeKernel() {
        enqueueKernelAndFindWalker<FamilyType>(pVmeKernel);
    }

    void SetUp() override {
        Parent::kernelFilename = "vme_kernels";
        Parent::kernelName = "non_vme_kernel";
        Parent::SetUp();
        HardwareParse::SetUp();

        ASSERT_NE(nullptr, pKernel);
        ASSERT_EQ(false, pKernel->isVmeKernel());

        // Fail the test cleanly instead of dereferencing a null pointer when
        // the program does not contain the VME kernel.
        auto pVmeKernelInfo = pProgram->getKernelInfo("device_side_block_motion_estimate_intel");
        ASSERT_NE(nullptr, pVmeKernelInfo);

        // Initialized so the out-parameter never holds an indeterminate value.
        cl_int retVal = CL_INVALID_VALUE;

        // create the VME kernel
        pVmeKernel = Kernel::create<MockKernel>(
            pProgram,
            *pVmeKernelInfo,
            &retVal);

        ASSERT_NE(nullptr, pVmeKernel);
        ASSERT_EQ(true, pVmeKernel->isVmeKernel());
    }

    void TearDown() override {
        // Release the kernel created in SetUp before tearing down the parents.
        delete pVmeKernel;
        pVmeKernel = nullptr;

        HardwareParse::TearDown();
        Parent::TearDown();
    }

    GenCmdList::iterator itorWalker1;
    GenCmdList::iterator itorWalker2;
    Kernel *pVmeKernel = nullptr;

  private:
    // Shared body of the enqueue helpers: enqueue `kernel`, parse the queue
    // and locate the first GPGPU_WALKER, searching from the list beginning.
    template <typename FamilyType>
    void enqueueKernelAndFindWalker(Kernel *kernel) {
        auto retVal = EnqueueKernelHelper<>::enqueueKernel(
            pCmdQ,
            kernel);
        ASSERT_EQ(CL_SUCCESS, retVal);

        parseCommands<FamilyType>(*pCmdQ);

        itorWalker1 = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
        ASSERT_NE(cmdList.end(), itorWalker1);
    }
};
} // namespace OCLRT

View File

@ -20,123 +20,19 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "unit_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
#include "runtime/helpers/preamble.h"
#include "unit_tests/fixtures/media_kernel_fixture.h"
#include "test.h"
using namespace OCLRT;
template <typename FactoryType>
struct MediaKernelCommandQueueTestFactory : public HelloWorldFixture<FactoryType>,
public HardwareParse,
public ::testing::Test {
typedef HelloWorldFixture<FactoryType> Parent;
typedef MediaKernelFixture<HelloWorldFixtureFactory> MediaKernelTest;
using Parent::pCS;
using Parent::pCmdBuffer;
using Parent::pCmdQ;
using Parent::pContext;
using Parent::pDevice;
using Parent::pKernel;
using Parent::pProgram;
using Parent::retVal;
MediaKernelCommandQueueTestFactory() {}
template <typename FamilyType>
void enqueueVmeThenRegularKernel() {
auto retVal = EnqueueKernelHelper<>::enqueueKernel(
pCmdQ,
pVmeKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
// We have to parse after each enqueue* because
// the CSR CS may insert commands in between
parseCommands<FamilyType>(*pCmdQ);
retVal = EnqueueKernelHelper<>::enqueueKernel(
pCmdQ,
pKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
parseCommands<FamilyType>(*pCmdQ);
itorWalker1 = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorWalker1);
itorWalker2 = itorWalker1;
++itorWalker2;
itorWalker2 = find<typename FamilyType::GPGPU_WALKER *>(itorWalker2, cmdList.end());
ASSERT_NE(cmdList.end(), itorWalker2);
}
template <typename FamilyType>
void enqueueVmeKernel() {
auto retVal = EnqueueKernelHelper<>::enqueueKernel(
pCmdQ,
pVmeKernel);
ASSERT_EQ(CL_SUCCESS, retVal);
parseCommands<FamilyType>(*pCmdQ);
itorWalker1 = find<typename FamilyType::GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorWalker1);
}
void SetUp() override {
Parent::kernelFilename = "vme_kernels";
Parent::kernelName = "non_vme_kernel";
Parent::SetUp();
HardwareParse::SetUp();
ASSERT_NE(nullptr, pKernel);
ASSERT_EQ(false, pKernel->isVmeKernel());
cl_int retVal;
// create the VME kernel
pVmeKernel = Kernel::create(
pProgram,
*pProgram->getKernelInfo("device_side_block_motion_estimate_intel"),
&retVal);
ASSERT_NE(nullptr, pVmeKernel);
ASSERT_EQ(true, pVmeKernel->isVmeKernel());
}
void TearDown() override {
delete pVmeKernel;
pVmeKernel = nullptr;
HardwareParse::TearDown();
Parent::TearDown();
}
GenCmdList::iterator itorWalker1;
GenCmdList::iterator itorWalker2;
Kernel *pVmeKernel = nullptr;
};
typedef MediaKernelCommandQueueTestFactory<HelloWorldFixtureFactory> MediaKernelCommandQueueTest;
BDWTEST_F(MediaKernelCommandQueueTest, BdwClockGateAlwaysFalse) {
auto samplerClockGateEnable = PreambleHelper<FamilyType>::getMediaSamplerDopClockGateEnable(
reinterpret_cast<LinearStream *>(pCmdQ));
ASSERT_EQ(false, samplerClockGateEnable); // Always false for BDW
}
BDWTEST_F(MediaKernelCommandQueueTest, BdwHasSinglePipelineSelect) {
GEN8TEST_F(MediaKernelTest, givenGen8CSRWhenEnqueueVmeKernelThenProgramOnlyPipelineSelection) {
typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT;
enqueueVmeKernel<FamilyType>();
auto itorCmd1 = find<typename FamilyType::PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itorCmd1);
auto itorCmd2 = itorCmd1;
++itorCmd2;
itorCmd2 = find<typename FamilyType::PIPELINE_SELECT *>(itorCmd2, cmdList.end());
ASSERT_EQ(cmdList.end(), itorCmd2);
auto pCmd = getCommand<PIPELINE_SELECT>();
auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits;
auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;
EXPECT_EQ(expectedMask, pCmd->getMaskBits());
EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection());
}

View File

@ -111,9 +111,3 @@ BDWTEST_F(PreambleVfeState, basic) {
EXPECT_TRUE(pc.getDcFlushEnable());
EXPECT_EQ(1u, pc.getCommandStreamerStallEnable());
}
typedef ::testing::Test BdwMaskBitsTest;
BDWTEST_F(BdwMaskBitsTest, noPipelineSelectMaskBits) {
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getPipelineSelectMaskBits());
}

View File

@ -23,6 +23,7 @@ set(IGDRCL_SRCS_tests_gen9
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_gen9.cpp
${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/gen_cmd_parse.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program_data_tests_gen9.cpp
@ -30,7 +31,6 @@ set(IGDRCL_SRCS_tests_gen9
${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preamble.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_sample.cpp
PARENT_SCOPE
)

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "unit_tests/fixtures/media_kernel_fixture.h"
#include "runtime/helpers/preamble.inl"
#include "test.h"
using namespace OCLRT;
typedef MediaKernelFixture<HelloWorldFixtureFactory> MediaKernelTest;
GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    enqueueVmeKernel<SKLFamily>();

    // A single PIPELINE_SELECT must carry both the pipeline selection and the
    // media sampler DOP clock gate setting.
    EXPECT_EQ(1u, getCommandsList<PIPELINE_SELECT>().size());

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto maskBits = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    EXPECT_EQ(maskBits, pipelineSelect->getMaskBits());
    EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, pipelineSelect->getPipelineSelection());
    // The VME kernel is expected to leave the DOP clock gate disabled.
    EXPECT_FALSE(pipelineSelect->getMediaSamplerDopClockGateEnable());
}
GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    enqueueRegularKernel<SKLFamily>();

    // A single PIPELINE_SELECT must carry both the pipeline selection and the
    // media sampler DOP clock gate setting.
    EXPECT_EQ(1u, getCommandsList<PIPELINE_SELECT>().size());

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto maskBits = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    EXPECT_EQ(maskBits, pipelineSelect->getMaskBits());
    EXPECT_EQ(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU, pipelineSelect->getPipelineSelection());
    // The regular kernel is expected to enable the DOP clock gate.
    EXPECT_TRUE(pipelineSelect->getMediaSamplerDopClockGateEnable());
}
GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) {
    typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;

    // Enqueue the VME kernel twice so the "requirement unchanged" path of the
    // second submission is actually exercised.
    enqueueVmeKernel<SKLFamily>();
    enqueueVmeKernel<SKLFamily>();

    // Only the first submission is expected to program PIPELINE_SELECT.
    auto numCommands = getCommandsList<PIPELINE_SELECT>().size();
    EXPECT_EQ(1u, numCommands);
}
GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) {
    typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;

    // Enqueue the regular (non-VME) kernel twice, as the test name states, so
    // the "requirement unchanged" path of the second submission is exercised.
    enqueueRegularKernel<SKLFamily>();
    enqueueRegularKernel<SKLFamily>();

    // Only the first submission is expected to program PIPELINE_SELECT.
    auto numCommands = getCommandsList<PIPELINE_SELECT>().size();
    EXPECT_EQ(1u, numCommands);
}
GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    // Regular kernel first, VME kernel second: each submission is expected to
    // program its own PIPELINE_SELECT.
    enqueueRegularKernel<SKLFamily>();
    enqueueVmeKernel<SKLFamily>();

    auto pipelineSelects = getCommandsList<PIPELINE_SELECT>();
    EXPECT_EQ(2u, pipelineSelects.size());

    // The last command belongs to the VME kernel: clock gate disabled.
    auto lastPipelineSelect = static_cast<PIPELINE_SELECT *>(pipelineSelects.back());
    auto maskBits = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    EXPECT_EQ(maskBits, lastPipelineSelect->getMaskBits());
    EXPECT_FALSE(lastPipelineSelect->getMediaSamplerDopClockGateEnable());
}
GEN9TEST_F(MediaKernelTest, givenGen9CSRWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    // VME kernel first, regular kernel second: each submission is expected to
    // program its own PIPELINE_SELECT.
    enqueueVmeKernel<SKLFamily>();
    enqueueRegularKernel<SKLFamily>();

    auto pipelineSelects = getCommandsList<PIPELINE_SELECT>();
    EXPECT_EQ(2u, pipelineSelects.size());

    // The last command belongs to the regular kernel: clock gate enabled.
    auto lastPipelineSelect = static_cast<PIPELINE_SELECT *>(pipelineSelects.back());
    auto maskBits = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    EXPECT_EQ(maskBits, lastPipelineSelect->getMaskBits());
    EXPECT_TRUE(lastPipelineSelect->getMediaSamplerDopClockGateEnable());
}

View File

@ -25,6 +25,7 @@
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_stream/linear_stream.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/pipeline_select_helper.h"
#include "runtime/kernel/kernel.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
#include "gtest/gtest.h"
@ -242,26 +243,33 @@ struct HardwareParse {
// Returns the first command of type CmdType found in [itorStart, itorEnd),
// or nullptr when the range holds no such command.
template <typename CmdType>
CmdType *getCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
    // Search only the caller-supplied range; the stale whole-list search that
    // redeclared itorCmd is dropped.
    auto itorCmd = find<CmdType *>(itorStart, itorEnd);
    // A miss is reported as itorEnd, which need not be cmdList.end(); checking
    // against itorEnd keeps a command outside the range from being returned.
    return itorCmd != itorEnd
               ? genCmdCast<CmdType *>(*itorCmd)
               : nullptr;
}
// Searches [*itorStart, itorEnd) for the first command of type CmdType and
// passes the found iterator back through itorStart. Returns the command, or
// nullptr when the range holds none (in which case *itorStart == itorEnd).
template <typename CmdType>
CmdType *getCommand(GenCmdList::iterator *itorStart, GenCmdList::iterator itorEnd) {
    *itorStart = find<CmdType *>(*itorStart, itorEnd);
    // A miss is reported as itorEnd, which need not be cmdList.end().
    return *itorStart != itorEnd
               ? genCmdCast<CmdType *>(**itorStart)
               : nullptr;
}
// Convenience overload: looks for the first command of type CmdType across
// the whole parsed command list.
template <typename CmdType>
CmdType *getCommand() {
    auto rangeBegin = cmdList.begin();
    auto rangeEnd = cmdList.end();
    return getCommand<CmdType>(rangeBegin, rangeEnd);
}
// Counts the parsed PIPELINE_SELECT commands that select the GPGPU pipeline
// and have every pipelineSelectEnablePipelineSelectMaskBits bit set in their
// mask.
template <typename FamilyType>
int getNumberOfPipelineSelectsThatEnablePipelineSelect() {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    int matchingSelects = 0;
    // Hop from one PIPELINE_SELECT to the next until the list is exhausted.
    for (auto itor = find<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
         itor != cmdList.end();
         itor = find<PIPELINE_SELECT *>(++itor, cmdList.end())) {
        auto pipelineSelect = getCommand<PIPELINE_SELECT>(itor, cmdList.end());
        if (pipelineSelect->getPipelineSelection() != PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU) {
            continue;
        }
        auto enabledBits = pipelineSelectEnablePipelineSelectMaskBits & pipelineSelect->getMaskBits();
        if (pipelineSelectEnablePipelineSelectMaskBits == enabledBits) {
            ++matchingSelects;
        }
    }
    return matchingSelects;
}
// The starting point of parsing commandBuffers. This is important
// because as buffers get reused, we only want to parse the deltas.
LinearStream *previousCS;

View File

@ -48,6 +48,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy;
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;