Improve thread arbitration policy programming

Change-Id: Ibd764352e14d1a5112034b1c5a1fc6d6d67ebac0
This commit is contained in:
Dunajski, Bartosz
2018-02-20 08:11:24 +01:00
committed by sys_ocldev
parent 67c8d7a9ca
commit 1292c3d533
21 changed files with 127 additions and 104 deletions

2
Jenkinsfile vendored
View File

@ -2,4 +2,4 @@
neoDependenciesRev='735095-769'
strategy='EQUAL'
allowedF=42
allowedCD=339
allowedCD=340

View File

@ -532,7 +532,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
}
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy());
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy<GfxFamily>());
DispatchFlags dispatchFlags;
dispatchFlags.blocking = blocking;

View File

@ -153,8 +153,8 @@ class CommandStreamReceiver {
LinearStream commandStream;
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
uint32_t lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbitrationPolicyNotPresent;
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
GraphicsAllocation *scratchAllocation = nullptr;
GraphicsAllocation *preemptionCsrAllocation = nullptr;

View File

@ -41,7 +41,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return new CommandStreamReceiverHw<GfxFamily>(hwInfoIn);
}
CommandStreamReceiverHw(const HardwareInfo &hwInfoIn) : hwInfo(hwInfoIn) {}
CommandStreamReceiverHw(const HardwareInfo &hwInfoIn);
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override;

View File

@ -37,6 +37,11 @@
namespace OCLRT {
// Constructs the hardware command stream receiver: keeps a reference to the
// supplied hardware info and seeds the required thread arbitration policy
// with the default reported by PreambleHelper for this GfxFamily.
template <typename GfxFamily>
CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(const HardwareInfo &hwInfoIn)
    : hwInfo(hwInfoIn) {
    const uint32_t familyDefaultPolicy = PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
    this->requiredThreadArbitrationPolicy = familyDefaultPolicy;
}
template <typename GfxFamily>
FlushStamp CommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) {
return flushStamp->peekStamp();
@ -171,6 +176,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (DebugManager.flags.ForceSLML3Config.get()) {
dispatchFlags.useSLM = true;
}
if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
requestThreadArbitrationPolicy(static_cast<uint32_t>(DebugManager.flags.OverrideThreadArbitrationPolicy.get()));
}
auto newL3Config = PreambleHelper<GfxFamily>::getL3Config(peekHwInfo(), dispatchFlags.useSLM);
@ -208,9 +216,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
}
if (this->lastSentThreadAribtrationPolicy != this->requiredThreadArbitrationPolicy) {
if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
PreambleHelper<GfxFamily>::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy);
this->lastSentThreadAribtrationPolicy = this->requiredThreadArbitrationPolicy;
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
}
stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.GSBA32BitRequired) && force32BitAllocations);
@ -581,7 +589,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &cs
PreambleHelper<GfxFamily>::programPreamble(&csr, *memoryManager->device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionCsrAllocation);
this->isPreambleSent = true;
this->lastSentL3Config = newL3Config;
this->lastSentThreadAribtrationPolicy = this->requiredThreadArbitrationPolicy;
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
}
}

View File

@ -23,8 +23,8 @@
#include <stdint.h>
namespace OCLRT {
namespace ThreadArbitrationPolicy {
const uint32_t threadArbirtrationPolicyRoundRobin = 0x100u;
const uint32_t threadArbitrationPolicyAgeBased = 0x0u;
const uint32_t threadArbitrationPolicyNotPresent = 0xffffffffu;
}
}
const uint32_t AgeBased = 0x0u;
const uint32_t RoundRobin = 0x1u;
const uint32_t NotPresent = 0xffffffffu;
} // namespace ThreadArbitrationPolicy
} // namespace OCLRT

View File

@ -29,15 +29,6 @@ void PreambleHelper<BDWFamily>::setupPipeControlInFrontOfCommand(void *pCmd, con
((BDWFamily::PIPE_CONTROL *)pCmd)->setDcFlushEnable(true);
}
template <>
void PreambleHelper<BDWFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t threadArbitrationPolicy) {
}
template <>
uint32_t PreambleHelper<BDWFamily>::getAdditionalCommandsSize(const Device &device) {
return 0;
}
template <>
uint32_t PreambleHelper<BDWFamily>::getL3Config(const HardwareInfo &hwInfo, bool useSLM) {
uint32_t l3Config = 0;
@ -61,6 +52,5 @@ void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStre
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
}
// Explicitly instantiate PreambleHelper for BDW device family
template struct PreambleHelper<BDWFamily>;
} // namespace OCLRT

View File

@ -64,5 +64,30 @@ void PreambleHelper<SKLFamily>::setupPipeControlInFrontOfCommand(void *pCmd, con
}
}
template struct PreambleHelper<SKLFamily>;
// SKLFamily specialization: the default thread arbitration policy is Round Robin.
template <>
uint32_t PreambleHelper<SKLFamily>::getDefaultThreadArbitrationPolicy() {
    const uint32_t defaultPolicy = ThreadArbitrationPolicy::RoundRobin;
    return defaultPolicy;
}
// SKLFamily specialization: emits the commands that program the requested
// thread arbitration policy into the command stream.
//
// pCommandStream                  - stream that receives the commands.
// requiredThreadArbitrationPolicy - a ThreadArbitrationPolicy value; NotPresent
//                                   is rejected via UNRECOVERABLE_IF.
//
// Two commands are written, in this order: a PIPE_CONTROL with the command
// streamer stall bit set, followed by an MI_LOAD_REGISTER_IMM targeting
// DebugControlReg2::address with the data produced by
// DebugControlReg2::getRegData().
template <>
void PreambleHelper<SKLFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
    UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent);

    // The stall must be placed in the stream ahead of the register write.
    auto stallCmd = pCommandStream->getSpaceForCmd<PIPE_CONTROL>();
    *stallCmd = PIPE_CONTROL::sInit();
    stallCmd->setCommandStreamerStallEnable(true);

    // Register write carrying the encoded policy.
    auto loadRegisterCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
    *loadRegisterCmd = MI_LOAD_REGISTER_IMM::sInit();
    loadRegisterCmd->setRegisterOffset(DebugControlReg2::address);
    const uint32_t encodedPolicy = DebugControlReg2::getRegData(requiredThreadArbitrationPolicy);
    loadRegisterCmd->setDataDword(encodedPolicy);
}
// SKLFamily specialization: number of bytes of additional preamble commands.
// This is the preemption preamble size reported by PreemptionHelper for the
// device plus one MI_LOAD_REGISTER_IMM and one PIPE_CONTROL.
template <>
size_t PreambleHelper<SKLFamily>::getAdditionalCommandsSize(const Device &device) {
    const size_t preemptionSize = PreemptionHelper::getRequiredPreambleSize<SKLFamily>(device);
    const size_t threadArbitrationSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL);
    return preemptionSize + threadArbitrationSize;
}
template struct PreambleHelper<SKLFamily>;
} // namespace OCLRT

View File

@ -22,6 +22,8 @@
#pragma once
#include "runtime/helpers/preamble.h"
#include "runtime/command_stream/thread_arbitration_policy.h"
namespace OCLRT {
struct SKLFamily;
template <>
@ -40,4 +42,12 @@ struct L3CNTLREGConfig<IGFX_BROXTON> {
static const uint32_t valueForSLM = 0x60000121u;
static const uint32_t valueForNoSLM = 0x80000140u;
};
}
// Register offset and data encoding used when a thread arbitration policy is
// written with MI_LOAD_REGISTER_IMM.
namespace DebugControlReg2 {
// MMIO offset of the register.
constexpr uint32_t address = 0xE404;

// Translates a ThreadArbitrationPolicy value into the register data dword.
// RoundRobin maps to 0x100; every other value maps to 0x0.
// The policy is taken by value (it is a 4-byte scalar) and the literals are
// unsigned so no implicit int -> uint32_t conversion takes place.
constexpr uint32_t getRegData(uint32_t policy) {
    return policy == ThreadArbitrationPolicy::RoundRobin ? 0x100u : 0x0u;
}
} // namespace DebugControlReg2
} // namespace OCLRT

View File

@ -36,10 +36,14 @@ class LinearStream;
template <typename GfxFamily>
struct PreambleHelper {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; }
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired);
static uint32_t getDefaultThreadArbitrationPolicy();
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
static void programPreemption(LinearStream *pCommandStream, const Device &device, GraphicsAllocation *preemptionCsr);
static void setupPipeControlInFrontOfCommand(void *pCmd, const HardwareInfo *hwInfo, bool isVfeCommand);
@ -47,7 +51,7 @@ struct PreambleHelper {
static void programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr);
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
static uint32_t getAdditionalCommandsSize(const Device &device);
static size_t getAdditionalCommandsSize(const Device &device);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
};
@ -73,4 +77,5 @@ template <typename GfxFamily>
struct L3CNTLRegisterOffset {
static const uint32_t registerOffset;
};
} // namespace OCLRT

View File

@ -34,21 +34,11 @@ namespace OCLRT {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
}
// Add a PIPE_CONTROL w/ CS_stall
auto pPipeControl = (PIPE_CONTROL *)pCommandStream->getSpace(sizeof(PIPE_CONTROL));
*pPipeControl = PIPE_CONTROL::sInit();
pPipeControl->setCommandStreamerStallEnable(true);
setupPipeControlInFrontOfCommand(pPipeControl, nullptr, false);
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*pCmd = MI_LOAD_REGISTER_IMM::sInit();
pCmd->setRegisterOffset(0xE404);
auto data = requiredThreadArbitrationPolicy;
pCmd->setDataDword(data);
// Generic implementation used by families that provide no specialization:
// reports 0 as the default thread arbitration policy.
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy() {
    constexpr uint32_t genericDefaultPolicy = 0u;
    return genericDefaultPolicy;
}
template <typename GfxFamily>
@ -56,17 +46,12 @@ void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStre
}
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
size_t requiredSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL);
requiredSize += PreemptionHelper::getRequiredPreambleSize<GfxFamily>(device);
return static_cast<uint32_t>(requiredSize);
size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
return 0;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, int scratchSize, uint64_t scratchAddress) {
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE;
// Add a PIPE_CONTROL w/ CS_stall
@ -90,7 +75,6 @@ void PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream, co
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*pCmd = MI_LOAD_REGISTER_IMM::sInit();
@ -116,5 +100,4 @@ template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getUrbEntryAllocationSize() {
return 0x782;
}
} // namespace OCLRT

View File

@ -25,6 +25,7 @@
#include "runtime/command_stream/thread_arbitration_policy.h"
#include "runtime/device_queue/device_queue.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/preamble.h"
#include "runtime/program/program.h"
#include "runtime/program/kernel_info.h"
#include "runtime/os_interface/debug_settings_manager.h"
@ -337,11 +338,12 @@ class Kernel : public BaseObject<_cl_kernel> {
const bool isParentKernel;
const bool isSchedulerKernel;
template <typename GfxFamily>
uint32_t getThreadArbitrationPolicy() {
if (kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) {
return ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
return PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
} else {
return ThreadArbitrationPolicy::threadArbitrationPolicyAgeBased;
return ThreadArbitrationPolicy::AgeBased;
}
}
bool checkIfIsParentKernelAndBlocksUsesPrintf() {

View File

@ -81,3 +81,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API v
DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, -1, "Keep this variable in sync with PreemptionMode enum. -1 - devices default mode, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread")
DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS")
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)")

View File

@ -131,10 +131,9 @@ struct UltCommandStreamReceiverTest
bool requiresCoherency = false,
bool lowPriority = false) {
DispatchFlags dispatchFlags;
dispatchFlags.blocking = block;
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.lowPriority = lowPriority;
flushTaskFlags.blocking = block;
flushTaskFlags.requiresCoherency = requiresCoherency;
flushTaskFlags.lowPriority = lowPriority;
return commandStreamReceiver.flushTask(
commandStream,
@ -144,7 +143,7 @@ struct UltCommandStreamReceiverTest
ioh,
ssh,
taskLevel,
dispatchFlags);
flushTaskFlags);
}
template <typename GfxFamily>
@ -174,7 +173,7 @@ struct UltCommandStreamReceiverTest
configureCSRHeapStatesToNonDirty<GfxFamily>();
commandStreamReceiver.taskLevel = taskLevel;
commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
commandStreamReceiver.lastSentCoherencyRequest = 0;
commandStreamReceiver.lastMediaSamplerConfig = 0;
}
@ -184,6 +183,7 @@ struct UltCommandStreamReceiverTest
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(pDevice->getCommandStreamReceiver());
}
DispatchFlags flushTaskFlags = {};
uint32_t taskLevel = 42;
LinearStream commandStream;
LinearStream dsh;
@ -254,6 +254,19 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldSeeCommandsOnFirstFlush) {
EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
}
// Verifies that the OverrideThreadArbitrationPolicy debug flag wins over the
// policy currently requested on the command stream receiver when flushing.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) {
    // NOTE(review): presumably restores the debug flags when it goes out of scope - confirm.
    DebugManagerStateRestore debugFlagsGuard;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Start from a state where Age Based is both requested and already sent,
    // so without the override no policy change would be pending.
    csr.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;
    csr.requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;

    DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin);

    flushTask(csr);

    // After the flush the last sent policy must be the overridden one.
    EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, csr.lastSentThreadArbitrationPolicy);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, taskCountShouldBeUpdated) {
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
flushTask(commandStreamReceiver);
@ -832,23 +845,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr
commandStreamReceiver.lastSentL3Config = l3Config;
auto &csrCS = commandStreamReceiver.getCS();
size_t sizeNeeded = getSizeRequiredPreambleCS<FamilyType>(MockDevice(commandStreamReceiver.hwInfo)) +
sizeof(STATE_BASE_ADDRESS) +
sizeof(PIPE_CONTROL) +
sizeof(PIPELINE_SELECT) +
commandStreamReceiver.getRequiredPipeControlSize() +
sizeof(MI_BATCH_BUFFER_START);
size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSizeAligned(flushTaskFlags);
sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize);
DispatchFlags flags;
csrCS.getSpace(csrCS.getAvailableSpace() - commandStreamReceiver.getRequiredCmdStreamSizeAligned(flags));
csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeeded);
auto expectedBase = csrCS.getBase();
// This case handles when we have *just* enough space
auto expectedUsed = csrCS.getUsed() + sizeNeeded;
flushTask(commandStreamReceiver);
flushTask(commandStreamReceiver, flushTaskFlags.blocking, 0, flushTaskFlags.requiresCoherency, flushTaskFlags.lowPriority);
// Verify that we didn't grab a new CS buffer
EXPECT_EQ(expectedUsed, csrCS.getUsed());
@ -1465,7 +1470,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousNOSL
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) {
MockCsrHw<FamilyType> commandStreamReceiver(*platformDevices[0]);
EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, commandStreamReceiver.peekThreadArbitrationPolicy());
EXPECT_EQ(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) {
@ -1484,7 +1489,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML
// Mark Preamble as sent, override L3Config to SLM config
commandStreamReceiver->isPreambleSent = true;
commandStreamReceiver->lastSentL3Config = L3Config;
commandStreamReceiver->lastSentThreadAribtrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy();
commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy<FamilyType>();
((MockKernel *)kernel)->setTotalSLMSize(1024);
@ -1963,7 +1968,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAnd
commandStreamReceiver.isPreambleSent = true;
commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode();
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
auto &csrCS = commandStreamReceiver.getCS();
size_t sizeNeeded = 2 * sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MEDIA_VFE_STATE) +

View File

@ -67,27 +67,16 @@ BDWTEST_F(Gen8L3Config, checkSLM) {
}
typedef PreambleFixture ThreadArbitrationGen8;
BDWTEST_F(ThreadArbitrationGen8, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsNotPresent) {
BDWTEST_F(ThreadArbitrationGen8, givenPolicyWhenThreadArbitrationProgrammedThenDoNothing) {
typedef BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
LinearStream &cs = linearStream;
uint32_t l3Config = PreambleHelper<BDWFamily>::getL3Config(**platformDevices, true);
PreambleHelper<BDWFamily>::programPreamble(&linearStream, MockDevice(**platformDevices), l3Config,
ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin,
nullptr);
PreambleHelper<BDWFamily>::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin);
parseCommands<BDWFamily>(cs);
auto itorLRI = reverse_find<MI_LOAD_REGISTER_IMM *>(cmdList.rbegin(), cmdList.rend());
ASSERT_NE(cmdList.rend(), itorLRI);
//we expect l3 programming here
const auto &lri = *reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
auto RegisterOffset = L3CNTLRegisterOffset<BDWFamily>::registerOffset;
EXPECT_EQ(RegisterOffset, lri.getRegisterOffset());
EXPECT_EQ(1u, lri.getDataDword() & 1);
EXPECT_EQ(0u, cs.getUsed());
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getAdditionalCommandsSize(MockDevice(**platformDevices)));
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getDefaultThreadArbitrationPolicy());
}
typedef PreambleFixture Gen8UrbEntryAllocationSize;

View File

@ -21,6 +21,7 @@
*/
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/gen9/reg_configs.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/memory_management_fixture.h"
#include "unit_tests/helpers/hw_parse.h"
@ -30,8 +31,6 @@
namespace OCLRT {
constexpr uint32_t gen9ThreadArbiterPolicyRegOffset = 0xE404;
using Gen9EnqueueTest = Test<DeviceFixture>;
GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) {
MockContext mc;
@ -44,10 +43,10 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdQ);
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset);
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address);
ASSERT_NE(nullptr, cmd);
EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, cmd->getDataDword());
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset));
EXPECT_EQ(DebugControlReg2::getRegData(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy()), cmd->getDataDword());
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address));
}
GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) {
@ -61,9 +60,9 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhe
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdQ);
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset);
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address);
ASSERT_NE(nullptr, cmd);
EXPECT_EQ(ThreadArbitrationPolicy::threadArbitrationPolicyAgeBased, cmd->getDataDword());
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset));
}
EXPECT_EQ(DebugControlReg2::getRegData(ThreadArbitrationPolicy::AgeBased), cmd->getDataDword());
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address));
}
} // namespace OCLRT

View File

@ -21,7 +21,7 @@
*/
#include "unit_tests/fixtures/media_kernel_fixture.h"
#include "runtime/helpers/preamble.inl"
#include "runtime/helpers/preamble.h"
#include "test.h"
using namespace OCLRT;

View File

@ -80,7 +80,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio
LinearStream &cs = linearStream;
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(**platformDevices, true);
PreambleHelper<SKLFamily>::programPreamble(&linearStream, MockDevice(**platformDevices), l3Config,
ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin,
ThreadArbitrationPolicy::RoundRobin,
nullptr);
parseCommands<SKLFamily>(cs);
@ -99,6 +99,10 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio
PreambleHelper<SKLFamily>::getAdditionalCommandsSize(MockDevice(*platformDevices[0])));
}
// Checks that the SKLFamily preamble helper reports Round Robin as its
// default thread arbitration policy.
SKLTEST_F(ThreadArbitration, defaultArbitrationPolicy) {
EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, PreambleHelper<SKLFamily>::getDefaultThreadArbitrationPolicy());
}
GEN9TEST_F(PreambleVfeState, WaOff) {
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
testWaTable.waSendMIFLUSHBeforeVFE = 0;

View File

@ -47,7 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
using BaseClass::CommandStreamReceiver::lastSentL3Config;
using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy;
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy;
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;

View File

@ -39,12 +39,12 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) {
auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::create<MockDevice>(nullptr));
mockDevice->setPreemptionMode(PreemptionMode::MidThread);
uint32_t cmdSizePreambleMidThread = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
uint32_t cmdSizePreemptionMidThread = static_cast<uint32_t>(PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice));
auto cmdSizePreambleMidThread = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice);
mockDevice->setPreemptionMode(PreemptionMode::Disabled);
uint32_t cmdSizePreambleDisabled = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
uint32_t cmdSizePreemptionDisabled = static_cast<uint32_t>(PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice));
auto cmdSizePreambleDisabled = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
auto cmdSizePreemptionDisabled = PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice);
EXPECT_LE(cmdSizePreemptionMidThread, cmdSizePreambleMidThread);
EXPECT_LE(cmdSizePreemptionDisabled, cmdSizePreambleDisabled);
@ -64,7 +64,7 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) {
MockGraphicsAllocation csrSurface(reinterpret_cast<void *>(minCsrAlignment), 1024);
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, &csrSurface);
ThreadArbitrationPolicy::RoundRobin, &csrSurface);
PreemptionHelper::programPreamble<FamilyType>(preemptionStream, *mockDevice, &csrSurface);

View File

@ -53,4 +53,5 @@ TrackParentEvents = false
PrintLWSSizes = false
DisableAUBBufferDump = false
DisableAUBImageDump = false
UseNoRingFlushesKmdMode = false
UseNoRingFlushesKmdMode = false
OverrideThreadArbitrationPolicy = -1