mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Improve thread arbitration policy programming
Change-Id: Ibd764352e14d1a5112034b1c5a1fc6d6d67ebac0
This commit is contained in:

committed by
sys_ocldev

parent
67c8d7a9ca
commit
1292c3d533
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@ -2,4 +2,4 @@
|
||||
neoDependenciesRev='735095-769'
|
||||
strategy='EQUAL'
|
||||
allowedF=42
|
||||
allowedCD=339
|
||||
allowedCD=340
|
||||
|
@ -532,7 +532,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
|
||||
}
|
||||
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy());
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy<GfxFamily>());
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = blocking;
|
||||
|
@ -153,8 +153,8 @@ class CommandStreamReceiver {
|
||||
|
||||
LinearStream commandStream;
|
||||
|
||||
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
|
||||
uint32_t lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbitrationPolicyNotPresent;
|
||||
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
|
||||
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
|
||||
|
||||
GraphicsAllocation *scratchAllocation = nullptr;
|
||||
GraphicsAllocation *preemptionCsrAllocation = nullptr;
|
||||
|
@ -41,7 +41,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
return new CommandStreamReceiverHw<GfxFamily>(hwInfoIn);
|
||||
}
|
||||
|
||||
CommandStreamReceiverHw(const HardwareInfo &hwInfoIn) : hwInfo(hwInfoIn) {}
|
||||
CommandStreamReceiverHw(const HardwareInfo &hwInfoIn);
|
||||
|
||||
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override;
|
||||
|
||||
|
@ -37,6 +37,11 @@
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(const HardwareInfo &hwInfoIn) : hwInfo(hwInfoIn) {
|
||||
requiredThreadArbitrationPolicy = PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
FlushStamp CommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) {
|
||||
return flushStamp->peekStamp();
|
||||
@ -171,6 +176,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (DebugManager.flags.ForceSLML3Config.get()) {
|
||||
dispatchFlags.useSLM = true;
|
||||
}
|
||||
if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
|
||||
requestThreadArbitrationPolicy(static_cast<uint32_t>(DebugManager.flags.OverrideThreadArbitrationPolicy.get()));
|
||||
}
|
||||
|
||||
auto newL3Config = PreambleHelper<GfxFamily>::getL3Config(peekHwInfo(), dispatchFlags.useSLM);
|
||||
|
||||
@ -208,9 +216,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
}
|
||||
}
|
||||
|
||||
if (this->lastSentThreadAribtrationPolicy != this->requiredThreadArbitrationPolicy) {
|
||||
if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
|
||||
PreambleHelper<GfxFamily>::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy);
|
||||
this->lastSentThreadAribtrationPolicy = this->requiredThreadArbitrationPolicy;
|
||||
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
|
||||
}
|
||||
|
||||
stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.GSBA32BitRequired) && force32BitAllocations);
|
||||
@ -581,7 +589,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &cs
|
||||
PreambleHelper<GfxFamily>::programPreamble(&csr, *memoryManager->device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionCsrAllocation);
|
||||
this->isPreambleSent = true;
|
||||
this->lastSentL3Config = newL3Config;
|
||||
this->lastSentThreadAribtrationPolicy = this->requiredThreadArbitrationPolicy;
|
||||
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,8 +23,8 @@
|
||||
#include <stdint.h>
|
||||
namespace OCLRT {
|
||||
namespace ThreadArbitrationPolicy {
|
||||
const uint32_t threadArbirtrationPolicyRoundRobin = 0x100u;
|
||||
const uint32_t threadArbitrationPolicyAgeBased = 0x0u;
|
||||
const uint32_t threadArbitrationPolicyNotPresent = 0xffffffffu;
|
||||
}
|
||||
}
|
||||
const uint32_t AgeBased = 0x0u;
|
||||
const uint32_t RoundRobin = 0x1u;
|
||||
const uint32_t NotPresent = 0xffffffffu;
|
||||
} // namespace ThreadArbitrationPolicy
|
||||
} // namespace OCLRT
|
||||
|
@ -29,15 +29,6 @@ void PreambleHelper<BDWFamily>::setupPipeControlInFrontOfCommand(void *pCmd, con
|
||||
((BDWFamily::PIPE_CONTROL *)pCmd)->setDcFlushEnable(true);
|
||||
}
|
||||
|
||||
template <>
|
||||
void PreambleHelper<BDWFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t threadArbitrationPolicy) {
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t PreambleHelper<BDWFamily>::getAdditionalCommandsSize(const Device &device) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t PreambleHelper<BDWFamily>::getL3Config(const HardwareInfo &hwInfo, bool useSLM) {
|
||||
uint32_t l3Config = 0;
|
||||
@ -61,6 +52,5 @@ void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStre
|
||||
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
|
||||
}
|
||||
|
||||
// Explicitly instantiate PreambleHelper for BDW device family
|
||||
template struct PreambleHelper<BDWFamily>;
|
||||
} // namespace OCLRT
|
||||
|
@ -64,5 +64,30 @@ void PreambleHelper<SKLFamily>::setupPipeControlInFrontOfCommand(void *pCmd, con
|
||||
}
|
||||
}
|
||||
|
||||
template struct PreambleHelper<SKLFamily>;
|
||||
template <>
|
||||
uint32_t PreambleHelper<SKLFamily>::getDefaultThreadArbitrationPolicy() {
|
||||
return ThreadArbitrationPolicy::RoundRobin;
|
||||
}
|
||||
|
||||
template <>
|
||||
void PreambleHelper<SKLFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
|
||||
UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent);
|
||||
|
||||
auto pipeControl = pCommandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pipeControl = PIPE_CONTROL::sInit();
|
||||
pipeControl->setCommandStreamerStallEnable(true);
|
||||
|
||||
auto pCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
*pCmd = MI_LOAD_REGISTER_IMM::sInit();
|
||||
|
||||
pCmd->setRegisterOffset(DebugControlReg2::address);
|
||||
pCmd->setDataDword(DebugControlReg2::getRegData(requiredThreadArbitrationPolicy));
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t PreambleHelper<SKLFamily>::getAdditionalCommandsSize(const Device &device) {
|
||||
return PreemptionHelper::getRequiredPreambleSize<SKLFamily>(device) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL);
|
||||
}
|
||||
|
||||
template struct PreambleHelper<SKLFamily>;
|
||||
} // namespace OCLRT
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
#pragma once
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/command_stream/thread_arbitration_policy.h"
|
||||
|
||||
namespace OCLRT {
|
||||
struct SKLFamily;
|
||||
template <>
|
||||
@ -40,4 +42,12 @@ struct L3CNTLREGConfig<IGFX_BROXTON> {
|
||||
static const uint32_t valueForSLM = 0x60000121u;
|
||||
static const uint32_t valueForNoSLM = 0x80000140u;
|
||||
};
|
||||
}
|
||||
|
||||
namespace DebugControlReg2 {
|
||||
constexpr uint32_t address = 0xE404;
|
||||
constexpr uint32_t getRegData(const uint32_t &policy) {
|
||||
return policy == ThreadArbitrationPolicy::RoundRobin ? 0x100 : 0x0;
|
||||
};
|
||||
} // namespace DebugControlReg2
|
||||
|
||||
} // namespace OCLRT
|
||||
|
@ -36,10 +36,14 @@ class LinearStream;
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct PreambleHelper {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; }
|
||||
|
||||
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
|
||||
static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired);
|
||||
static uint32_t getDefaultThreadArbitrationPolicy();
|
||||
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
|
||||
static void programPreemption(LinearStream *pCommandStream, const Device &device, GraphicsAllocation *preemptionCsr);
|
||||
static void setupPipeControlInFrontOfCommand(void *pCmd, const HardwareInfo *hwInfo, bool isVfeCommand);
|
||||
@ -47,7 +51,7 @@ struct PreambleHelper {
|
||||
static void programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config,
|
||||
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr);
|
||||
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
|
||||
static uint32_t getAdditionalCommandsSize(const Device &device);
|
||||
static size_t getAdditionalCommandsSize(const Device &device);
|
||||
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
|
||||
static uint32_t getUrbEntryAllocationSize();
|
||||
};
|
||||
@ -73,4 +77,5 @@ template <typename GfxFamily>
|
||||
struct L3CNTLRegisterOffset {
|
||||
static const uint32_t registerOffset;
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
||||
|
@ -34,21 +34,11 @@ namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
|
||||
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
}
|
||||
|
||||
// Add a PIPE_CONTROL w/ CS_stall
|
||||
auto pPipeControl = (PIPE_CONTROL *)pCommandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControl = PIPE_CONTROL::sInit();
|
||||
pPipeControl->setCommandStreamerStallEnable(true);
|
||||
setupPipeControlInFrontOfCommand(pPipeControl, nullptr, false);
|
||||
|
||||
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*pCmd = MI_LOAD_REGISTER_IMM::sInit();
|
||||
|
||||
pCmd->setRegisterOffset(0xE404);
|
||||
auto data = requiredThreadArbitrationPolicy;
|
||||
pCmd->setDataDword(data);
|
||||
template <typename GfxFamily>
|
||||
uint32_t PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -56,17 +46,12 @@ void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStre
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
|
||||
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
size_t requiredSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL);
|
||||
requiredSize += PreemptionHelper::getRequiredPreambleSize<GfxFamily>(device);
|
||||
return static_cast<uint32_t>(requiredSize);
|
||||
size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, int scratchSize, uint64_t scratchAddress) {
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE;
|
||||
|
||||
// Add a PIPE_CONTROL w/ CS_stall
|
||||
@ -90,7 +75,6 @@ void PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream, co
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
|
||||
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*pCmd = MI_LOAD_REGISTER_IMM::sInit();
|
||||
|
||||
@ -116,5 +100,4 @@ template <typename GfxFamily>
|
||||
uint32_t PreambleHelper<GfxFamily>::getUrbEntryAllocationSize() {
|
||||
return 0x782;
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "runtime/command_stream/thread_arbitration_policy.h"
|
||||
#include "runtime/device_queue/device_queue.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/program/kernel_info.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
@ -337,11 +338,12 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
const bool isParentKernel;
|
||||
const bool isSchedulerKernel;
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t getThreadArbitrationPolicy() {
|
||||
if (kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) {
|
||||
return ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
|
||||
return PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
|
||||
} else {
|
||||
return ThreadArbitrationPolicy::threadArbitrationPolicyAgeBased;
|
||||
return ThreadArbitrationPolicy::AgeBased;
|
||||
}
|
||||
}
|
||||
bool checkIfIsParentKernelAndBlocksUsesPrintf() {
|
||||
|
@ -81,3 +81,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API v
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, -1, "Keep this variable in sync with PreemptionMode enum. -1 - devices default mode, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)")
|
||||
|
@ -131,10 +131,9 @@ struct UltCommandStreamReceiverTest
|
||||
bool requiresCoherency = false,
|
||||
bool lowPriority = false) {
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = block;
|
||||
dispatchFlags.requiresCoherency = requiresCoherency;
|
||||
dispatchFlags.lowPriority = lowPriority;
|
||||
flushTaskFlags.blocking = block;
|
||||
flushTaskFlags.requiresCoherency = requiresCoherency;
|
||||
flushTaskFlags.lowPriority = lowPriority;
|
||||
|
||||
return commandStreamReceiver.flushTask(
|
||||
commandStream,
|
||||
@ -144,7 +143,7 @@ struct UltCommandStreamReceiverTest
|
||||
ioh,
|
||||
ssh,
|
||||
taskLevel,
|
||||
dispatchFlags);
|
||||
flushTaskFlags);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -174,7 +173,7 @@ struct UltCommandStreamReceiverTest
|
||||
configureCSRHeapStatesToNonDirty<GfxFamily>();
|
||||
commandStreamReceiver.taskLevel = taskLevel;
|
||||
|
||||
commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
|
||||
commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
|
||||
commandStreamReceiver.lastSentCoherencyRequest = 0;
|
||||
commandStreamReceiver.lastMediaSamplerConfig = 0;
|
||||
}
|
||||
@ -184,6 +183,7 @@ struct UltCommandStreamReceiverTest
|
||||
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(pDevice->getCommandStreamReceiver());
|
||||
}
|
||||
|
||||
DispatchFlags flushTaskFlags = {};
|
||||
uint32_t taskLevel = 42;
|
||||
LinearStream commandStream;
|
||||
LinearStream dsh;
|
||||
@ -254,6 +254,19 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldSeeCommandsOnFirstFlush) {
|
||||
EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
|
||||
}
|
||||
|
||||
// Verifies that a flush honours the OverrideThreadArbitrationPolicy debug flag:
// starting from AgeBased for both required and last-sent policy, the flag is set to
// RoundRobin and the last-sent policy is expected to be RoundRobin after flushTask.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) {
    DebugManagerStateRestore debugStateRestore;

    // given: CSR that already required and sent AgeBased
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;
    csr.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;

    // when: the debug override asks for RoundRobin and a task is flushed
    DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin);
    flushTask(csr);

    // then: the override is what was last sent
    EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, csr.lastSentThreadArbitrationPolicy);
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, taskCountShouldBeUpdated) {
|
||||
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
|
||||
flushTask(commandStreamReceiver);
|
||||
@ -832,23 +845,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr
|
||||
commandStreamReceiver.lastSentL3Config = l3Config;
|
||||
|
||||
auto &csrCS = commandStreamReceiver.getCS();
|
||||
size_t sizeNeeded = getSizeRequiredPreambleCS<FamilyType>(MockDevice(commandStreamReceiver.hwInfo)) +
|
||||
sizeof(STATE_BASE_ADDRESS) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(PIPELINE_SELECT) +
|
||||
commandStreamReceiver.getRequiredPipeControlSize() +
|
||||
sizeof(MI_BATCH_BUFFER_START);
|
||||
size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSizeAligned(flushTaskFlags);
|
||||
|
||||
sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize);
|
||||
|
||||
DispatchFlags flags;
|
||||
csrCS.getSpace(csrCS.getAvailableSpace() - commandStreamReceiver.getRequiredCmdStreamSizeAligned(flags));
|
||||
csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeeded);
|
||||
auto expectedBase = csrCS.getBase();
|
||||
|
||||
// This case handles when we have *just* enough space
|
||||
auto expectedUsed = csrCS.getUsed() + sizeNeeded;
|
||||
|
||||
flushTask(commandStreamReceiver);
|
||||
flushTask(commandStreamReceiver, flushTaskFlags.blocking, 0, flushTaskFlags.requiresCoherency, flushTaskFlags.lowPriority);
|
||||
|
||||
// Verify that we didn't grab a new CS buffer
|
||||
EXPECT_EQ(expectedUsed, csrCS.getUsed());
|
||||
@ -1465,7 +1470,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousNOSL
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) {
|
||||
MockCsrHw<FamilyType> commandStreamReceiver(*platformDevices[0]);
|
||||
EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, commandStreamReceiver.peekThreadArbitrationPolicy());
|
||||
EXPECT_EQ(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) {
|
||||
@ -1484,7 +1489,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML
|
||||
// Mark Preamble as sent, override L3Config to SLM config
|
||||
commandStreamReceiver->isPreambleSent = true;
|
||||
commandStreamReceiver->lastSentL3Config = L3Config;
|
||||
commandStreamReceiver->lastSentThreadAribtrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy();
|
||||
commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy<FamilyType>();
|
||||
|
||||
((MockKernel *)kernel)->setTotalSLMSize(1024);
|
||||
|
||||
@ -1963,7 +1968,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAnd
|
||||
commandStreamReceiver.isPreambleSent = true;
|
||||
commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode();
|
||||
commandStreamReceiver.lastMediaSamplerConfig = 0;
|
||||
commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin;
|
||||
commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
|
||||
|
||||
auto &csrCS = commandStreamReceiver.getCS();
|
||||
size_t sizeNeeded = 2 * sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MEDIA_VFE_STATE) +
|
||||
|
@ -67,27 +67,16 @@ BDWTEST_F(Gen8L3Config, checkSLM) {
|
||||
}
|
||||
|
||||
typedef PreambleFixture ThreadArbitrationGen8;
|
||||
BDWTEST_F(ThreadArbitrationGen8, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsNotPresent) {
|
||||
BDWTEST_F(ThreadArbitrationGen8, givenPolicyWhenThreadArbitrationProgrammedThenDoNothing) {
|
||||
typedef BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<BDWFamily>::getL3Config(**platformDevices, true);
|
||||
|
||||
PreambleHelper<BDWFamily>::programPreamble(&linearStream, MockDevice(**platformDevices), l3Config,
|
||||
ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin,
|
||||
nullptr);
|
||||
PreambleHelper<BDWFamily>::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin);
|
||||
|
||||
parseCommands<BDWFamily>(cs);
|
||||
|
||||
auto itorLRI = reverse_find<MI_LOAD_REGISTER_IMM *>(cmdList.rbegin(), cmdList.rend());
|
||||
ASSERT_NE(cmdList.rend(), itorLRI);
|
||||
|
||||
//we expect l3 programming here
|
||||
const auto &lri = *reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
|
||||
auto RegisterOffset = L3CNTLRegisterOffset<BDWFamily>::registerOffset;
|
||||
EXPECT_EQ(RegisterOffset, lri.getRegisterOffset());
|
||||
EXPECT_EQ(1u, lri.getDataDword() & 1);
|
||||
EXPECT_EQ(0u, cs.getUsed());
|
||||
|
||||
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getAdditionalCommandsSize(MockDevice(**platformDevices)));
|
||||
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getDefaultThreadArbitrationPolicy());
|
||||
}
|
||||
|
||||
typedef PreambleFixture Gen8UrbEntryAllocationSize;
|
||||
|
@ -21,6 +21,7 @@
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/gen9/reg_configs.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/fixtures/memory_management_fixture.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
@ -30,8 +31,6 @@
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
constexpr uint32_t gen9ThreadArbiterPolicyRegOffset = 0xE404;
|
||||
|
||||
using Gen9EnqueueTest = Test<DeviceFixture>;
|
||||
GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) {
|
||||
MockContext mc;
|
||||
@ -44,10 +43,10 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdQ);
|
||||
|
||||
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset);
|
||||
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address);
|
||||
ASSERT_NE(nullptr, cmd);
|
||||
EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, cmd->getDataDword());
|
||||
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset));
|
||||
EXPECT_EQ(DebugControlReg2::getRegData(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy()), cmd->getDataDword());
|
||||
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address));
|
||||
}
|
||||
|
||||
GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) {
|
||||
@ -61,9 +60,9 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhe
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdQ);
|
||||
|
||||
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset);
|
||||
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address);
|
||||
ASSERT_NE(nullptr, cmd);
|
||||
EXPECT_EQ(ThreadArbitrationPolicy::threadArbitrationPolicyAgeBased, cmd->getDataDword());
|
||||
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset));
|
||||
}
|
||||
EXPECT_EQ(DebugControlReg2::getRegData(ThreadArbitrationPolicy::AgeBased), cmd->getDataDword());
|
||||
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address));
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
@ -21,7 +21,7 @@
|
||||
*/
|
||||
|
||||
#include "unit_tests/fixtures/media_kernel_fixture.h"
|
||||
#include "runtime/helpers/preamble.inl"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
@ -80,7 +80,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio
|
||||
LinearStream &cs = linearStream;
|
||||
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(**platformDevices, true);
|
||||
PreambleHelper<SKLFamily>::programPreamble(&linearStream, MockDevice(**platformDevices), l3Config,
|
||||
ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin,
|
||||
ThreadArbitrationPolicy::RoundRobin,
|
||||
nullptr);
|
||||
|
||||
parseCommands<SKLFamily>(cs);
|
||||
@ -99,6 +99,10 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio
|
||||
PreambleHelper<SKLFamily>::getAdditionalCommandsSize(MockDevice(*platformDevices[0])));
|
||||
}
|
||||
|
||||
// Checks that the SKL-family default thread arbitration policy is RoundRobin.
SKLTEST_F(ThreadArbitration, defaultArbitrationPolicy) {
    const auto defaultPolicy = PreambleHelper<SKLFamily>::getDefaultThreadArbitrationPolicy();
    EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, defaultPolicy);
}
|
||||
|
||||
GEN9TEST_F(PreambleVfeState, WaOff) {
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
testWaTable.waSendMIFLUSHBeforeVFE = 0;
|
||||
|
@ -47,7 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
|
||||
using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
|
||||
using BaseClass::CommandStreamReceiver::lastSentL3Config;
|
||||
using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
|
||||
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
|
||||
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
|
||||
|
@ -39,12 +39,12 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) {
|
||||
auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::create<MockDevice>(nullptr));
|
||||
|
||||
mockDevice->setPreemptionMode(PreemptionMode::MidThread);
|
||||
uint32_t cmdSizePreambleMidThread = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
|
||||
uint32_t cmdSizePreemptionMidThread = static_cast<uint32_t>(PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice));
|
||||
auto cmdSizePreambleMidThread = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
|
||||
auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice);
|
||||
|
||||
mockDevice->setPreemptionMode(PreemptionMode::Disabled);
|
||||
uint32_t cmdSizePreambleDisabled = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
|
||||
uint32_t cmdSizePreemptionDisabled = static_cast<uint32_t>(PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice));
|
||||
auto cmdSizePreambleDisabled = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*mockDevice);
|
||||
auto cmdSizePreemptionDisabled = PreemptionHelper::getRequiredPreambleSize<FamilyType>(*mockDevice);
|
||||
|
||||
EXPECT_LE(cmdSizePreemptionMidThread, cmdSizePreambleMidThread);
|
||||
EXPECT_LE(cmdSizePreemptionDisabled, cmdSizePreambleDisabled);
|
||||
@ -64,7 +64,7 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) {
|
||||
MockGraphicsAllocation csrSurface(reinterpret_cast<void *>(minCsrAlignment), 1024);
|
||||
|
||||
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
|
||||
ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, &csrSurface);
|
||||
ThreadArbitrationPolicy::RoundRobin, &csrSurface);
|
||||
|
||||
PreemptionHelper::programPreamble<FamilyType>(preemptionStream, *mockDevice, &csrSurface);
|
||||
|
||||
|
@ -53,4 +53,5 @@ TrackParentEvents = false
|
||||
PrintLWSSizes = false
|
||||
DisableAUBBufferDump = false
|
||||
DisableAUBImageDump = false
|
||||
UseNoRingFlushesKmdMode = false
|
||||
UseNoRingFlushesKmdMode = false
|
||||
OverrideThreadArbitrationPolicy = -1
|
||||
|
Reference in New Issue
Block a user