Always initialize Kernel::threadArbitrationPolicy

Resolves: NEO-4824

Change-Id: I2ba24bec1a4cc2d1573ca7283a68ea0d94c73ff0
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski 2020-08-03 20:39:06 +02:00 committed by sys_ocldev
parent d7f196f62a
commit 76328b8247
20 changed files with 103 additions and 25 deletions

View File

@ -227,7 +227,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
statePreemption = commandQueuePreemptionMode;
}
uint32_t threadArbitrationPolicy = NEO::PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
uint32_t threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
}

View File

@ -321,6 +321,8 @@ cl_int Kernel::initialize() {
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
}
}
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
if (kernelInfo.patchInfo.executionEnvironment) {
if (!kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) {
setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased);

View File

@ -695,6 +695,68 @@ INSTANTIATE_TEST_CASE_P(EnqueueKernel,
EnqueueKernelPrintfTest,
::testing::ValuesIn(TestParamPrintf));
using EnqueueKernelTests = ::testing::Test;
// Enqueues two kernels alternately on one queue and checks, after each flush, that the
// CSR's requiredThreadArbitrationPolicy tracks the kernel just submitted:
//   - SubgroupIndependentForwardProgressRequired == true  -> HwHelper default policy
//   - SubgroupIndependentForwardProgressRequired == false -> ThreadArbitrationPolicy::AgeBased
// The third enqueue confirms the value switches back to the default.
HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThreadArbitrationPolicy) {
// Local subclass whose only purpose is to make requiredThreadArbitrationPolicy readable here.
struct myCsr : public UltCommandStreamReceiver<FamilyType> {
using CommandStreamReceiverHw<FamilyType>::requiredThreadArbitrationPolicy;
};
// Minimal 1-D dispatch: a single work item in a single work group.
cl_uint workDim = 1;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {1, 1, 1};
size_t localWorkSize[3] = {1, 1, 1};
UltClDeviceFactory clDeviceFactory{1, 0};
MockContext context{clDeviceFactory.rootDevices[0]};
// The same patch-token struct is reused: the flag is flipped between the two kernel constructions.
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment};
// NOTE(review): retVal is written by CommandQueue::create but never checked in this test.
cl_int retVal;
std::unique_ptr<CommandQueue> pCommandQueue{CommandQueue::create(&context, clDeviceFactory.rootDevices[0], nullptr, true, retVal)};
// NOTE(review): assumes the queue's GPGPU CSR really is an UltCommandStreamReceiver<FamilyType>,
// so that viewing it through myCsr is acceptable - confirm.
auto &csr = static_cast<myCsr &>(pCommandQueue->getGpgpuCommandStreamReceiver());
// Step 1: kernel that requires independent forward progress -> default policy expected.
// NOTE(review): the enqueueKernel return codes are ignored throughout this test.
pCommandQueue->enqueueKernel(
mockKernelWithInternalsWithIfpRequired.mockKernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
0,
nullptr,
nullptr);
pCommandQueue->flush();
EXPECT_EQ(HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy(), csr.requiredThreadArbitrationPolicy);
// Step 2: kernel that does not require independent forward progress -> AgeBased expected.
pCommandQueue->enqueueKernel(
mockKernelWithInternalsWithIfpNotRequired.mockKernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
0,
nullptr,
nullptr);
pCommandQueue->flush();
EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, csr.requiredThreadArbitrationPolicy);
// Step 3: first kernel again -> the CSR must return to the default policy.
pCommandQueue->enqueueKernel(
mockKernelWithInternalsWithIfpRequired.mockKernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
0,
nullptr,
nullptr);
pCommandQueue->flush();
EXPECT_EQ(HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy(), csr.requiredThreadArbitrationPolicy);
}
typedef HelloWorldFixture<HelloWorldFixtureFactory> EnqueueKernelFixture;
typedef Test<EnqueueKernelFixture> EnqueueKernelTest;

View File

@ -360,7 +360,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests,
// Checks that a freshly constructed MockCsrHw reports the family's default thread
// arbitration policy through peekThreadArbitrationPolicy().
// NOTE(review): two expectations appear here, one querying PreambleHelper and one querying
// HwHelperHw. The surrounding hunk header (7 lines before, 7 after) suggests they are the
// before/after pair of a one-line replacement - confirm which one belongs in the file.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) {
MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
EXPECT_EQ(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy());
EXPECT_EQ(HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) {

View File

@ -32,7 +32,7 @@ GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhen
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address);
ASSERT_NE(nullptr, cmd);
EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy()], cmd->getDataDword());
EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy()], cmd->getDataDword());
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address));
}

View File

@ -32,7 +32,7 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe
auto cmd = findMmioCmd<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address);
ASSERT_NE(nullptr, cmd);
EXPECT_EQ(DebugControlReg2::getRegData(PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy()), cmd->getDataDword());
EXPECT_EQ(DebugControlReg2::getRegData(HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy()), cmd->getDataDword());
EXPECT_EQ(1U, countMmio<FamilyType>(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address));
}

View File

@ -3101,6 +3101,17 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKe
EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer());
}
// Builds a mock kernel whose execution environment has
// SubgroupIndependentForwardProgressRequired = true and checks that its
// threadArbitrationPolicy equals the HwHelper default for the device's render core family.
// NOTE(review): presumably MockKernelWithInternals runs the kernel initialization that
// assigns the policy - confirm against the mock.
TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) {
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
UltClDeviceFactory deviceFactory{1, 0};
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecutionEnvironment};
auto &mockKernel = *mockKernelWithInternals.mockKernel;
// Look the helper up by core family at run time, since this is a plain TEST with no FamilyType.
auto &hwHelper = HwHelper::get(deviceFactory.rootDevices[0]->getHardwareInfo().platform.eRenderCoreFamily);
EXPECT_EQ(hwHelper.getDefaultThreadArbitrationPolicy(), mockKernel.threadArbitrationPolicy);
}
namespace NEO {
template <typename GfxFamily>

View File

@ -45,7 +45,7 @@ CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(ExecutionEnvironment
auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily);
localMemoryEnabled = hwHelper.getEnableLocalMemory(peekHwInfo());
requiredThreadArbitrationPolicy = PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
requiredThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
resetKmdNotifyHelper(new KmdNotifyHelper(&peekHwInfo().capabilityTable.kmdNotifyProperties));
flatBatchBufferHelper.reset(new FlatBatchBufferHelperHw<GfxFamily>(executionEnvironment));
defaultSshSize = getSshHeapSize();

View File

@ -21,6 +21,11 @@ std::string HwHelperHw<Family>::getExtensions() const {
return "cl_intel_subgroup_local_block_io ";
}
// Explicit specialization for this translation unit's Family: the default thread
// arbitration policy is RoundRobinAfterDependency.
// NOTE(review): Family is an alias defined outside this view; presumably the Gen11 (ICL)
// family, matching the PreambleHelper<ICLFamily> value - confirm.
template <>
uint32_t HwHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
return ThreadArbitrationPolicy::RoundRobinAfterDependency;
}
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;
template struct MemorySynchronizationCommands<Family>;

View File

@ -63,11 +63,6 @@ void PreambleHelper<ICLFamily>::addPipeControlBeforeVfeCmd(LinearStream *pComman
*pipeControl = cmd;
}
// ICL specialization: reports RoundRobinAfterDependency as the default thread arbitration policy.
// NOTE(review): the hunk header shrinks this region from 11 to 6 lines, so this definition
// looks like the one being removed in favour of HwHelperHw::getDefaultThreadArbitrationPolicy - confirm.
template <>
uint32_t PreambleHelper<ICLFamily>::getDefaultThreadArbitrationPolicy() {
return ThreadArbitrationPolicy::RoundRobinAfterDependency;
}
template <>
void PreambleHelper<ICLFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent);

View File

@ -34,6 +34,11 @@ uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
}
// Explicit specialization for this translation unit's Family: the default thread
// arbitration policy is RoundRobin.
// NOTE(review): Family is an alias defined outside this view; the neighbouring
// getMetricsLibraryGenId returns ClientGen::Gen9, so this is presumably the Gen9 (SKL) family - confirm.
template <>
uint32_t HwHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
return ThreadArbitrationPolicy::RoundRobin;
}
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;
template struct MemorySynchronizationCommands<Family>;

View File

@ -63,11 +63,6 @@ void PreambleHelper<SKLFamily>::addPipeControlBeforeVfeCmd(LinearStream *pComman
*pipeControl = cmd;
}
// SKL specialization: reports RoundRobin as the default thread arbitration policy.
// NOTE(review): the hunk header shrinks this region from 11 to 6 lines, so this definition
// looks like the one being removed in favour of HwHelperHw::getDefaultThreadArbitrationPolicy - confirm.
template <>
uint32_t PreambleHelper<SKLFamily>::getDefaultThreadArbitrationPolicy() {
return ThreadArbitrationPolicy::RoundRobin;
}
template <>
void PreambleHelper<SKLFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent);

View File

@ -120,6 +120,7 @@ class HwHelper {
virtual bool isBankOverrideRequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0;
virtual uint32_t getGlobalTimeStampBits() const = 0;
virtual uint32_t getDefaultThreadArbitrationPolicy() const = 0;
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
@ -304,6 +305,8 @@ class HwHelperHw : public HwHelper {
bool isBankOverrideRequired(const HardwareInfo &hwInfo) const override;
uint32_t getDefaultThreadArbitrationPolicy() const override;
protected:
LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const override;

View File

@ -466,4 +466,9 @@ bool HwHelperHw<GfxFamily>::isBankOverrideRequired(const HardwareInfo &hwInfo) c
return false;
}
// Generic implementation used by every family that does not provide its own explicit
// specialization: returns 0.
// NOTE(review): 0 is a bare literal here; presumably it corresponds to one of the
// ThreadArbitrationPolicy enumerators - confirm and consider naming it.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getDefaultThreadArbitrationPolicy() const {
return 0;
}
} // namespace NEO

View File

@ -33,7 +33,6 @@ struct PreambleHelper {
static void programPipelineSelect(LinearStream *pCommandStream,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo);
static uint32_t getDefaultThreadArbitrationPolicy();
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr);
static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, aub_stream::EngineType engineType);

View File

@ -31,11 +31,6 @@ size_t PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize() {
return 0;
}
// Generic PreambleHelper implementation: returns 0 as the default thread arbitration policy.
// NOTE(review): the hunk header shrinks this region from 11 to 6 lines, so this definition
// looks like the one being removed in favour of HwHelperHw::getDefaultThreadArbitrationPolicy - confirm.
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy() {
return 0;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) {
}

View File

@ -138,5 +138,5 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb
}
// Pins the Gen11 (ICLFamily) default thread arbitration policy to RoundRobinAfterDependency.
// NOTE(review): the PreambleHelper-based and HwHelperHw-based expectations both appear here;
// they look like the before/after pair of a one-line replacement - confirm which one belongs in the file.
GEN11TEST_F(ThreadArbitrationGen11, defaultArbitrationPolicy) {
EXPECT_EQ(ThreadArbitrationPolicy::RoundRobinAfterDependency, PreambleHelper<ICLFamily>::getDefaultThreadArbitrationPolicy());
EXPECT_EQ(ThreadArbitrationPolicy::RoundRobinAfterDependency, HwHelperHw<ICLFamily>::get().getDefaultThreadArbitrationPolicy());
}

View File

@ -158,7 +158,7 @@ GEN12LPTEST_F(ThreadArbitrationGen12Lp, givenPolicyWhenThreadArbitrationProgramm
PreambleHelper<FamilyType>::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin);
EXPECT_EQ(0u, cs.getUsed());
EXPECT_EQ(0u, PreambleHelper<FamilyType>::getDefaultThreadArbitrationPolicy());
EXPECT_EQ(0u, HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy());
}
typedef PreambleFixture PreemptionWatermarkGen12LP;

View File

@ -82,7 +82,7 @@ BDWTEST_F(ThreadArbitrationGen8, givenPolicyWhenThreadArbitrationProgrammedThenD
MockDevice device;
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getAdditionalCommandsSize(device));
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getThreadArbitrationCommandsSize());
EXPECT_EQ(0u, PreambleHelper<BDWFamily>::getDefaultThreadArbitrationPolicy());
EXPECT_EQ(0u, HwHelperHw<BDWFamily>::get().getDefaultThreadArbitrationPolicy());
}
typedef PreambleFixture Gen8UrbEntryAllocationSize;

View File

@ -100,7 +100,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio
}
// Pins the SKLFamily default thread arbitration policy to RoundRobin.
// NOTE(review): the PreambleHelper-based and HwHelperHw-based expectations both appear here;
// they look like the before/after pair of a one-line replacement - confirm which one belongs in the file.
SKLTEST_F(ThreadArbitration, defaultArbitrationPolicy) {
EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, PreambleHelper<SKLFamily>::getDefaultThreadArbitrationPolicy());
EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, HwHelperHw<SKLFamily>::get().getDefaultThreadArbitrationPolicy());
}
GEN9TEST_F(PreambleVfeState, WaOff) {