diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index ac907f5f85..0798cb086e 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -227,7 +227,8 @@ ze_result_t CommandQueueHw::executeCommandLists( statePreemption = commandQueuePreemptionMode; } - uint32_t threadArbitrationPolicy = NEO::PreambleHelper::getDefaultThreadArbitrationPolicy(); + auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily); + uint32_t threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = static_cast(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get()); } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index ac2bb65ac4..7fd7894ee9 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -321,6 +321,8 @@ cl_int Kernel::initialize() { Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } } + + setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy()); if (kernelInfo.patchInfo.executionEnvironment) { if (!kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) { setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index b6fd63afc3..392ea92ff7 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -695,6 +695,68 @@ INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelPrintfTest, ::testing::ValuesIn(TestParamPrintf)); +using EnqueueKernelTests = ::testing::Test; + +HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThreadArbitrationPolicy) { + struct myCsr : public UltCommandStreamReceiver { + using CommandStreamReceiverHw::requiredThreadArbitrationPolicy; + }; + + cl_uint workDim = 1; + size_t globalWorkOffset[3] = {0, 0, 0}; + size_t globalWorkSize[3] = {1, 1, 1}; + size_t localWorkSize[3] = {1, 1, 1}; + + UltClDeviceFactory clDeviceFactory{1, 0}; + MockContext context{clDeviceFactory.rootDevices[0]}; + SPatchExecutionEnvironment sPatchExecutionEnvironment = {}; + + sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true; + MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment}; + sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = false; + MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment}; + + cl_int retVal; + std::unique_ptr pCommandQueue{CommandQueue::create(&context, clDeviceFactory.rootDevices[0], nullptr, true, retVal)}; + auto &csr = static_cast(pCommandQueue->getGpgpuCommandStreamReceiver()); + + pCommandQueue->enqueueKernel( + mockKernelWithInternalsWithIfpRequired.mockKernel, + workDim, + globalWorkOffset, + globalWorkSize, + localWorkSize, + 0, + nullptr, + nullptr); + pCommandQueue->flush(); + EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), csr.requiredThreadArbitrationPolicy); + + pCommandQueue->enqueueKernel( + mockKernelWithInternalsWithIfpNotRequired.mockKernel, + workDim, + globalWorkOffset, + globalWorkSize, + localWorkSize, + 0, + nullptr, + nullptr); + pCommandQueue->flush(); + EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, csr.requiredThreadArbitrationPolicy); + + pCommandQueue->enqueueKernel( + mockKernelWithInternalsWithIfpRequired.mockKernel, + workDim, + globalWorkOffset, + globalWorkSize, + localWorkSize, + 0, + nullptr, + nullptr); + pCommandQueue->flush(); + EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), csr.requiredThreadArbitrationPolicy); +} + typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 9e8ae88f01..c00e1e2968 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -360,7 +360,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); - EXPECT_EQ(PreambleHelper::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy()); + EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) { diff --git a/opencl/test/unit_test/gen11/enqueue_kernel_gen11.cpp b/opencl/test/unit_test/gen11/enqueue_kernel_gen11.cpp index dc42294dd6..3b4eee4431 100644 --- a/opencl/test/unit_test/gen11/enqueue_kernel_gen11.cpp +++ b/opencl/test/unit_test/gen11/enqueue_kernel_gen11.cpp @@ -32,7 +32,7 @@ GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhen auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address); ASSERT_NE(nullptr, cmd); - EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[PreambleHelper::getDefaultThreadArbitrationPolicy()], cmd->getDataDword()); + EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[HwHelperHw::get().getDefaultThreadArbitrationPolicy()], cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address)); } diff --git a/opencl/test/unit_test/gen9/enqueue_kernel_gen9.cpp b/opencl/test/unit_test/gen9/enqueue_kernel_gen9.cpp index 4b4e340e11..ffd91ca1c4 100644 --- a/opencl/test/unit_test/gen9/enqueue_kernel_gen9.cpp +++ b/opencl/test/unit_test/gen9/enqueue_kernel_gen9.cpp @@ -32,7 +32,7 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); - EXPECT_EQ(DebugControlReg2::getRegData(PreambleHelper::getDefaultThreadArbitrationPolicy()), cmd->getDataDword()); + EXPECT_EQ(DebugControlReg2::getRegData(HwHelperHw::get().getDefaultThreadArbitrationPolicy()), cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 54954fb806..df346681f8 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -3101,6 +3101,17 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKe EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer()); } +TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) { + SPatchExecutionEnvironment sPatchExecutionEnvironment = {}; + sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true; + UltClDeviceFactory deviceFactory{1, 0}; + MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecutionEnvironment}; + + auto &mockKernel = *mockKernelWithInternals.mockKernel; + auto &hwHelper = HwHelper::get(deviceFactory.rootDevices[0]->getHardwareInfo().platform.eRenderCoreFamily); + EXPECT_EQ(hwHelper.getDefaultThreadArbitrationPolicy(), mockKernel.threadArbitrationPolicy); +} + namespace NEO { template diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index fc858689a4..c71e6d98de 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -45,7 +45,7 @@ CommandStreamReceiverHw::CommandStreamReceiverHw(ExecutionEnvironment auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); localMemoryEnabled = hwHelper.getEnableLocalMemory(peekHwInfo()); - requiredThreadArbitrationPolicy = PreambleHelper::getDefaultThreadArbitrationPolicy(); + requiredThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); resetKmdNotifyHelper(new KmdNotifyHelper(&peekHwInfo().capabilityTable.kmdNotifyProperties)); flatBatchBufferHelper.reset(new FlatBatchBufferHelperHw(executionEnvironment)); defaultSshSize = getSshHeapSize(); diff --git a/shared/source/gen11/hw_helper_gen11.cpp b/shared/source/gen11/hw_helper_gen11.cpp index 76844eded5..dabd0f65f2 100644 --- a/shared/source/gen11/hw_helper_gen11.cpp +++ b/shared/source/gen11/hw_helper_gen11.cpp @@ -21,6 +21,11 @@ std::string HwHelperHw::getExtensions() const { return "cl_intel_subgroup_local_block_io "; } +template <> +uint32_t HwHelperHw::getDefaultThreadArbitrationPolicy() const { + return ThreadArbitrationPolicy::RoundRobinAfterDependency; +} + template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; diff --git a/shared/source/gen11/preamble_gen11.cpp b/shared/source/gen11/preamble_gen11.cpp index f799d9237c..1da5e4cb08 100644 --- a/shared/source/gen11/preamble_gen11.cpp +++ b/shared/source/gen11/preamble_gen11.cpp @@ -63,11 +63,6 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pComman *pipeControl = cmd; } -template <> -uint32_t PreambleHelper::getDefaultThreadArbitrationPolicy() { - return ThreadArbitrationPolicy::RoundRobinAfterDependency; -} - template <> void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) { UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent); diff --git a/shared/source/gen9/hw_helper_gen9.cpp b/shared/source/gen9/hw_helper_gen9.cpp index 52a8b5f6af..e596c3d941 100644 --- a/shared/source/gen9/hw_helper_gen9.cpp +++ b/shared/source/gen9/hw_helper_gen9.cpp @@ -34,6 +34,11 @@ uint32_t HwHelperHw::getMetricsLibraryGenId() const { return static_cast(MetricsLibraryApi::ClientGen::Gen9); } +template <> +uint32_t HwHelperHw::getDefaultThreadArbitrationPolicy() const { + return ThreadArbitrationPolicy::RoundRobin; +} + template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; diff --git a/shared/source/gen9/preamble_gen9.cpp b/shared/source/gen9/preamble_gen9.cpp index deac7033ef..61a32604b5 100644 --- a/shared/source/gen9/preamble_gen9.cpp +++ b/shared/source/gen9/preamble_gen9.cpp @@ -63,11 +63,6 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pComman *pipeControl = cmd; } -template <> -uint32_t PreambleHelper::getDefaultThreadArbitrationPolicy() { - return ThreadArbitrationPolicy::RoundRobin; -} - template <> void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) { UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 7e450fab3e..90bbef9d45 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -120,6 +120,7 @@ class HwHelper { virtual bool isBankOverrideRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0; virtual uint32_t getGlobalTimeStampBits() const = 0; + virtual uint32_t getDefaultThreadArbitrationPolicy() const = 0; static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo); static uint32_t getEnginesCount(const HardwareInfo &hwInfo); @@ -304,6 +305,8 @@ class HwHelperHw : public HwHelper { bool isBankOverrideRequired(const HardwareInfo &hwInfo) const override; + uint32_t getDefaultThreadArbitrationPolicy() const override; + protected: LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index ffec72d17a..eb8e233b04 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -466,4 +466,9 @@ bool HwHelperHw::isBankOverrideRequired(const HardwareInfo &hwInfo) c return false; } +template +uint32_t HwHelperHw::getDefaultThreadArbitrationPolicy() const { + return 0; +} + } // namespace NEO diff --git a/shared/source/helpers/preamble.h b/shared/source/helpers/preamble.h index f35916e268..75b0e00410 100644 --- a/shared/source/helpers/preamble.h +++ b/shared/source/helpers/preamble.h @@ -33,7 +33,6 @@ struct PreambleHelper { static void programPipelineSelect(LinearStream *pCommandStream, const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo); - static uint32_t getDefaultThreadArbitrationPolicy(); static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr); static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, aub_stream::EngineType engineType); diff --git a/shared/source/helpers/preamble_base.inl b/shared/source/helpers/preamble_base.inl index 61f957c914..10875c98aa 100644 --- a/shared/source/helpers/preamble_base.inl +++ b/shared/source/helpers/preamble_base.inl @@ -31,11 +31,6 @@ size_t PreambleHelper::getThreadArbitrationCommandsSize() { return 0; } -template -uint32_t PreambleHelper::getDefaultThreadArbitrationPolicy() { - return 0; -} - template void PreambleHelper::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) { } diff --git a/shared/test/unit_test/gen11/test_preamble_gen11.cpp b/shared/test/unit_test/gen11/test_preamble_gen11.cpp index 140ea13c4b..a09c4c85dd 100644 --- a/shared/test/unit_test/gen11/test_preamble_gen11.cpp +++ b/shared/test/unit_test/gen11/test_preamble_gen11.cpp @@ -138,5 +138,5 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb } GEN11TEST_F(ThreadArbitrationGen11, defaultArbitrationPolicy) { - EXPECT_EQ(ThreadArbitrationPolicy::RoundRobinAfterDependency, PreambleHelper::getDefaultThreadArbitrationPolicy()); + EXPECT_EQ(ThreadArbitrationPolicy::RoundRobinAfterDependency, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } diff --git a/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp index bdb9931f8c..6b22b8ee19 100644 --- a/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp @@ -158,7 +158,7 @@ GEN12LPTEST_F(ThreadArbitrationGen12Lp, givenPolicyWhenThreadArbitrationProgramm PreambleHelper::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin); EXPECT_EQ(0u, cs.getUsed()); - EXPECT_EQ(0u, PreambleHelper::getDefaultThreadArbitrationPolicy()); + EXPECT_EQ(0u, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } typedef PreambleFixture PreemptionWatermarkGen12LP; diff --git a/shared/test/unit_test/gen8/test_preamble_gen8.cpp b/shared/test/unit_test/gen8/test_preamble_gen8.cpp index f358b081f6..7d167ce38a 100644 --- a/shared/test/unit_test/gen8/test_preamble_gen8.cpp +++ b/shared/test/unit_test/gen8/test_preamble_gen8.cpp @@ -82,7 +82,7 @@ BDWTEST_F(ThreadArbitrationGen8, givenPolicyWhenThreadArbitrationProgrammedThenD MockDevice device; EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(device)); EXPECT_EQ(0u, PreambleHelper::getThreadArbitrationCommandsSize()); - EXPECT_EQ(0u, PreambleHelper::getDefaultThreadArbitrationPolicy()); + EXPECT_EQ(0u, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } typedef PreambleFixture Gen8UrbEntryAllocationSize; diff --git a/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp b/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp index 571f2cde9b..08a61bb026 100644 --- a/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp +++ b/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp @@ -100,7 +100,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio } SKLTEST_F(ThreadArbitration, defaultArbitrationPolicy) { - EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, PreambleHelper::getDefaultThreadArbitrationPolicy()); + EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } GEN9TEST_F(PreambleVfeState, WaOff) {