diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index ee1b1cd07c..0ef8591ec4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -75,7 +75,7 @@ template void CommandListCoreFamily::programThreadArbitrationPolicy(Device *device) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily); - threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); + auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = static_cast(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get()); } diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 25ae18ed4f..e06f08099e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -602,7 +602,8 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadySe 0, nullptr, nullptr); - EXPECT_EQ(getNewKernelArbitrationPolicy(euThreadSetting), pDevice->getUltCommandStreamReceiver().requiredThreadArbitrationPolicy); + EXPECT_EQ(getNewKernelArbitrationPolicy(euThreadSetting), + static_cast(pDevice->getUltCommandStreamReceiver().streamProperties.stateComputeMode.threadArbitrationPolicy.value)); mockCmdQ->release(); } @@ -637,8 +638,9 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenNotSupportedPolicyChangeThen 0, nullptr, nullptr); - EXPECT_NE(getNewKernelArbitrationPolicy(euThreadSetting), pDevice->getUltCommandStreamReceiver().requiredThreadArbitrationPolicy); - EXPECT_EQ(0u, pDevice->getUltCommandStreamReceiver().requiredThreadArbitrationPolicy); + EXPECT_NE(getNewKernelArbitrationPolicy(euThreadSetting), + static_cast(pDevice->getUltCommandStreamReceiver().streamProperties.stateComputeMode.threadArbitrationPolicy.value)); + EXPECT_EQ(0, pDevice->getUltCommandStreamReceiver().streamProperties.stateComputeMode.threadArbitrationPolicy.value); mockCmdQ->release(); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 726afe07cb..badb134730 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -721,7 +721,7 @@ using EnqueueKernelTests = ::testing::Test; HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThreadArbitrationPolicy) { struct myCsr : public UltCommandStreamReceiver { - using CommandStreamReceiverHw::requiredThreadArbitrationPolicy; + using CommandStreamReceiverHw::streamProperties; }; cl_uint workDim = 1; @@ -754,7 +754,8 @@ HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThr nullptr, nullptr); pCommandQueue->flush(); - EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), csr.requiredThreadArbitrationPolicy); + EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), + static_cast(csr.streamProperties.stateComputeMode.threadArbitrationPolicy.value)); pCommandQueue->enqueueKernel( mockKernelWithInternalsWithIfpNotRequired.mockKernel, @@ -766,7 +767,8 @@ HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThr nullptr, nullptr); pCommandQueue->flush(); - EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, csr.requiredThreadArbitrationPolicy); + EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, + static_cast(csr.streamProperties.stateComputeMode.threadArbitrationPolicy.value)); pCommandQueue->enqueueKernel( mockKernelWithInternalsWithIfpRequired.mockKernel, @@ -778,7 +780,8 @@ HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThr nullptr, nullptr); pCommandQueue->flush(); - EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), csr.requiredThreadArbitrationPolicy); + EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), + static_cast(csr.streamProperties.stateComputeMode.threadArbitrationPolicy.value)); } typedef HelloWorldFixture EnqueueKernelFixture; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 3c34f224d3..ed8b3ef555 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -70,14 +70,14 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceImplicitFlushDebugVariab HWTEST_F(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; - commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); - flushTask(commandStreamReceiver); + EXPECT_EQ(-1, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); - EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, commandStreamReceiver.lastSentThreadArbitrationPolicy); + flushTask(commandStreamReceiver); + EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, + static_cast(commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenTaskCountIsIncremented) { @@ -953,7 +953,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWh csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, - commandStreamReceiver.requiredThreadArbitrationPolicy); + flushTaskFlags.threadArbitrationPolicy); flushTask(commandStreamReceiver); EXPECT_EQ(sizeNeeded, csrCS.getUsed()); @@ -987,7 +987,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble - sizeNeededForStateBaseAddress); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, - commandStreamReceiver.requiredThreadArbitrationPolicy); + flushTaskFlags.threadArbitrationPolicy); flushTask(commandStreamReceiver); EXPECT_EQ(sizeNeeded, csrCS.getUsed()); @@ -1025,7 +1025,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(mockDevice->getHardwareInfo()); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, - commandStreamReceiver.requiredThreadArbitrationPolicy); + flushTaskFlags.threadArbitrationPolicy); commandStreamReceiver.flushTask( commandStream, 0, @@ -1304,21 +1304,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWhenCallFlushTas DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); - uint32_t beforeFlushRequiredThreadArbitrationPolicy = mockCsr->requiredThreadArbitrationPolicy; - - mockCsr->flushTask(commandStream, - 0, - dsh, - ioh, - ssh, - taskLevel, - dispatchFlags, - *pDevice); - - EXPECT_EQ(beforeFlushRequiredThreadArbitrationPolicy, mockCsr->requiredThreadArbitrationPolicy); - dispatchFlags.threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; - mockCsr->requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; mockCsr->flushTask(commandStream, 0, @@ -1329,7 +1315,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWhenCallFlushTas dispatchFlags, *pDevice); - EXPECT_EQ(dispatchFlags.threadArbitrationPolicy, mockCsr->requiredThreadArbitrationPolicy); + EXPECT_EQ(dispatchFlags.threadArbitrationPolicy, static_cast(mockCsr->streamProperties.stateComputeMode.threadArbitrationPolicy.value)); } class CommandStreamReceiverFlushTaskMemoryCompressionTests : public UltCommandStreamReceiverTest, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 298869c401..d512e31464 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -377,8 +377,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { - MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy()); + auto pCommandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); + EXPECT_EQ(static_cast(-1), pCommandStreamReceiver->peekThreadArbitrationPolicy()); + + flushTask(*pCommandStreamReceiver); + EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), pCommandStreamReceiver->peekThreadArbitrationPolicy()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) { @@ -397,7 +401,6 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = L3Config; - commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy(); ((MockKernel *)kernel)->setTotalSLMSize(1024); @@ -1040,7 +1043,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenPreambleSe commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.lastMediaSamplerConfig = 0; - commandStreamReceiver.lastSentThreadArbitrationPolicy = commandStreamReceiver.requiredThreadArbitrationPolicy; commandStreamReceiver.streamProperties.stateComputeMode.isCoherencyRequired.value = 0; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); @@ -1052,7 +1054,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenPreambleSe expectedUsed = alignUp(expectedUsed, MemoryConstants::cacheLineSize); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, - commandStreamReceiver.requiredThreadArbitrationPolicy); + flushTaskFlags.threadArbitrationPolicy); commandStreamReceiver.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, flushTaskFlags, *pDevice); // Verify that we didn't grab a new CS buffer diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 39f6814db0..0da9327fd0 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -674,28 +674,28 @@ struct PreambleThreadArbitrationMatcher { } }; -HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenVariousInputWhenFlushingTaskThenProgramThreadArbitrationPolicyWhenNeeded, PreambleThreadArbitrationMatcher) { +HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenPolicyValueChangedWhenFlushingTaskThenProgramThreadArbitrationPolicy, PreambleThreadArbitrationMatcher) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = true; - commandStreamReceiver.requiredThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); flushTask(commandStreamReceiver); size_t parsingOffset = commandStreamReceiver.commandStream.getUsed(); for (auto arbitrationChanged : ::testing::Bool()) { - commandStreamReceiver.lastSentThreadArbitrationPolicy = arbitrationChanged ? ThreadArbitrationPolicy::NotPresent - : hwHelper.getDefaultThreadArbitrationPolicy(); - for (auto isPreambleNeeded : ::testing::Bool()) { - commandStreamReceiver.isPreambleSent = !isPreambleNeeded; + commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value = + arbitrationChanged ? -1 : hwHelper.getDefaultThreadArbitrationPolicy(); - flushTask(commandStreamReceiver); - HardwareParse csHwParser; - csHwParser.parseCommands(commandStreamReceiver.commandStream, parsingOffset); - auto miLoadRegisterCommandsCount = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()).size(); - size_t expectedCount = (isPreambleNeeded ? 2 : (arbitrationChanged ? 1 : 0)); - EXPECT_EQ(expectedCount, miLoadRegisterCommandsCount); - parsingOffset = commandStreamReceiver.commandStream.getUsed(); + flushTask(commandStreamReceiver); + HardwareParse csHwParser; + csHwParser.parseCommands(commandStreamReceiver.commandStream, parsingOffset); + auto miLoadRegisterCommandsCount = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()).size(); + if (arbitrationChanged) { + EXPECT_GE(miLoadRegisterCommandsCount, 1u); + } else { + EXPECT_EQ(0u, miLoadRegisterCommandsCount); } + parsingOffset = commandStreamReceiver.commandStream.getUsed(); } } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index c567f362c0..6a7a2971cc 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -34,7 +34,6 @@ using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyNotChangedWhenEstimatingPreambleCmdSizeThenReturnItsValue) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; auto expectedCmdSize = sizeof(typename FamilyType::PIPE_CONTROL) + sizeof(typename FamilyType::MEDIA_VFE_STATE); EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice)); } @@ -144,11 +143,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPoli auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; auto policyNotChangedPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); auto policyNotChangedFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy + 1; + commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty = true; auto policyChangedPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); auto policyChangedFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); @@ -160,9 +158,8 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPoli EXPECT_EQ(expectedDifference, actualDifferenceForFlush); } -HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingFlushTaskSizeThenResultDependsOnPolicyProgrammingAndAdditionalCmdsSize) { +HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingFlushTaskSizeThenResultDependsOnAdditionalCmdsSize) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; commandStreamReceiver.isPreambleSent = false; auto preambleNotSentPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); @@ -177,8 +174,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingFlushTaskS commandStreamReceiver.isPreambleSent = false; auto expectedDifferenceForPreamble = PreambleHelper::getAdditionalCommandsSize(*pDevice); - auto expectedDifferenceForFlush = expectedDifferenceForPreamble + PreambleHelper::getThreadArbitrationCommandsSize() + - commandStreamReceiver.getCmdSizeForL3Config() + + auto expectedDifferenceForFlush = expectedDifferenceForPreamble + commandStreamReceiver.getCmdSizeForL3Config() + PreambleHelper::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo()); EXPECT_EQ(expectedDifferenceForPreamble, actualDifferenceForPreamble); @@ -215,7 +211,6 @@ HWTEST_F(UltCommandStreamReceiverTest, givenCommandStreamReceiverInInitialStateW HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndForceSemaphoreDelayBetweenWaitsFlagWhenEstimatingPreambleCmdSizeThenResultIsExpected) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.ForceSemaphoreDelayBetweenWaits.set(-1); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp index 3f3e19cb3c..c19ab2d8f3 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_dg2_and_later.cpp @@ -90,9 +90,11 @@ HWTEST2_F(CommandStreamReceiverFlushTasDg2AndLaterTests, givenProgramPipeControl DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.usePerDssBackedBuffer = true; + auto &hwHelper = NEO::HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); + dispatchFlags.threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(dispatchFlags.requiresCoherency, dispatchFlags.numGrfRequired, - commandStreamReceiver.requiredThreadArbitrationPolicy); + dispatchFlags.threadArbitrationPolicy); auto cmdSizeForAllCommands = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); commandStreamReceiver.flushTask(commandStream, 0, diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests.h b/opencl/test/unit_test/command_stream/compute_mode_tests.h index 99547b01a9..ae30eadf81 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests.h +++ b/opencl/test/unit_test/command_stream/compute_mode_tests.h @@ -20,8 +20,6 @@ struct ComputeModeRequirements : public ::testing::Test { struct myCsr : public UltCommandStreamReceiver { using CommandStreamReceiver::commandStream; using CommandStreamReceiver::streamProperties; - using CommandStreamReceiverHw::lastSentThreadArbitrationPolicy; - using CommandStreamReceiverHw::requiredThreadArbitrationPolicy; myCsr(ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(executionEnvironment, 0, deviceBitfield){}; CsrSizeRequestFlags *getCsrRequestFlags() { return &this->csrSizeRequestFlags; } @@ -36,9 +34,9 @@ struct ComputeModeRequirements : public ::testing::Test { uint32_t numGrfRequired = 128u) { overrideComputeModeRequest(reqestChanged, requireCoherency, hasSharedHandles, numGrfRequiredChanged, numGrfRequired); if (modifyThreadArbitrationPolicy) { + auto &hwHelper = NEO::HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); auto csrHw = getCsrHw(); - csrHw->lastSentThreadArbitrationPolicy = csrHw->requiredThreadArbitrationPolicy; - csrHw->streamProperties.stateComputeMode.threadArbitrationPolicy.value = csrHw->requiredThreadArbitrationPolicy; + csrHw->streamProperties.stateComputeMode.threadArbitrationPolicy.value = hwHelper.getDefaultThreadArbitrationPolicy(); csrHw->streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty = true; } } diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp b/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp index 06fc282b48..85eb7c6bd5 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp +++ b/opencl/test/unit_test/command_stream/compute_mode_tests_dg2.cpp @@ -121,7 +121,6 @@ HWTEST2_F(ComputeModeRequirements, GivenProgramPipeControlPriorToNonPipelinedSta using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; SetUpImpl(); - getCsrHw()->requiredThreadArbitrationPolicy = getCsrHw()->lastSentThreadArbitrationPolicy; auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); overrideComputeModeRequest(false, false, false); diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests_pvc_and_later.cpp b/opencl/test/unit_test/command_stream/compute_mode_tests_pvc_and_later.cpp index 41ad86edf2..70b322c6b6 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests_pvc_and_later.cpp +++ b/opencl/test/unit_test/command_stream/compute_mode_tests_pvc_and_later.cpp @@ -20,7 +20,7 @@ HWTEST2_F(ComputeModeRequirementsPvcAndLater, givenComputeModeCmdSizeWhenLargeGr overrideComputeModeRequest(false, false, false, false, 128u); auto retSize = getCsrHw()->getCmdSizeForComputeMode(); - EXPECT_EQ(cmdSize, retSize); + EXPECT_EQ(0u, retSize); overrideComputeModeRequest(false, false, false, true, 256u); retSize = getCsrHw()->getCmdSizeForComputeMode(); diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp index 0e65fa996f..d95476ebf5 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/compute_mode_tests_xehp_and_later.cpp @@ -22,7 +22,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenCoherencyWithoutShare using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; SetUpImpl(); - getCsrHw()->requiredThreadArbitrationPolicy = getCsrHw()->lastSentThreadArbitrationPolicy; auto cmdsSize = sizeof(STATE_COMPUTE_MODE); overrideComputeModeRequest(false, false, false); @@ -168,7 +167,6 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyRequirementWithoutSharedHandles IndirectHeap stream(graphicAlloc); auto flushTask = [&](bool coherencyRequired) { - getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy; flags.requiresCoherency = coherencyRequired; startOffset = getCsrHw()->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); @@ -226,7 +224,6 @@ HWTEST2_F(ComputeModeRequirements, givenCoherencyRequirementWithSharedHandlesWhe IndirectHeap stream(graphicsAlloc); auto flushTask = [&](bool coherencyRequired) { - getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy; flags.requiresCoherency = coherencyRequired; makeResidentSharedAlloc(); @@ -400,8 +397,6 @@ HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenRequiredGRFNum expectedScmCmd.setLargeGrfMode(true); auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask; - getCsrHw()->requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; - overrideComputeModeRequest(true, false, false, true, 256u); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index 085d530cef..05a6fd25b7 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -59,6 +59,8 @@ struct UltCommandStreamReceiverTest graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream); ssh.replaceGraphicsAllocation(graphicsAllocation); + flushTaskFlags.threadArbitrationPolicy = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily).getDefaultThreadArbitrationPolicy(); + pDevice->getGpgpuCommandStreamReceiver().setupContext(*pDevice->getDefaultEngine().osContext); } @@ -131,18 +133,18 @@ struct UltCommandStreamReceiverTest commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setMediaVFEStateDirty(false); auto gmmHelper = pDevice->getGmmHelper(); - auto mocsIndex = HwHelper::get(defaultHwInfo->platform.eDisplayCoreFamily).getMocsIndex(*gmmHelper, true, isL1CacheEnabled); + auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eDisplayCoreFamily); + auto mocsIndex = hwHelper.getMocsIndex(*gmmHelper, true, isL1CacheEnabled); commandStreamReceiver.latestSentStatelessMocsConfig = mocsIndex; commandStreamReceiver.lastSentL3Config = L3Config; configureCSRHeapStatesToNonDirty(); commandStreamReceiver.taskLevel = taskLevel; - commandStreamReceiver.lastSentThreadArbitrationPolicy = commandStreamReceiver.requiredThreadArbitrationPolicy; commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentUseGlobalAtomics = false; commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber, - commandStreamReceiver.requiredThreadArbitrationPolicy); + hwHelper.getDefaultThreadArbitrationPolicy()); } template diff --git a/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp index ee8a02c00c..15dc63634d 100644 --- a/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/compute_mode_tests_xe_hp_core.cpp @@ -106,7 +106,6 @@ HWTEST2_F(ComputeModeRequirements, GivenProgramPipeControlPriorToNonPipelinedSta using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; SetUpImpl(); - getCsrHw()->requiredThreadArbitrationPolicy = getCsrHw()->lastSentThreadArbitrationPolicy; auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); overrideComputeModeRequest(false, false, false); diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp index 3e788c3278..7873b73e3a 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp @@ -42,13 +42,14 @@ PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitration using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy = static_cast(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT); DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); - flushTask(commandStreamReceiver); + EXPECT_EQ(-1, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); - EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, commandStreamReceiver.lastSentThreadArbitrationPolicy); + flushTask(commandStreamReceiver); + EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, + static_cast(commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value)); } PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenNotExistPolicyWhenFlushingThenDefaultPolicyIsProgrammed) { @@ -59,11 +60,11 @@ PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenNotExistPolicyWhenFlushin auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint32_t notExistPolicy = -2; - commandStreamReceiver.requiredThreadArbitrationPolicy = notExistPolicy; + flushTaskFlags.threadArbitrationPolicy = notExistPolicy; flushTask(commandStreamReceiver); - EXPECT_EQ(notExistPolicy, commandStreamReceiver.lastSentThreadArbitrationPolicy); + EXPECT_EQ(notExistPolicy, static_cast(commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value)); } PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenRevisionBAndAboveWhenLastSpecialPipelineSelectModeIsTrueAndFlushTaskIsCalledThenDontReprogramPipelineSelect) { diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp index 48d55c6f3a..4720249214 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/compute_mode_tests_pvc.cpp @@ -31,7 +31,8 @@ PVCTEST_F(PvcComputeModeRequirements, givenNewRequiredThreadArbitrationPolicyWhe auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); char buff[1024] = {0}; LinearStream stream(buff, 1024); - auto newEuThreadSchedulingMode = ThreadArbitrationPolicy::RoundRobin; + auto &hwHelper = NEO::HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); + auto newEuThreadSchedulingMode = hwHelper.getDefaultThreadArbitrationPolicy(); auto expectedEuThreadSchedulingMode = static_cast(UnitTestHelper::getAppropriateThreadArbitrationPolicy(newEuThreadSchedulingMode)); auto expectedScmCmd = FamilyType::cmdInitStateComputeMode; @@ -39,9 +40,6 @@ PVCTEST_F(PvcComputeModeRequirements, givenNewRequiredThreadArbitrationPolicyWhe expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask | FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask); expectedScmCmd.setEuThreadSchedulingModeOverride(expectedEuThreadSchedulingMode); - getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; - getCsrHw()->requiredThreadArbitrationPolicy = newEuThreadSchedulingMode; - overrideComputeModeRequest(true, false, false, true, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -64,8 +62,6 @@ PVCTEST_F(PvcComputeModeRequirements, givenRequiredThreadArbitrationPolicyAlread expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask); - getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy; - getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -83,13 +79,11 @@ PVCTEST_F(PvcComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenComm auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); - getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy; - overrideComputeModeRequest(false, false, false); + overrideComputeModeRequest(false, false, false, false); auto retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); - getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; - overrideComputeModeRequest(false, false, false); + overrideComputeModeRequest(false, false, false, true); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); } @@ -99,7 +93,6 @@ PVCTEST_F(PvcComputeModeRequirements, givenNumGrfRequiredChangedWhenCommandSizeI using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; SetUpImpl(); - getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy; auto numGrfRequired = 128u; auto numGrfRequiredChanged = false; @@ -120,7 +113,7 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfMod LinearStream stream(buff, 1024); auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); - overrideComputeModeRequest(false, false, false, false); + overrideComputeModeRequest(false, false, false, true); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); } @@ -135,7 +128,6 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfMod auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); uint32_t numGrfRequired = GrfConfig::LargeGrfNumber; - getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; overrideComputeModeRequest(false, false, false, true, numGrfRequired); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -153,7 +145,6 @@ PVCTEST_F(PvcComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfReq auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; - getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; overrideComputeModeRequest(false, false, false, true, numGrfRequired); getCsrHw()->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); @@ -173,7 +164,6 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH auto flushTask = [&](bool threadArbitrationPolicyChanged) { if (threadArbitrationPolicyChanged) { getCsrHw()->streamProperties.stateComputeMode.threadArbitrationPolicy.value = -1; - getCsrHw()->lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; } startOffset = getCsrHw()->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); @@ -202,7 +192,7 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH }; getCsrHw()->streamProperties.stateComputeMode.setProperties(flags.requiresCoherency, flags.numGrfRequired, - getCsrHw()->lastSentThreadArbitrationPolicy); + flags.threadArbitrationPolicy); flushTask(true); findCmd(true); // first time diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 422bf4d669..85a0f903fd 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -372,8 +372,6 @@ class CommandStreamReceiver { uint32_t lastSentL3Config = 0; uint32_t latestSentStatelessMocsConfig = 0; uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber; - uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; - uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount; uint32_t requiredScratchSize = 0; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 3e550bb184..6ffc7850e5 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -57,7 +57,6 @@ CommandStreamReceiverHw::CommandStreamReceiverHw(ExecutionEnvironment auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); localMemoryEnabled = hwHelper.getEnableLocalMemory(peekHwInfo()); - requiredThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); resetKmdNotifyHelper(new KmdNotifyHelper(&peekHwInfo().capabilityTable.kmdNotifyProperties)); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get() || DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { @@ -247,24 +246,22 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (DebugManager.flags.ForceSLML3Config.get()) { dispatchFlags.useSLM = true; } - if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { - dispatchFlags.threadArbitrationPolicy = static_cast(DebugManager.flags.OverrideThreadArbitrationPolicy.get()); - } auto newL3Config = PreambleHelper::getL3Config(peekHwInfo(), dispatchFlags.useSLM); auto isSpecialPipelineSelectModeChanged = PreambleHelper::isSpecialPipelineSelectModeChanged(lastSpecialPipelineSelectMode, dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode, peekHwInfo()); - if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) { - this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy; + if (dispatchFlags.threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) { + auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); + dispatchFlags.threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); } if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) { dispatchFlags.numGrfRequired = lastSentNumGrfRequired; } this->streamProperties.stateComputeMode.setProperties(dispatchFlags.requiresCoherency, dispatchFlags.numGrfRequired, - this->requiredThreadArbitrationPolicy); + dispatchFlags.threadArbitrationPolicy); csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; @@ -331,7 +328,6 @@ CompletionStamp CommandStreamReceiverHw::flushTask( pageTableManagerInitialized = pageTableManager->initPageTableManagerRegisters(this); } - bool isPreambleNeeded = !this->isPreambleSent; programHardwareContext(commandStreamCSR); programComputeMode(commandStreamCSR, dispatchFlags, device.getHardwareInfo()); programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); @@ -341,9 +337,9 @@ CompletionStamp CommandStreamReceiverHw::flushTask( addPipeControlBefore3dState(commandStreamCSR, dispatchFlags); programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags); - if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy || isPreambleNeeded) { - PreambleHelper::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy); - this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; + if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty) { + auto threadArbitrationPolicy = static_cast(this->streamProperties.stateComputeMode.threadArbitrationPolicy.value); + PreambleHelper::programThreadArbitration(&commandStreamCSR, threadArbitrationPolicy); } stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.gsba32BitRequired) && force32BitAllocations); @@ -823,7 +819,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat size += TimestampPacketHelper::getRequiredCmdStreamSize(dispatchFlags.csrDependencies); size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(dispatchFlags.csrDependencies); - if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { + if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty) { size += PreambleHelper::getThreadArbitrationCommandsSize(); } @@ -908,10 +904,10 @@ inline void CommandStreamReceiverHw::programStateSip(LinearStream &cm template inline void CommandStreamReceiverHw::programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config) { if (!this->isPreambleSent) { - PreambleHelper::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation); + auto threadArbitrationPolicy = static_cast(this->streamProperties.stateComputeMode.threadArbitrationPolicy.value); + PreambleHelper::programPreamble(&csr, device, newL3Config, threadArbitrationPolicy, this->preemptionAllocation); this->isPreambleSent = true; this->lastSentL3Config = newL3Config; - this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; } } diff --git a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl index 7a8588ccd6..23730ad72b 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl @@ -42,7 +42,7 @@ void CommandStreamReceiverHw::programComputeMode(LinearStream &stream template <> inline bool CommandStreamReceiverHw::isComputeModeNeeded() const { return this->streamProperties.stateComputeMode.isDirty() || - StateComputeModeHelper::isStateComputeModeRequired(csrSizeRequestFlags, this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy); + StateComputeModeHelper::isStateComputeModeRequired(csrSizeRequestFlags, false); } template <> diff --git a/shared/source/xe_hpc_core/state_compute_mode_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/state_compute_mode_helper_xe_hpc_core.cpp index d8435c2a1a..ff1fe7fc77 100644 --- a/shared/source/xe_hpc_core/state_compute_mode_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/state_compute_mode_helper_xe_hpc_core.cpp @@ -11,7 +11,7 @@ namespace NEO { template <> bool StateComputeModeHelper::isStateComputeModeRequired(const CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed) { - return csrSizeRequestFlags.numGrfRequiredChanged || isThreadArbitionPolicyProgrammed; + return csrSizeRequestFlags.numGrfRequiredChanged; } } // namespace NEO diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 309b5649cc..312b7e2bcb 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -90,7 +90,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::lastMemoryCompressionState; using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::lastSentL3Config; - using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy; using BaseClass::CommandStreamReceiver::lastSentUseGlobalAtomics; using BaseClass::CommandStreamReceiver::lastSpecialPipelineSelectMode; using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; @@ -105,7 +104,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize; using BaseClass::CommandStreamReceiver::requiredScratchSize; - using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy; using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired; using BaseClass::CommandStreamReceiver::scratchSpaceController; using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 5771b500f0..1432cd6d93 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -38,7 +38,6 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { using CommandStreamReceiver::osContext; using CommandStreamReceiver::postSyncWriteOffset; using CommandStreamReceiver::preemptionAllocation; - using CommandStreamReceiver::requiredThreadArbitrationPolicy; using CommandStreamReceiver::tagAddress; using CommandStreamReceiver::tagsMultiAllocation; using CommandStreamReceiver::taskCount; @@ -180,7 +179,6 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiver::pageTableManagerInitialized; using CommandStreamReceiver::postSyncWriteOffset; using CommandStreamReceiver::requiredScratchSize; - using CommandStreamReceiver::requiredThreadArbitrationPolicy; using CommandStreamReceiver::streamProperties; using CommandStreamReceiver::tagAddress; using CommandStreamReceiver::taskCount; diff --git a/shared/test/common/mocks/mock_csr.h b/shared/test/common/mocks/mock_csr.h index 746268d63f..e0fff42e23 100644 --- a/shared/test/common/mocks/mock_csr.h +++ b/shared/test/common/mocks/mock_csr.h @@ -44,7 +44,7 @@ class MockCsrBase : public UltCommandStreamReceiver { madeNonResidentGfxAllocations.push_back(&gfxAllocation); } - uint32_t peekThreadArbitrationPolicy() { return this->requiredThreadArbitrationPolicy; } + uint32_t peekThreadArbitrationPolicy() { return static_cast(this->streamProperties.stateComputeMode.threadArbitrationPolicy.value); } bool isMadeResident(GraphicsAllocation *gfxAllocation) { for (GraphicsAllocation *gfxAlloc : madeResidentGfxAllocations) {