diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index bc26e24325..313f2ef93a 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -987,7 +987,7 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( getThrottle(), //throttle device->getPreemptionMode(), //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired - L3CachingSettings::l3CacheOn, //l3CacheSettings + L3CachingSettings::NotApplicable, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy getSliceCount(), //sliceCount blocking, //blocking diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index df768a3dec..885bad80ac 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -58,7 +58,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { commandQueue.getThrottle(), //throttle PreemptionHelper::taskPreemptionMode(device, multiDispatch), //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired - L3CachingSettings::l3CacheOn, //l3CacheSettings + L3CachingSettings::NotApplicable, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy commandQueue.getSliceCount(), //sliceCount true, //blocking @@ -335,7 +335,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate commandQueue.getThrottle(), //throttle commandQueue.getDevice().getPreemptionMode(), //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired - L3CachingSettings::l3CacheOn, //l3CacheSettings + L3CachingSettings::NotApplicable, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy commandQueue.getSliceCount(), //sliceCount true, //blocking diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp 
b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 58c84a3f7f..5a0e95ec4b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -202,6 +202,7 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); + EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); } @@ -265,6 +266,7 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); + EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); } HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowOutOfOrderExecution) { diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 616f9dabbb..1c4f938d04 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -540,6 +540,32 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStateBaseAddressWhenItIsRequi EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNotApplicableL3ConfigWhenFlushingTaskThenDontReloadSba) { + using STATE_BASE_ADDRESS = typename 
FamilyType::STATE_BASE_ADDRESS; + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>(); + + { + flushTaskFlags.l3CacheSettings = L3CachingSettings::l3CacheOn; + flushTask(commandStreamReceiver); + + parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0); + auto stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), stateBaseAddressItor); + } + + { + flushTaskFlags.l3CacheSettings = L3CachingSettings::NotApplicable; + auto offset = commandStreamReceiver.commandStream.getUsed(); + + flushTask(commandStreamReceiver); + + cmdList.clear(); + parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset); + auto stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), stateBaseAddressItor); + } +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, preambleShouldBeSentIfNeverSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>(); commandStreamReceiver.isPreambleSent = false; diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 1f5273b7fb..7081622e00 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -190,7 +190,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandMapUnmapWhenSubmitThenPassCorrectDispat EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(PreemptionHelper::taskPreemptionMode(devicePreemption, flags), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(GrfConfig::DefaultGrfNumber, mockCsr->passedDispatchFlags.numGrfRequired); - EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); + EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush); EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM); @@ -278,7 +278,7 
@@ HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectD EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(mockCmdQ->getDevice().getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(GrfConfig::DefaultGrfNumber, mockCsr->passedDispatchFlags.numGrfRequired); - EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); + EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 4f63a7f1ec..c30b8d4615 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -328,10 +328,14 @@ CompletionStamp CommandStreamReceiverHw::flushTask( auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty; - auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); - auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff; - auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On; - auto mocsIndex = hwHelper.getMocsIndex(*device.getGmmHelper(), l3On, l1On); + auto mocsIndex = latestSentStatelessMocsConfig; + + if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) { + auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); + auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff; + auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On; + mocsIndex = hwHelper.getMocsIndex(*device.getGmmHelper(), l3On, l1On); + } if (mocsIndex != latestSentStatelessMocsConfig) { isStateBaseAddressDirty = true; diff 
--git a/shared/source/command_stream/csr_definitions.h b/shared/source/command_stream/csr_definitions.h index a7c659423c..be9805a73a 100644 --- a/shared/source/command_stream/csr_definitions.h +++ b/shared/source/command_stream/csr_definitions.h @@ -41,6 +41,7 @@ namespace L3CachingSettings { constexpr uint32_t l3CacheOn = 0u; constexpr uint32_t l3CacheOff = 1u; constexpr uint32_t l3AndL1On = 2u; +constexpr uint32_t NotApplicable = 3u; } // namespace L3CachingSettings struct DispatchFlags {