From cb4e5576cb5414ab3af54c49819a4ced305b965b Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Mon, 26 Aug 2019 17:03:13 +0200 Subject: [PATCH] Pass proper dispatch flags. - add new policy to select L1 caching - the policy is selected when the kernel doesn't have any stateless writes Change-Id: I3948e652797420976159bbfec2c2a154eb9e18ee Signed-off-by: Mrozek, Michal --- runtime/command_queue/enqueue_common.h | 2 ++ runtime/command_stream/csr_definitions.h | 1 + runtime/helpers/task_information.cpp | 2 ++ .../command_queue/enqueue_kernel_1_tests.cpp | 32 +++++++++++++++++++ .../libult/ult_command_stream_receiver.h | 2 ++ unit_tests/mocks/mock_kernel.h | 1 + 6 files changed, 40 insertions(+) diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index e18f532735..3a5035452d 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -688,6 +688,8 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; + } else if (!kernel->areStatelessWritesUsed()) { + dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; } if (gtpinIsGTPinInitialized()) { diff --git a/runtime/command_stream/csr_definitions.h b/runtime/command_stream/csr_definitions.h index 25cf99a191..3a9710b4ff 100644 --- a/runtime/command_stream/csr_definitions.h +++ b/runtime/command_stream/csr_definitions.h @@ -33,6 +33,7 @@ constexpr int64_t maxTimeout = std::numeric_limits<int64_t>::max(); namespace L3CachingSettings { constexpr uint32_t l3CacheOn = 0u; constexpr uint32_t l3CacheOff = 1u; +constexpr uint32_t l3AndL1On = 2u; } // namespace L3CachingSettings struct DispatchFlags { diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp index fae103a09f..88a5aa26bd 100644 --- a/runtime/helpers/task_information.cpp +++ b/runtime/helpers/task_information.cpp @@ -194,6 +194,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t 
taskLevel, bool terminate if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; + } else if (!kernel->areStatelessWritesUsed()) { + dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; } DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady); diff --git a/unit_tests/command_queue/enqueue_kernel_1_tests.cpp b/unit_tests/command_queue/enqueue_kernel_1_tests.cpp index 40df2851e3..ba8efa5020 100644 --- a/unit_tests/command_queue/enqueue_kernel_1_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_1_tests.cpp @@ -354,6 +354,38 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPri EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize); } +HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + size_t off[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + MockKernelWithInternals mockKernel(*pDevice); + mockKernel.mockKernel->containsStatelessWrites = false; + + pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); + + EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On); +} + +HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteOnBlockedCodePathWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + size_t off[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + auto userEvent = clCreateUserEvent(this->context, nullptr); + + MockKernelWithInternals mockKernel(*pDevice); + mockKernel.mockKernel->containsStatelessWrites = false; + + pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 1, &userEvent, nullptr); + + clSetUserEventStatus(userEvent, 0u); + + EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On); + + clReleaseEvent(userEvent); +} + HWTEST_F(EnqueueKernelTest, 
givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) { size_t gws[3] = {0, 0, 0}; MockKernelWithInternals mockKernel(*pDevice); diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index d2e27ef8e1..e9b5a6b33e 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -98,6 +98,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + recordedDispatchFlags = dispatchFlags; this->lastFlushedCommandStream = &commandStream; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } @@ -179,5 +180,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ uint32_t latestSentTaskCountValueDuringFlush = 0; uint32_t blitBufferCalled = 0; std::atomic latestWaitForCompletionWithTimeoutTaskCount{0}; + DispatchFlags recordedDispatchFlags; }; } // namespace NEO diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 37ceb4fa4e..63138934aa 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -27,6 +27,7 @@ class MockKernel : public Kernel { using Kernel::addAllocationToCacheFlushVector; using Kernel::allBufferArgsStateful; using Kernel::auxTranslationRequired; + using Kernel::containsStatelessWrites; using Kernel::isSchedulerKernel; using Kernel::kernelArgRequiresCacheFlush; using Kernel::kernelArguments;