diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 3d0f319daf..8dce178d35 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -557,4 +557,13 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp timestampPacketContainer->add(allocator->getTag()); } } + +size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const { + size_t nodesCount = dispatchInfo.size(); + auto mainKernel = dispatchInfo.peekMainKernel(); + if (mainKernel->requiresCacheFlushCommand(*this)) { + nodesCount++; + } + return nodesCount; +} } // namespace OCLRT diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index b6c4efbec0..4da28ff714 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -331,9 +331,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> { const cl_event *eventWaitList); CommandStreamReceiver &getCommandStreamReceiver() const; - Device &getDevice() { return *device; } - Context &getContext() { return *context; } - Context *getContextPtr() { return context; } + Device &getDevice() const { return *device; } + Context &getContext() const { return *context; } + Context *getContextPtr() const { return context; } MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize); IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType, @@ -423,6 +423,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { // virtual event that holds last Enqueue information Event *virtualEvent = nullptr; + size_t estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const; + protected: void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); diff --git 
a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 9ae8e1149e..b8e5427e5e 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -205,7 +205,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, csrDeps.fillFromEventsRequestAndMakeResident(eventsRequest, getCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); if (!multiDispatchInfo.empty()) { - obtainNewTimestampPacketNodes(multiDispatchInfo.size(), previousTimestampPacketNodes); + obtainNewTimestampPacketNodes(estimateTimestampPacketNodesCount(multiDispatchInfo), previousTimestampPacketNodes); csrDeps.push_back(&previousTimestampPacketNodes); } } diff --git a/runtime/command_queue/hardware_interface.inl b/runtime/command_queue/hardware_interface.inl index e3151654f1..95f07d0517 100644 --- a/runtime/command_queue/hardware_interface.inl +++ b/runtime/command_queue/hardware_interface.inl @@ -194,7 +194,7 @@ void HardwareInterface::dispatchWalker( dispatchWorkarounds(commandStream, commandQueue, kernel, true); - if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + if (commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNode = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); GpgpuWalkerHelper::setupTimestampPacket(commandStream, nullptr, timestampPacketNode, TimestampPacket::WriteOperationType::BeforeWalker); } @@ -210,10 +210,17 @@ void HardwareInterface::dispatchWalker( *pPipeControlCmd = GfxFamily::cmdInitPipeControl; pPipeControlCmd->setCommandStreamerStallEnable(true); } - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, &kernel, 0U, 0U); currentDispatchIndex++; } + if (mainKernel->requiresCacheFlushCommand(commandQueue)) { + uint64_t postSyncAddress = 0; + if (commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + 
auto timestampPacketNodeForPostSync = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); + postSyncAddress = timestampPacketNodeForPostSync->getGpuAddress(); + } + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress, 0); + } dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); } diff --git a/runtime/helpers/CMakeLists.txt b/runtime/helpers/CMakeLists.txt index 8621a5ec94..e22cd95978 100644 --- a/runtime/helpers/CMakeLists.txt +++ b/runtime/helpers/CMakeLists.txt @@ -53,6 +53,7 @@ set(RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_commands_base.inl + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap.cpp diff --git a/runtime/helpers/kernel_commands_base.inl b/runtime/helpers/kernel_commands_base.inl index 1cbc938c8d..a3b1088cc9 100644 --- a/runtime/helpers/kernel_commands_base.inl +++ b/runtime/helpers/kernel_commands_base.inl @@ -164,12 +164,10 @@ bool KernelCommandsHelper::isRuntimeLocalIdsGenerationRequired(uint32 template void KernelCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { - if (kernel->requiresCacheFlushCommand(commandQueue)) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - auto pipeControl = reinterpret_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); - *pipeControl = GfxFamily::cmdInitPipeControl; - pipeControl->setCommandStreamerStallEnable(true); - pipeControl->setDcFlushEnable(true); - } + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + auto pipeControl = 
reinterpret_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); + *pipeControl = GfxFamily::cmdInitPipeControl; + pipeControl->setCommandStreamerStallEnable(true); + pipeControl->setDcFlushEnable(true); } } // namespace OCLRT diff --git a/runtime/kernel/kernel_reconfiguration.cpp b/runtime/helpers/kernel_helpers.cpp similarity index 52% rename from runtime/kernel/kernel_reconfiguration.cpp rename to runtime/helpers/kernel_helpers.cpp index b30d675e7b..65345edffb 100644 --- a/runtime/kernel/kernel_reconfiguration.cpp +++ b/runtime/helpers/kernel_helpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,6 +8,9 @@ #include "runtime/kernel/kernel.h" namespace OCLRT { +bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { + return false; +} void Kernel::reconfigureKernel() { } } // namespace OCLRT \ No newline at end of file diff --git a/runtime/kernel/CMakeLists.txt b/runtime/kernel/CMakeLists.txt index 6d42fc2f6b..3723728e6d 100644 --- a/runtime/kernel/CMakeLists.txt +++ b/runtime/kernel/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018 Intel Corporation +# Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -13,7 +13,6 @@ set(RUNTIME_SRCS_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_reconfiguration.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_KERNEL ${RUNTIME_SRCS_KERNEL}) diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index d4ed0c6150..5bc6c36503 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -2144,31 +2144,6 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF } } -bool Kernel::requiresCacheFlushCommand(const CommandQueue 
&commandQueue) const { - if (false == HwHelper::cacheFlushAfterWalkerSupported(device.getHardwareInfo())) { - return false; - } - - bool cmdQueueRequiresCacheFlush = commandQueue.getRequiresCacheFlushAfterWalker() || DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get(); - if (false == cmdQueueRequiresCacheFlush) { - return false; - } - - if (getProgram()->getGlobalSurface() != nullptr) { - return true; - } - if (svmAllocationsRequireCacheFlush) { - return true; - } - size_t args = kernelArgRequiresCacheFlush.size(); - for (size_t i = 0; i < args; i++) { - if (kernelArgRequiresCacheFlush[i] != nullptr) { - return true; - } - } - return false; -} - void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { if (false == HwHelper::cacheFlushAfterWalkerSupported(device.getHardwareInfo())) { return; diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index 6c62b614ac..e938471b30 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -377,7 +377,7 @@ class Kernel : public BaseObject<_cl_kernel> { void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation); - bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; + MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; using CacheFlushAllocationsVec = StackVec; void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; diff --git a/unit_tests/command_queue/command_queue_tests.cpp b/unit_tests/command_queue/command_queue_tests.cpp index b2bab064aa..2292f55f73 100644 --- a/unit_tests/command_queue/command_queue_tests.cpp +++ b/unit_tests/command_queue/command_queue_tests.cpp @@ -28,7 +28,9 @@ #include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_context.h" #include "unit_tests/mocks/mock_csr.h" +#include "unit_tests/mocks/mock_graphics_allocation.h" #include "unit_tests/mocks/mock_kernel.h" +#include "unit_tests/mocks/mock_mdi.h" #include 
"unit_tests/mocks/mock_memory_manager.h" #include "unit_tests/mocks/mock_program.h" @@ -386,6 +388,43 @@ TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenGetCSIsCalledThenComm EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType()); } +HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsDisabledWhenEstimatingNodesCountEqualMultiDispatchInfoSize) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(0); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); + + MockCommandQueueHw cmdQ(context.get(), pDevice, nullptr); + cmdQ.multiEngineQueue = true; + MockKernelWithInternals mockKernelWithInternals(*pDevice, context.get()); + + mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); + MockGraphicsAllocation cacheRequiringAllocation; + mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + + MockMultiDispatchInfo multiDispatchInfo(std::vector({mockKernelWithInternals.mockKernel})); + + size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo); + EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size()); +} + +HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsEnabledWhenEstimatingNodesCountEqualMultiDispatchInfoSizePlusOne) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); + + MockCommandQueueHw cmdQ(context.get(), pDevice, nullptr); + MockKernelWithInternals mockKernelWithInternals(*pDevice, context.get()); + + mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); + MockGraphicsAllocation cacheRequiringAllocation; + mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + + MockMultiDispatchInfo 
multiDispatchInfo(std::vector({mockKernelWithInternals.mockKernel})); + + size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo); + EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size() + 1); +} + struct CommandQueueIndirectHeapTest : public CommandQueueMemoryDevice, public ::testing::TestWithParam { void SetUp() override { diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp index 4796129b2c..e051d5b26e 100644 --- a/unit_tests/command_queue/dispatch_walker_tests.cpp +++ b/unit_tests/command_queue/dispatch_walker_tests.cpp @@ -19,6 +19,8 @@ #include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/hw_parse.h" #include "unit_tests/libult/mock_gfx_family.h" +#include "unit_tests/mocks/mock_command_queue.h" +#include "unit_tests/mocks/mock_graphics_allocation.h" #include "unit_tests/mocks/mock_kernel.h" #include "unit_tests/mocks/mock_mdi.h" #include "unit_tests/mocks/mock_program.h" @@ -32,6 +34,7 @@ struct DispatchWalkerTest : public CommandQueueFixture, public DeviceFixture, pu using CommandQueueFixture::SetUp; void SetUp() override { + DebugManager.flags.EnableTimestampPacket.set(0); DeviceFixture::SetUp(); CommandQueueFixture::SetUp(nullptr, pDevice, 0); @@ -93,6 +96,8 @@ struct DispatchWalkerTest : public CommandQueueFixture, public DeviceFixture, pu uint32_t kernelIsa[32]; uint32_t crossThreadData[32]; uint32_t dsh[32]; + + DebugManagerStateRestore dbgRestore; }; HWTEST_F(DispatchWalkerTest, computeDimensions) { @@ -1095,6 +1100,134 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat } } +HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(0); + 
DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); + + MockKernel kernel1(program.get(), kernelInfo, *pDevice); + ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); + kernel1.kernelArgRequiresCacheFlush.resize(1); + MockGraphicsAllocation cacheRequiringAllocation; + kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + + MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1})); + // create commandStream + auto &cmdStream = pCmdQ->getCS(0); + + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + pDevice->getPreemptionMode(), + false); + + HardwareParse hwParse; + hwParse.parseCommands(cmdStream); + PIPE_CONTROL *pipeControl = hwParse.getCommand(); + EXPECT_EQ(nullptr, pipeControl); +} + +HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoKernelsThenFlushCommandPresentOnce) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); + + MockKernel kernel1(program.get(), kernelInfo, *pDevice); + ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pDevice); + ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); + + kernel1.kernelArgRequiresCacheFlush.resize(1); + kernel2.kernelArgRequiresCacheFlush.resize(1); + MockGraphicsAllocation cacheRequiringAllocation; + kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + + MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1, &kernel2})); + // create commandStream + auto &cmdStream = pCmdQ->getCS(0); + + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 
pDevice->getPreemptionMode(), + false); + + HardwareParse hwParse; + hwParse.parseCommands(cmdStream); + uint32_t pipeControlCount = hwParse.getCommandCount(); + EXPECT_EQ(pipeControlCount, 1u); +} + +HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQueueThenFlushCommandPresentTwice) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); + + MockKernel kernel1(program.get(), kernelInfo, *pDevice); + ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pDevice); + ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); + + kernel1.kernelArgRequiresCacheFlush.resize(1); + kernel2.kernelArgRequiresCacheFlush.resize(1); + MockGraphicsAllocation cacheRequiringAllocation; + kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; + + MockMultiDispatchInfo multiDispatchInfo1(std::vector({&kernel1})); + MockMultiDispatchInfo multiDispatchInfo2(std::vector({&kernel2})); + // create commandStream + auto &cmdStream = pCmdQ->getCS(0); + + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo1, + CsrDependencies(), + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + pDevice->getPreemptionMode(), + false); + + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo2, + CsrDependencies(), + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + pDevice->getPreemptionMode(), + false); + + HardwareParse hwParse; + hwParse.parseCommands(cmdStream); + uint32_t pipeControlCount = hwParse.getCommandCount(); + EXPECT_EQ(pipeControlCount, 2u); +} + HWTEST_F(DispatchWalkerTest, givenMultiDispatchWhenWhitelistedRegisterForCoherencySwitchThenDontProgramLriInTaskStream) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; 
WhitelistedRegisters registers = {0}; @@ -1274,4 +1407,4 @@ HWTEST_P(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsTakenThenT } INSTANTIATE_TEST_CASE_P(StartEndFlag, - ProfilingCommandsTest, ::testing::Values(true, false)); + ProfilingCommandsTest, ::testing::Bool()); diff --git a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp index 7b864e5d43..6bb2b41b75 100644 --- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp +++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp @@ -11,6 +11,7 @@ #include "runtime/kernel/kernel.h" #include "runtime/sampler/sampler.h" #include "unit_tests/fixtures/execution_model_fixture.h" +#include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/hw_parse.h" #include "unit_tests/helpers/unit_test_helper.h" #include "unit_tests/mocks/mock_context.h" @@ -242,12 +243,14 @@ class MockParentKernelDispatch : public ExecutionModelSchedulerTest, public testing::Test { public: void SetUp() override { + DebugManager.flags.EnableTimestampPacket.set(0); ExecutionModelSchedulerTest::SetUp(); } void TearDown() override { ExecutionModelSchedulerTest::TearDown(); } + DebugManagerStateRestore dbgRestore; }; HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatchedThenDshHeapForIndirectObjectHeapIsUsed) { diff --git a/unit_tests/fixtures/execution_model_fixture.h b/unit_tests/fixtures/execution_model_fixture.h index bd6f187134..478579a82a 100644 --- a/unit_tests/fixtures/execution_model_fixture.h +++ b/unit_tests/fixtures/execution_model_fixture.h @@ -10,6 +10,7 @@ #include "runtime/device_queue/device_queue.h" #include "unit_tests/command_queue/command_queue_fixture.h" #include "unit_tests/fixtures/execution_model_kernel_fixture.h" +#include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/mocks/mock_kernel.h" class DeviceQueueFixture { @@ -51,6 +52,7 @@ class 
ExecutionModelKernelTest : public ExecutionModelKernelFixture, ExecutionModelKernelTest(){}; void SetUp() override { + DebugManager.flags.EnableTimestampPacket.set(0); ExecutionModelKernelFixture::SetUp(); CommandQueueHwFixture::SetUp(pDevice, 0); DeviceQueueFixture::SetUp(context, pDevice); @@ -62,6 +64,7 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture, CommandQueueHwFixture::TearDown(); ExecutionModelKernelFixture::TearDown(); } + DebugManagerStateRestore dbgRestore; }; class ExecutionModelSchedulerTest : public DeviceFixture, diff --git a/unit_tests/helpers/kernel_commands_tests.cpp b/unit_tests/helpers/kernel_commands_tests.cpp index 6002babe4d..0a0e2bbcc8 100644 --- a/unit_tests/helpers/kernel_commands_tests.cpp +++ b/unit_tests/helpers/kernel_commands_tests.cpp @@ -1302,32 +1302,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable EXPECT_TRUE(pipeControl->getDcFlushEnable()); } -HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerDisabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectNoCacheFlushCommand) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); - - CommandQueueHw cmdQ(nullptr, pDevice, 0); - auto &commandStream = cmdQ.getCS(1024); - - mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; - - size_t expectedSize = 0U; - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); - EXPECT_EQ(expectedSize, actualSize); - - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); - - 
HardwareParse hwParse; - hwParse.parseCommands(commandStream); - PIPE_CONTROL *pipeControl = hwParse.getCommand(); - ASSERT_EQ(nullptr, pipeControl); -} - HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; @@ -1362,62 +1336,27 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } - -HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenNoGlobalSurfaceSvmAllocationKernelArgRequireCacheFlushThenExpectNoCacheFlushCommand) { +HWTEST_F(KernelCommandsTest, givenCacheFlushAfterWalkerDisabledWhenGettingRequiredCacheFlushSizeThenReturnZero) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalker.set(0); DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); CommandQueueHw cmdQ(nullptr, pDevice, 0); - auto &commandStream = cmdQ.getCS(1024); - - addSpaceForSingleKernelArg(); size_t expectedSize = 0U; size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); - - HardwareParse hwParse; - hwParse.parseCommands(commandStream); - PIPE_CONTROL *pipeControl = hwParse.getCommand(); - EXPECT_EQ(nullptr, pipeControl); } -HWCMDTEST_F(IGFX_GEN8_CORE, 
KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSupportFlushThenExpectNoCacheFlushCommand) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - +TEST_F(KernelCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSupportFlushThenExpectNoCacheAllocationForFlush) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); hwInfoHelper.capabilityTable.supportCacheFlushAfterWalker = false; - CommandQueueHw cmdQ(nullptr, pDevice, 0); - auto &commandStream = cmdQ.getCS(1024); - - addSpaceForSingleKernelArg(); - MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - StackVec allocationsForCacheFlush; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); EXPECT_EQ(0U, allocationsForCacheFlush.size()); - - size_t expectedSize = 0U; - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); - EXPECT_EQ(expectedSize, actualSize); - - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); - - HardwareParse hwParse; - hwParse.parseCommands(commandStream); - PIPE_CONTROL *pipeControl = hwParse.getCommand(); - EXPECT_EQ(nullptr, pipeControl); -} +} \ No newline at end of file diff --git a/unit_tests/helpers/timestamp_packet_tests.cpp b/unit_tests/helpers/timestamp_packet_tests.cpp index 1d06935c49..fac531e08e 100644 --- a/unit_tests/helpers/timestamp_packet_tests.cpp +++ b/unit_tests/helpers/timestamp_packet_tests.cpp @@ -12,6 +12,7 @@ #include "runtime/helpers/timestamp_packet.h" #include 
"runtime/utilities/tag_allocator.h" #include "test.h" +#include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/hw_parse.h" #include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_context.h" @@ -861,6 +862,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh MockTimestampPacketContainer timestamp4(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp6(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); + MockTimestampPacketContainer timestamp7(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent event1; UserEvent event2; @@ -887,7 +889,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh nullptr, nullptr, nullptr, - nullptr, + &timestamp7, device->getPreemptionMode(), false); @@ -944,6 +946,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr MockTimestampPacketContainer timestamp4(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp6(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); + MockTimestampPacketContainer timestamp7(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent event1; UserEvent event2; @@ -970,7 +973,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr nullptr, nullptr, nullptr, - nullptr, + &timestamp7, device->getPreemptionMode(), false); @@ -1426,3 +1429,41 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPip cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); 
EXPECT_EQ(secondEnqueueOffset, csr.commandStream.getUsed()); // nothing programmed when flag is not set } + +HWTEST_F(TimestampPacketTests, givenKernelWhichDoesntRequiersFlushWhenEnquingKernelThenOneNodeCreated) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(false); + auto &csr = device->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = true; + + auto mockTagAllocator = new MockTagAllocator<>(executionEnvironment.memoryManager.get()); + csr.timestampPacketAllocator.reset(mockTagAllocator); + auto cmdQ = std::make_unique>(context, device.get(), nullptr); + // obtain first node for cmdQ and event1 + cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + auto size = cmdQ->timestampPacketContainer->peekNodes().size(); + EXPECT_EQ(size, 1u); +} + +HWTEST_F(TimestampPacketTests, givenKernelWhichRequiersFlushWhenEnquingKernelThenTwoNodesCreated) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(true); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(true); + + auto &csr = device->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = true; + + auto mockTagAllocator = new MockTagAllocator<>(executionEnvironment.memoryManager.get()); + csr.timestampPacketAllocator.reset(mockTagAllocator); + auto cmdQ = std::make_unique>(context, device.get(), nullptr); + kernel->mockKernel->svmAllocationsRequireCacheFlush = true; + // obtain first node for cmdQ and event1 + cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + auto node1 = cmdQ->timestampPacketContainer->peekNodes().at(0); + auto node2 = cmdQ->timestampPacketContainer->peekNodes().at(1); + auto size = cmdQ->timestampPacketContainer->peekNodes().size(); + EXPECT_EQ(size, 2u); + EXPECT_NE(nullptr, node1); + EXPECT_NE(nullptr, node2); + EXPECT_NE(node1, node2); +} diff --git a/unit_tests/kernel/CMakeLists.txt 
b/unit_tests/kernel/CMakeLists.txt index aa316de08c..c5aa112ead 100644 --- a/unit_tests/kernel/CMakeLists.txt +++ b/unit_tests/kernel/CMakeLists.txt @@ -15,6 +15,7 @@ set(IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_pipe_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_svm_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_cache_flush_requirements_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_is_patched_tests.cpp diff --git a/unit_tests/kernel/kernel_cache_flush_requirements_tests.cpp b/unit_tests/kernel/kernel_cache_flush_requirements_tests.cpp new file mode 100644 index 0000000000..052fbaa377 --- /dev/null +++ b/unit_tests/kernel/kernel_cache_flush_requirements_tests.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "unit_tests/fixtures/context_fixture.h" +#include "unit_tests/fixtures/device_fixture.h" +#include "unit_tests/helpers/debug_manager_state_restore.h" +#include "unit_tests/mocks/mock_command_queue.h" +#include "unit_tests/mocks/mock_context.h" +#include "unit_tests/mocks/mock_graphics_allocation.h" +#include "unit_tests/mocks/mock_kernel.h" +#include "unit_tests/mocks/mock_program.h" + +using namespace OCLRT; + +TEST(KernelWithCasheFlushTests, givenDeviceWhichDoesntRequireCasheFlushWhenCheckIfKernelRequierFlushThenReturnedFalse) { + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + + auto mockKernel = std::make_unique(*device); + MockContext mockContext(device.get()); + MockCommandQueue queue; + bool flushRequierd = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue); + EXPECT_FALSE(flushRequierd); +} diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index a866f64892..c830f8212c 
100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -2385,24 +2385,6 @@ TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToC EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } -TEST(KernelTest, whenQueueAndKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { - MockGraphicsAllocation mockAllocation; - auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); - MockKernelWithInternals kernel(*device); - kernel.mockKernel->svmAllocationsRequireCacheFlush = true; - - MockCommandQueue queue; - - DebugManagerStateRestore debugRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(true); - - queue.requiresCacheFlushAfterWalker = true; - EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); - - queue.requiresCacheFlushAfterWalker = false; - EXPECT_FALSE(kernel.mockKernel->requiresCacheFlushCommand(queue)); -} - TEST(KernelTest, whenCacheFlushEnabledForAllQueuesAndKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { MockGraphicsAllocation mockAllocation; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); diff --git a/unit_tests/mocks/mock_command_queue.h b/unit_tests/mocks/mock_command_queue.h index c341ee77a5..862d10b676 100644 --- a/unit_tests/mocks/mock_command_queue.h +++ b/unit_tests/mocks/mock_command_queue.h @@ -73,7 +73,9 @@ class MockCommandQueueHw : public CommandQueueHw { public: using BaseClass::commandStream; using BaseClass::engine; + using BaseClass::multiEngineQueue; using BaseClass::obtainNewTimestampPacketNodes; + using BaseClass::requiresCacheFlushAfterWalker; using BaseClass::timestampPacketContainer; MockCommandQueueHw(Context *context, diff --git a/unit_tests/mocks/mock_kernel.cpp b/unit_tests/mocks/mock_kernel.cpp index 1e61a29b9e..464de440e4 100644 --- a/unit_tests/mocks/mock_kernel.cpp +++ 
b/unit_tests/mocks/mock_kernel.cpp @@ -53,4 +53,7 @@ void MockKernel::getResidency(std::vector &dst) { getResidencyCalls++; Kernel::getResidency(dst); } +bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { + return DebugManager.flags.EnableCacheFlushAfterWalker.get(); +} } // namespace OCLRT diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 58c54d8b9e..6224c7bd82 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -237,6 +237,8 @@ class MockKernel : public Kernel { void setSpecialPipelineSelectMode(bool value) { specialPipelineSelectMode = value; } + bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const override; + uint32_t makeResidentCalls = 0; uint32_t getResidencyCalls = 0; mutable uint32_t takeOwnershipCalls = 0;