diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 54159578a9..ac11070f74 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -402,6 +402,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool setupDebugSurface(Kernel *kernel); + bool getRequiresCacheFlushAfterWalker() const { + return requiresCacheFlushAfterWalker; + } + // taskCount of last task uint32_t taskCount = 0; @@ -451,6 +455,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool mapDcFlushRequired = false; bool isSpecialCommandQueue = false; + bool requiresCacheFlushAfterWalker = false; std::unique_ptr timestampPacketContainer; diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index 1dcf91362f..202edfd960 100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -55,8 +55,12 @@ class CommandQueueHw : public CommandQueue { getCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); getCommandStreamReceiver().enableNTo1SubmissionModel(); } + + this->requiresCacheFlushAfterWalker = CommandQueueHw::requiresCacheFlushAfterWalkerBasedOnProperties(properties); } + static bool requiresCacheFlushAfterWalkerBasedOnProperties(const cl_queue_properties *properties); + static CommandQueue *create(Context *context, Device *device, const cl_queue_properties *properties) { diff --git a/runtime/command_queue/command_queue_hw.inl b/runtime/command_queue/command_queue_hw.inl index ce53b1f5f4..7a02b7e0fe 100644 --- a/runtime/command_queue/command_queue_hw.inl +++ b/runtime/command_queue/command_queue_hw.inl @@ -41,4 +41,8 @@ void CommandQueueHw::notifyEnqueueReadImage(Image *image, bool blockingR image->getGraphicsAllocation()->setAllocDumpable(blockingRead); } } +template +bool CommandQueueHw::requiresCacheFlushAfterWalkerBasedOnProperties(const cl_queue_properties *properties) { + return false; +} } // namespace OCLRT diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl index 523e84274b..6e4d01dc37 100644 --- a/runtime/command_queue/gpgpu_walker.inl +++ b/runtime/command_queue/gpgpu_walker.inl @@ -417,7 +417,7 @@ template size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) { size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper::getSizeRequiredCS(pKernel) + sizeof(PIPE_CONTROL) * (KernelCommandsHelper::isPipeControlWArequired() ? 2 : 1); - size += KernelCommandsHelper::getSizeRequiredForCacheFlush(pKernel, 0U, 0U); + size += KernelCommandsHelper::getSizeRequiredForCacheFlush(commandQueue, pKernel, 0U, 0U); size += PreemptionHelper::getPreemptionWaCsSize(commandQueue.getDevice()); if (reserveProfilingCmdsSpace) { size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); diff --git a/runtime/command_queue/hardware_interface.inl b/runtime/command_queue/hardware_interface.inl index 6f0c970d68..47bdbb0500 100644 --- a/runtime/command_queue/hardware_interface.inl +++ b/runtime/command_queue/hardware_interface.inl @@ -214,7 +214,7 @@ void HardwareInterface::dispatchWalker( *pPipeControlCmd = GfxFamily::cmdInitPipeControl; pPipeControlCmd->setCommandStreamerStallEnable(true); } - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, &kernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, &kernel, 0U, 0U); currentDispatchIndex++; } diff --git a/runtime/gen10/command_queue_gen10.cpp b/runtime/gen10/command_queue_gen10.cpp index e0fb1237e8..b75c0dc9b0 100644 --- a/runtime/gen10/command_queue_gen10.cpp +++ b/runtime/gen10/command_queue_gen10.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,6 +14,8 @@ namespace OCLRT { typedef CNLFamily Family; static auto gfxCore = IGFX_GEN10_CORE; +template class CommandQueueHw; + template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; diff --git a/runtime/gen8/command_queue_gen8.cpp b/runtime/gen8/command_queue_gen8.cpp index 0430607ef7..34685b307b 100644 --- a/runtime/gen8/command_queue_gen8.cpp +++ b/runtime/gen8/command_queue_gen8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,6 +14,8 @@ namespace OCLRT { typedef BDWFamily Family; static auto gfxCore = IGFX_GEN8_CORE; +template class CommandQueueHw; + template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; diff --git a/runtime/gen9/command_queue_gen9.cpp b/runtime/gen9/command_queue_gen9.cpp index 5ff5c2555d..6da47fcf05 100644 --- a/runtime/gen9/command_queue_gen9.cpp +++ b/runtime/gen9/command_queue_gen9.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,6 +14,8 @@ namespace OCLRT { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; +template class CommandQueueHw; + template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; diff --git a/runtime/helpers/kernel_commands.h b/runtime/helpers/kernel_commands.h index 867857837d..c99ba02eff 100644 --- a/runtime/helpers/kernel_commands.h +++ b/runtime/helpers/kernel_commands.h @@ -141,7 +141,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper { Kernel &kernel); static size_t getSizeRequiredCS(const Kernel *kernel); - static size_t getSizeRequiredForCacheFlush(const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData); + static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData); static bool isPipeControlWArequired(); static size_t getSizeRequiredDSH( const Kernel &kernel); @@ -201,7 +201,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper { static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData); static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); - static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData); + static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData); static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t); static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t); diff --git a/runtime/helpers/kernel_commands_base.inl b/runtime/helpers/kernel_commands_base.inl index d97423639d..1cbc938c8d 100644 --- a/runtime/helpers/kernel_commands_base.inl +++ b/runtime/helpers/kernel_commands_base.inl @@ -51,8 +51,8 @@ size_t KernelCommandsHelper::getSizeRequiredCS(const Kernel *kernel) } template -size_t KernelCommandsHelper::getSizeRequiredForCacheFlush(const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { - return kernel->requiresCacheFlushCommand() ? sizeof(typename GfxFamily::PIPE_CONTROL) : 0; +size_t KernelCommandsHelper::getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { + return kernel->requiresCacheFlushCommand(commandQueue) ? sizeof(typename GfxFamily::PIPE_CONTROL) : 0; } template @@ -163,8 +163,8 @@ bool KernelCommandsHelper::isRuntimeLocalIdsGenerationRequired(uint32 } template -void KernelCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { - if (kernel->requiresCacheFlushCommand()) { +void KernelCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) { + if (kernel->requiresCacheFlushCommand(commandQueue)) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; auto pipeControl = reinterpret_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); *pipeControl = GfxFamily::cmdInitPipeControl; diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index c3692fbb65..7a548e590f 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -11,6 +11,7 @@ #include "runtime/built_ins/built_ins.h" #include "runtime/built_ins/builtins_dispatch_builder.h" #include "runtime/command_stream/command_stream_receiver.h" +#include "runtime/command_queue/command_queue.h" #include "runtime/context/context.h" #include "runtime/device_queue/device_queue.h" #include "runtime/execution_model/device_enqueue.h" @@ -31,6 +32,7 @@ #include "runtime/memory_manager/memory_manager.h" #include "runtime/memory_manager/surface.h" #include "runtime/os_interface/debug_settings_manager.h" +#include "runtime/platform/platform.h" #include "runtime/program/kernel_info.h" #include "runtime/program/printf_handler.h" #include "runtime/sampler/sampler.h" @@ -2142,10 +2144,16 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF } } -bool Kernel::requiresCacheFlushCommand() const { +bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (false == HwHelper::cacheFlushAfterWalkerSupported(device.getHardwareInfo())) { return false; } + + bool cmdQueueRequiresCacheFlush = commandQueue.getRequiresCacheFlushAfterWalker() || DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get(); + if (false == cmdQueueRequiresCacheFlush) { + return false; + } + if (getProgram()->getGlobalSurface() != nullptr) { return true; } diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index 31028d896a..4bd63c3471 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -376,7 +376,7 @@ class Kernel : public BaseObject<_cl_kernel> { void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation); - bool requiresCacheFlushCommand() const; + bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; using CacheFlushAllocationsVec = StackVec; void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; diff --git a/runtime/os_interface/debug_variables_base.inl b/runtime/os_interface/debug_variables_base.inl index b99fb5fff4..00f581b402 100644 --- a/runtime/os_interface/debug_variables_base.inl +++ b/runtime/os_interface/debug_variables_base.inl @@ -81,6 +81,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active a DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForBuffers, false, "When active all buffer allocations will not share memory with CPU.") DECLARE_DEBUG_VARIABLE(bool, EnableHostPtrTracking, true, "Enable host ptr tracking") DECLARE_DEBUG_VARIABLE(bool, DisableDcFlushInEpilogue, false, "Disable DC flush in epilogue") +DECLARE_DEBUG_VARIABLE(bool, EnableCacheFlushAfterWalkerForAllQueues, false, "Enable cache flush after walker even if queue doesn't require it") /*FEATURE FLAGS*/ DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension") diff --git a/unit_tests/command_queue/command_queue_fixture.cpp b/unit_tests/command_queue/command_queue_fixture.cpp index 2e682b5d4b..0d892375aa 100644 --- a/unit_tests/command_queue/command_queue_fixture.cpp +++ b/unit_tests/command_queue/command_queue_fixture.cpp @@ -1,15 +1,17 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#include "hw_cmds.h" #include "unit_tests/command_queue/command_queue_fixture.h" #include "runtime/command_queue/command_queue_hw.h" -#include "hw_cmds.h" #include "runtime/context/context.h" #include "runtime/device/device.h" +#include "unit_tests/mocks/mock_device.h" + #include "gtest/gtest.h" namespace OCLRT { @@ -17,21 +19,32 @@ namespace OCLRT { // Global table of create functions extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; -CommandQueueHwFixture::CommandQueueHwFixture() - : pCmdQ(nullptr), context(nullptr) { -} - CommandQueue *CommandQueueHwFixture::createCommandQueue( Device *pDevice, cl_command_queue_properties properties) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, properties, 0}; - auto funcCreate = commandQueueFactory[pDevice->getRenderCoreFamily()]; - assert(nullptr != funcCreate); + return createCommandQueue(pDevice, props); +} + +CommandQueue *CommandQueueHwFixture::createCommandQueue( + Device *pDevice, + const cl_command_queue_properties *properties) { + + if (pDevice == nullptr) { + if (this->device == nullptr) { + this->device = MockDevice::createWithNewExecutionEnvironment(nullptr); + } + pDevice = this->device; + } + if (!context) context = new MockContext(pDevice); - return funcCreate(context, pDevice, props); + auto funcCreate = commandQueueFactory[pDevice->getRenderCoreFamily()]; + assert(nullptr != funcCreate); + + return funcCreate(context, pDevice, properties); } void CommandQueueHwFixture::SetUp() { @@ -55,7 +68,12 @@ void CommandQueueHwFixture::TearDown() { UNRECOVERABLE_IF(blocked); pCmdQ->release(); } - context->release(); + if (context) { + context->release(); + } + if (device) { + delete device; + } } CommandQueueFixture::CommandQueueFixture() diff --git a/unit_tests/command_queue/command_queue_fixture.h b/unit_tests/command_queue/command_queue_fixture.h index 7a089a0e76..e24fe1cb37 100644 --- a/unit_tests/command_queue/command_queue_fixture.h +++ b/unit_tests/command_queue/command_queue_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,19 +15,26 @@ namespace OCLRT { class Device; struct CommandQueueHwFixture { - CommandQueueHwFixture(); + CommandQueue *createCommandQueue(Device *device) { + return createCommandQueue(device, cl_command_queue_properties{0}); + } CommandQueue *createCommandQueue( Device *device, cl_command_queue_properties properties); + CommandQueue *createCommandQueue( + Device *device, + const cl_command_queue_properties *properties); + virtual void SetUp(); virtual void SetUp(Device *_pDevice, cl_command_queue_properties properties); virtual void TearDown(); - CommandQueue *pCmdQ; - MockContext *context; + CommandQueue *pCmdQ = nullptr; + Device *device = nullptr; + MockContext *context = nullptr; }; struct OOQueueFixture : public CommandQueueHwFixture { diff --git a/unit_tests/command_queue/command_queue_hw_tests.cpp b/unit_tests/command_queue/command_queue_hw_tests.cpp index affeda5745..612ed08b30 100644 --- a/unit_tests/command_queue/command_queue_hw_tests.cpp +++ b/unit_tests/command_queue/command_queue_hw_tests.cpp @@ -1012,3 +1012,7 @@ HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnq pCmdQ->isQueueBlocked(); } + +HWTEST_F(CommandQueueHwTest, givenDefaultHwCommandQueueThenCacheFlushAfterWalkerIsNotNeeded) { + EXPECT_FALSE(pCmdQ->getRequiresCacheFlushAfterWalker()); +} diff --git a/unit_tests/command_queue/enqueue_kernel_1_tests.cpp b/unit_tests/command_queue/enqueue_kernel_1_tests.cpp index 6220b98ecf..7b54c95f9c 100644 --- a/unit_tests/command_queue/enqueue_kernel_1_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_1_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDRangeKernel size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; - CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); + CommandQueue *pCmdQ2 = createCommandQueue(pDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); @@ -79,7 +79,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; - CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); + CommandQueue *pCmdQ2 = createCommandQueue(pDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); @@ -118,7 +118,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; - CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); + CommandQueue *pCmdQ2 = createCommandQueue(pDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); @@ -380,7 +380,7 @@ HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtr MockKernelWithInternals mockKernel(*device, context); size_t gws[3] = {1, 0, 0}; mockCsr->makeResident(*allocation); - cmdQ.reset(createCommandQueue(device.get(), 0)); + cmdQ.reset(createCommandQueue(device.get())); auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush); @@ -402,7 +402,7 @@ HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtr MockKernelWithInternals mockKernel(*device, context); size_t gws[3] = {1, 0, 0}; mockCsr->makeResident(*allocation); - cmdQ.reset(createCommandQueue(device.get(), 0)); + cmdQ.reset(createCommandQueue(device.get())); auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); @@ -425,7 +425,7 @@ HWTEST_F(EnqueueKernelTest, givenFullAddressSpaceGraphicsAllocationWhenEnqueueKe MockKernelWithInternals mockKernel(*device, context); size_t gws[3] = {1, 0, 0}; mockCsr->makeResident(*allocation); - cmdQ.reset(createCommandQueue(device.get(), 0)); + cmdQ.reset(createCommandQueue(device.get())); auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); @@ -433,7 +433,7 @@ HWTEST_F(EnqueueKernelTest, givenFullAddressSpaceGraphicsAllocationWhenEnqueueKe allocation = (memoryManager->allocateGraphicsMemoryForHostPtr(1, hostPtr, device->isFullRangeSvm(), true)); mockCsr->makeResident(*allocation); - cmdQ.reset(createCommandQueue(device.get(), 0)); + cmdQ.reset(createCommandQueue(device.get())); ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); @@ -944,7 +944,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; - CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); + CommandQueue *pCmdQ2 = createCommandQueue(pDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); diff --git a/unit_tests/command_queue/enqueue_kernel_2_tests.cpp b/unit_tests/command_queue/enqueue_kernel_2_tests.cpp index c173bfcaa7..7b0d2ec25a 100644 --- a/unit_tests/command_queue/enqueue_kernel_2_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_2_tests.cpp @@ -864,6 +864,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabled DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); MockKernelWithInternals mockKernel(*pDevice, context); CommandQueueHw cmdQ(context, pDevice, nullptr); diff --git a/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp b/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp index 9d90fb004f..98688681ac 100644 --- a/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_two_ioq_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ struct TwoIOQsTwoDependentWalkers : public HelloWorldTest(*pCmdQ); // Create a second command queue (beyond the default one) - pCmdQ2 = createCommandQueue(pDevice, 0); + pCmdQ2 = createCommandQueue(pDevice); ASSERT_NE(nullptr, pCmdQ2); retVal = pCmdQ2->enqueueKernel( diff --git a/unit_tests/context/driver_diagnostics_tests.h b/unit_tests/context/driver_diagnostics_tests.h index 8f465e0a08..3c2d1b93a9 100644 --- a/unit_tests/context/driver_diagnostics_tests.h +++ b/unit_tests/context/driver_diagnostics_tests.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -130,7 +130,7 @@ struct PerformanceHintCommandQueueTest : public PerformanceHintTest, struct PerformanceHintEnqueueTest : public PerformanceHintTest { void SetUp() override { PerformanceHintTest::SetUp(); - pCmdQ = createCommandQueue(pPlatform->getDevice(0), 0); + pCmdQ = createCommandQueue(pPlatform->getDevice(0)); } void TearDown() override { diff --git a/unit_tests/helpers/kernel_commands_tests.cpp b/unit_tests/helpers/kernel_commands_tests.cpp index 6f736e34b1..01ea75c880 100644 --- a/unit_tests/helpers/kernel_commands_tests.cpp +++ b/unit_tests/helpers/kernel_commands_tests.cpp @@ -1233,6 +1233,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); CommandQueueHw cmdQ(nullptr, pDevice, 0); auto &commandStream = cmdQ.getCS(1024); @@ -1245,10 +1246,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &globalAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(mockKernelWithInternal->mockKernel, 0U, 0U); + size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); @@ -1267,6 +1268,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); CommandQueueHw cmdQ(nullptr, pDevice, 0); auto &commandStream = cmdQ.getCS(1024); @@ -1285,10 +1287,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable EXPECT_EQ(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation2)); size_t expectedSize = sizeof(PIPE_CONTROL); - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(mockKernelWithInternal->mockKernel, 0U, 0U); + size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); @@ -1305,6 +1307,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerDisabl DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); CommandQueueHw cmdQ(nullptr, pDevice, 0); auto &commandStream = cmdQ.getCS(1024); @@ -1312,10 +1315,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerDisabl mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; size_t expectedSize = 0U; - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(mockKernelWithInternal->mockKernel, 0U, 0U); + size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); @@ -1330,6 +1333,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); CommandQueueHw cmdQ(nullptr, pDevice, 0); auto &commandStream = cmdQ.getCS(1024); @@ -1344,10 +1348,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(mockKernelWithInternal->mockKernel, 0U, 0U); + size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); @@ -1364,6 +1368,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); CommandQueueHw cmdQ(nullptr, pDevice, 0); auto &commandStream = cmdQ.getCS(1024); @@ -1371,10 +1376,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable addSpaceForSingleKernelArg(); size_t expectedSize = 0U; - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(mockKernelWithInternal->mockKernel, 0U, 0U); + size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); @@ -1389,6 +1394,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); hwInfoHelper.capabilityTable.supportCacheFlushAfterWalker = false; CommandQueueHw cmdQ(nullptr, pDevice, 0); @@ -1403,10 +1409,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenCacheFlushAfterWalkerEnable EXPECT_EQ(0U, allocationsForCacheFlush.size()); size_t expectedSize = 0U; - size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(mockKernelWithInternal->mockKernel, 0U, 0U); + size_t actualSize = KernelCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); EXPECT_EQ(expectedSize, actualSize); - KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, mockKernelWithInternal->mockKernel, 0U, 0U); + KernelCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index 65e2c8487a..715814b8c3 100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -23,6 +23,7 @@ #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/gtest_helpers.h" +#include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_graphics_allocation.h" #include "unit_tests/mocks/mock_kernel.h" #include "unit_tests/mocks/mock_program.h" @@ -2384,6 +2385,43 @@ TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToC EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } +TEST(KernelTest, whenQueueAndKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { + MockGraphicsAllocation mockAllocation; + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + MockKernelWithInternals kernel(*device); + kernel.mockKernel->svmAllocationsRequireCacheFlush = true; + + MockCommandQueue queue; + + DebugManagerStateRestore debugRestore; + DebugManager.flags.EnableCacheFlushAfterWalker.set(true); + + queue.requiresCacheFlushAfterWalker = true; + EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); + + queue.requiresCacheFlushAfterWalker = false; + EXPECT_FALSE(kernel.mockKernel->requiresCacheFlushCommand(queue)); +} + +TEST(KernelTest, whenCacheFlushEnabledForAllQueuesAndKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { + MockGraphicsAllocation mockAllocation; + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); + MockKernelWithInternals kernel(*device); + kernel.mockKernel->svmAllocationsRequireCacheFlush = true; + + MockCommandQueue queue; + + DebugManagerStateRestore debugRestore; + DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(true); + DebugManager.flags.EnableCacheFlushAfterWalker.set(true); + + queue.requiresCacheFlushAfterWalker = true; + EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); + + queue.requiresCacheFlushAfterWalker = false; + EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); +} + TEST(KernelTest, whenAllocationWriteableThenAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); diff --git a/unit_tests/mocks/mock_command_queue.h b/unit_tests/mocks/mock_command_queue.h index e43a12aabc..ae4248525d 100644 --- a/unit_tests/mocks/mock_command_queue.h +++ b/unit_tests/mocks/mock_command_queue.h @@ -18,6 +18,7 @@ class MockCommandQueue : public CommandQueue { public: using CommandQueue::device; using CommandQueue::obtainNewTimestampPacketNodes; + using CommandQueue::requiresCacheFlushAfterWalker; using CommandQueue::throttle; using CommandQueue::timestampPacketContainer; diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index 272378f0ca..24cbe735c8 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -107,3 +107,4 @@ AUBDumpForceAllToLocalMemory = 0 EnableCacheFlushAfterWalker = 0 EnableHostPtrTracking = 1 DisableDcFlushInEpilogue = 0 +EnableCacheFlushAfterWalkerForAllQueues = 0