From 15a0bf9d7ba2ae725cd435521a19cb064a322c72 Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Tue, 12 Sep 2023 12:13:00 +0000 Subject: [PATCH] refactor: remove not needed code. Signed-off-by: Mrozek, Michal --- opencl/source/command_queue/command_queue.cpp | 7 +- opencl/source/command_queue/command_queue.h | 1 - .../source/command_queue/command_queue_hw.h | 2 - .../command_queue/command_queue_hw_base.inl | 5 -- .../gpgpu_walker_xehp_and_later.inl | 20 +---- .../command_queue/hardware_interface_base.inl | 10 --- opencl/source/kernel/kernel.cpp | 19 ----- opencl/source/kernel/kernel.h | 3 - .../api/cl_set_kernel_exec_info_tests.inl | 54 ------------ .../command_queue/enqueue_kernel_2_tests.cpp | 27 ------ .../hardware_commands_helper_tests.cpp | 38 --------- .../helpers/timestamp_packet_2_tests.cpp | 22 ----- .../unit_test/kernel/cache_flush_tests.inl | 36 -------- .../cache_flush_xehp_and_later_tests.inl | 64 --------------- .../unit_test/kernel/kernel_arg_svm_tests.cpp | 57 ------------- .../kernel_cache_flush_requirements_tests.cpp | 82 ------------------- opencl/test/unit_test/kernel/kernel_tests.cpp | 18 ---- .../test/unit_test/mocks/mock_command_queue.h | 2 - opencl/test/unit_test/mocks/mock_kernel.h | 1 - 19 files changed, 2 insertions(+), 466 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 7cf01c21fe..310c11ed2c 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -953,12 +953,7 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp } size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const { - size_t nodesCount = dispatchInfo.size(); - auto mainKernel = dispatchInfo.peekMainKernel(); - if (obtainTimestampPacketForCacheFlush(mainKernel->requiresCacheFlushCommand(*this))) { - ++nodesCount; - } - return nodesCount; + return dispatchInfo.size(); } 
bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index d9b469c5c7..481b291c08 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -411,7 +411,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); - virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; void assignDataToOverwrittenBcsNode(TagNodeBase *node); void registerGpgpuCsrClient(); diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 7c87720578..8d992feb8a 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -491,8 +491,6 @@ class CommandQueueHw : public CommandQueue { TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, bool queueBlocked); - bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override; - bool isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; static void computeOffsetsValueForRectCommands(size_t *bufferOffset, diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 74fa74a50e..b9e823f493 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ 
b/opencl/source/command_queue/command_queue_hw_base.inl @@ -201,11 +201,6 @@ void CommandQueueHw::setupBlitAuxTranslation(MultiDispatchInfo &multiDis TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency); } -template -bool CommandQueueHw::obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const { - return isCacheFlushRequired; -} - template bool CommandQueueHw::isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies) const { if (queueBlocked || timestampPacketDependencies.barrierNodes.peekNodes().size() > 0u) { diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl index a51cc791b8..fee0907182 100644 --- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl +++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl @@ -167,25 +167,7 @@ void GpgpuWalkerHelper::dispatchProfilingCommandsEnd(TagNodeBase &hwT template size_t EnqueueOperation::getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue) { - size_t size = 0; - - if (kernel.requiresCacheFlushCommand(commandQueue)) { - size += MemorySynchronizationCommands::getSizeForSingleBarrier(false); - - if constexpr (GfxFamily::isUsingL3Control) { - StackVec allocationsForCacheFlush; - kernel.getAllocationsForCacheFlush(allocationsForCacheFlush); - - StackVec subranges; - for (auto &allocation : allocationsForCacheFlush) { - coverRangeExact(allocation->getGpuAddress(), allocation->getUnderlyingBufferSize(), subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - } - - size += getSizeNeededToFlushGpuCache(subranges, true); - } - } - - return size; + return 0; } } // namespace NEO diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 6dea1bd957..d27b12d184 100644 --- 
a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -149,16 +149,6 @@ void HardwareInterface::dispatchWalker( dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getRootDeviceEnvironment()); } - if (mainKernel->requiresCacheFlushCommand(commandQueue)) { - uint64_t postSyncAddress = 0; - if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - auto timestampPacketNodeForPostSync = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex); - timestampPacketNodeForPostSync->setProfilingCapable(false); - postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync); - } - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress); - } - if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(DebugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) { uint32_t registerOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get(); uint32_t registerData = DebugManager.flags.GpuScratchRegWriteRegisterData.get(); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 7ff8c077c8..46b963667f 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1031,14 +1031,10 @@ bool Kernel::getAllowNonUniform() const { void Kernel::setSvmKernelExecInfo(GraphicsAllocation *argValue) { kernelSvmGfxAllocations.push_back(argValue); - if (allocationForCacheFlush(argValue)) { - svmAllocationsRequireCacheFlush = true; - } } void Kernel::clearSvmKernelExecInfo() { kernelSvmGfxAllocations.clear(); - svmAllocationsRequireCacheFlush = false; } void Kernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { @@ -2045,18 +2041,6 @@ void 
Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { if (global != nullptr) { out.push_back(global); } - - if (svmAllocationsRequireCacheFlush) { - for (GraphicsAllocation *alloc : kernelSvmGfxAllocations) { - if (allocationForCacheFlush(alloc)) { - out.push_back(alloc); - } - } - } -} - -bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) const { - return argAllocation->isFlushL3Required(); } uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const { @@ -2252,9 +2236,6 @@ bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (getProgram()->getGlobalSurface(commandQueue.getDevice().getRootDeviceIndex()) != nullptr) { return true; } - if (svmAllocationsRequireCacheFlush) { - return true; - } return false; } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 3b4c272dec..38ffcf7419 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -466,8 +466,6 @@ class Kernel : public ReferenceTrackedObject { bool hasDirectStatelessAccessToHostMemory() const; bool hasIndirectStatelessAccessToHostMemory() const; - bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; - const ClDevice &getDevice() const { return clDevice; } @@ -532,7 +530,6 @@ class Kernel : public ReferenceTrackedObject { bool usingImagesOnly = false; bool auxTranslationRequired = false; bool systolicPipelineSelectMode = false; - bool svmAllocationsRequireCacheFlush = false; bool isUnifiedMemorySyncRequired = true; bool singleSubdevicePreferredInCurrentEnqueue = false; bool kernelHasIndirectAccess = true; diff --git a/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl b/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl index 11d1553745..dcdb3d3596 100644 --- a/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl +++ 
b/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl @@ -202,60 +202,6 @@ TEST_F(clSetKernelExecInfoTests, GivenValidPointerListWithOnePointerWhenSettingA } } -TEST_F(clSetKernelExecInfoTests, GivenValidPointerListWithMultiplePointersWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { - if (svmCapabilities != 0) { - void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); - EXPECT_NE(nullptr, ptrSvm1); - - void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); - EXPECT_NE(nullptr, ptrSvm2); - - void *pSvmPtrList[] = {ptrSvm, ptrSvm1, ptrSvm2}; - size_t svmPtrListSizeInBytes = 3 * sizeof(void *); - - retVal = clSetKernelExecInfo( - pMockMultiDeviceKernel, // cl_kernel kernel - CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name - svmPtrListSizeInBytes, // size_t param_value_size - pSvmPtrList // const void *param_value - ); - EXPECT_EQ(CL_SUCCESS, retVal); - - EXPECT_EQ(3u, pMockKernel->kernelSvmGfxAllocations.size()); - EXPECT_TRUE(pMockKernel->svmAllocationsRequireCacheFlush); - - clSVMFree(pContext, ptrSvm1); - clSVMFree(pContext, ptrSvm2); - } -} - -TEST_F(clSetKernelExecInfoTests, givenReadOnlySvmPtrListWhenUsedAsKernelPointersThenCacheFlushIsNotRequired) { - if (svmCapabilities != 0) { - void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4); - EXPECT_NE(nullptr, ptrSvm1); - - void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4); - EXPECT_NE(nullptr, ptrSvm2); - - void *pSvmPtrList[] = {ptrSvm1, ptrSvm2}; - size_t svmPtrListSizeInBytes = 2 * sizeof(void *); - - retVal = clSetKernelExecInfo( - pMockMultiDeviceKernel, // cl_kernel kernel - CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name - svmPtrListSizeInBytes, // size_t param_value_size - pSvmPtrList // const void *param_value - ); - EXPECT_EQ(CL_SUCCESS, retVal); - - EXPECT_EQ(2u, pMockKernel->kernelSvmGfxAllocations.size()); - EXPECT_FALSE(pMockKernel->svmAllocationsRequireCacheFlush); - - clSVMFree(pContext, ptrSvm1); - 
clSVMFree(pContext, ptrSvm2); - } -} - TEST_F(clSetKernelExecInfoTests, GivenMultipleSettingKernelInfoOperationsWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *pSvmPtrList[] = {ptrSvm}; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 9320400108..8a0b0811f1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -1058,33 +1058,6 @@ HWTEST_F(BlitAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhenDi EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size()); } -HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) { - using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - MockKernelWithInternals mockKernel(*pClDevice, context); - CommandQueueHw cmdQ(context, pClDevice, nullptr, false); - - size_t gws[3] = {1, 0, 0}; - - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = true; - - cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - - HardwareParse hwParse; - hwParse.parseCommands(cmdQ.getCS(0), 0); - auto itorCmd = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); - ASSERT_NE(hwParse.cmdList.end(), itorCmd); - itorCmd = find(itorCmd, hwParse.cmdList.end()); - auto pipeControl = genCmdCast(*itorCmd); - ASSERT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_TRUE(pipeControl->getDcFlushEnable()); -} - HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutWaitListThenSizeHasFourMMIOStoresAndPipeControll) { 
pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*pClDevice); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index ef83a2a7f0..6467399631 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -1134,44 +1134,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } -HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - auto &commandStream = cmdQ.getCS(1024); - - char buff[MemoryConstants::pageSize * 2]; - MockGraphicsAllocation svmAllocation1{alignUp(buff, MemoryConstants::pageSize), MemoryConstants::pageSize}; - mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation1); - MockGraphicsAllocation svmAllocation2{alignUp(buff, MemoryConstants::pageSize), MemoryConstants::pageSize}; - svmAllocation2.setFlushL3Required(false); - mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation2); - mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; - - Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); - EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation1)); - EXPECT_EQ(allocs.end(), 
std::find(allocs.begin(), allocs.end(), &svmAllocation2)); - - size_t expectedSize = sizeof(PIPE_CONTROL); - size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); - EXPECT_EQ(expectedSize, actualSize); - - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); - - HardwareParse hwParse; - hwParse.parseCommands(commandStream); - PIPE_CONTROL *pipeControl = hwParse.getCommand(); - ASSERT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_TRUE(pipeControl->getDcFlushEnable()); -} - HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerDisabledWhenGettingRequiredCacheFlushSizeThenReturnZero) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index a4c1f44bca..476673c8e6 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -414,28 +414,6 @@ HWTEST_F(TimestampPacketTests, givenKernelWhichDoesntRequireFlushWhenEnqueueingK EXPECT_EQ(size, 1u); } -HWTEST_F(TimestampPacketTests, givenKernelWhichRequiresFlushWhenEnqueueingKernelThenTwoNodesAreCreated) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(true); - - auto &csr = device->getUltCommandStreamReceiver(); - csr.timestampPacketWriteEnabled = true; - - auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); - csr.timestampPacketAllocator.reset(mockTagAllocator); - auto cmdQ = std::make_unique>(context, device.get(), nullptr); - kernel->mockKernel->svmAllocationsRequireCacheFlush = true; - // obtain first node for cmdQ and event1 - cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 
0, nullptr, nullptr); - auto node1 = cmdQ->timestampPacketContainer->peekNodes().at(0); - auto node2 = cmdQ->timestampPacketContainer->peekNodes().at(1); - auto size = cmdQ->timestampPacketContainer->peekNodes().size(); - EXPECT_EQ(size, 2u); - EXPECT_NE(nullptr, node1); - EXPECT_NE(nullptr, node2); - EXPECT_NE(node1, node2); -} - HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentCSRsWhenEnqueueingThenMakeAllTimestampsResident) { MockTagAllocator> tagAllocator(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1, 1, sizeof(TimestampPackets), false, device->getDeviceBitfield()); diff --git a/opencl/test/unit_test/kernel/cache_flush_tests.inl b/opencl/test/unit_test/kernel/cache_flush_tests.inl index a3aa717e98..d8ca68686b 100644 --- a/opencl/test/unit_test/kernel/cache_flush_tests.inl +++ b/opencl/test/unit_test/kernel/cache_flush_tests.inl @@ -48,42 +48,6 @@ struct L3ControlPolicy : CmdValidator { bool isA0Stepping; }; -template -class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand : public HardwareCommandsTest { - public: - void testBodyImpl() { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - auto &commandStream = cmdQ.getCS(1024); - - void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); - MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2}; - svmAllocation.setFlushL3Required(true); - this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation); - this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; - - size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITHOUT_POST_SYNC); - size_t actualSize = 
HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U); - EXPECT_EQ(expectedSize, actualSize); - - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, - std::vector({ - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(AtLeastOne), - }), - &err); - EXPECT_TRUE(cmdBuffOk) << err; - } -}; - template class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand : public HardwareCommandsTest { public: diff --git a/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl b/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl index 15490d538f..90a42dc853 100644 --- a/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl +++ b/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl @@ -46,70 +46,6 @@ struct L3ControlPolicy : CmdValidator { typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy; }; -template -class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCorrectCommandSize : public HardwareCommandsTest { - public: - void testBodyImpl() { - using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - - void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); - MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2}; - 
svmAllocation.setFlushL3Required(true); - this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation); - this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; - StackVec allocationsForCacheFlush; - this->mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); - StackVec subranges; - for (GraphicsAllocation *alloc : allocationsForCacheFlush) { - coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - } - size_t expectedSize = sizeof(COMPUTE_WALKER) + sizeof(PIPE_CONTROL); - DispatchInfo di; - size_t actualSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, cmdQ, this->mockKernelWithInternal->mockKernel, di); - EXPECT_EQ(expectedSize, actualSize); - } -}; - -template -class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand : public HardwareCommandsTest { - public: - void testBodyImpl() { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - auto &commandStream = cmdQ.getCS(1024); - - void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); - MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2}; - svmAllocation.setFlushL3Required(true); - this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation); - this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; - - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, - 
std::vector({ - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}), - }), - &err); - EXPECT_TRUE(cmdBuffOk) << err; - } -}; - template class GivenCacheFlushAfterWalkerEnabledWhenNoGlobalSurfaceSvmAllocationKernelArgRequireCacheFlushThenExpectNoCacheFlushCommand : public HardwareCommandsTest { public: diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 586e2ac310..384f011b96 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -468,63 +468,6 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN alignedFree(svmPtr); } -TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenDoNotExpectSvmFlushFlagTrue) { - const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); - if (devInfo.svmCapabilities == 0) { - GTEST_SKIP(); - } - - size_t svmSize = 4096; - void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); - MockGraphicsAllocation svmAlloc(svmPtr, svmSize); - - svmAlloc.setMemObjectsAllocationWithWritableFlags(true); - svmAlloc.setFlushL3Required(false); - - pKernel->setSvmKernelExecInfo(&svmAlloc); - EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush); - - alignedFree(svmPtr); -} - -TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) { - const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); - if (devInfo.svmCapabilities == 0) { - GTEST_SKIP(); - } - - size_t svmSize = 4096; - void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); - MockGraphicsAllocation svmAlloc(svmPtr, svmSize); - - svmAlloc.setMemObjectsAllocationWithWritableFlags(false); - 
svmAlloc.setFlushL3Required(true); - - pKernel->setSvmKernelExecInfo(&svmAlloc); - EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush); - - alignedFree(svmPtr); -} - -TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) { - const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); - if (devInfo.svmCapabilities == 0) { - GTEST_SKIP(); - } - - size_t svmSize = 4096; - void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); - MockGraphicsAllocation svmAlloc(svmPtr, svmSize); - - svmAlloc.setMemObjectsAllocationWithWritableFlags(false); - svmAlloc.setFlushL3Required(false); - - pKernel->setSvmKernelExecInfo(&svmAlloc); - EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush); - - alignedFree(svmPtr); -} - TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvmArgUseGpuAddress) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { diff --git a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp index 2c54ae5014..5dd97b6807 100644 --- a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp @@ -176,86 +176,4 @@ HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithGlobalSurfaceWhenCheckIfK mockKernel->mockProgram->setGlobalSurface(nullptr); clearPlatform(); } - -HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAddRequiredCommands, IsAtLeastXeHpCore) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.CreateMultipleSubDevices.set(2); - - initializePlatform(); - - if (!pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker) { - clearPlatform(); - GTEST_SKIP(); - } - - auto device = pPlatform->getClDevice(0); - - auto mockKernel = std::make_unique(*device); - MockContext 
mockContext(device); - mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; - auto cmdQ = std::make_unique>(&mockContext, device, nullptr); - - CsrDependencies csrDeps; - DispatchInfo dispatchInfo; - MultiDispatchInfo multiDispatchInfo(mockKernel->mockKernel); - dispatchInfo.setKernel(mockKernel->mockKernel); - dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); - dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); - multiDispatchInfo.push(dispatchInfo); - - size_t initialSize = 0; - size_t sizeWithCacheFlush = 0; - size_t expectedDiff = sizeof(typename FamilyType::PIPE_CONTROL); - if constexpr (FamilyType::isUsingL3Control) { - expectedDiff += sizeof(typename FamilyType::L3_CONTROL) + sizeof(typename FamilyType::L3_FLUSH_ADDRESS_RANGE); - } - - { - EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); - - initialSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, false, nullptr); - } - - { - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); - MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; - mockKernel->mockProgram->setGlobalSurface(&globalAllocation); - - cmdQ->requiresCacheFlushAfterWalker = true; - auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); - ultCsr.multiOsContextCapable = false; - EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); - - sizeWithCacheFlush = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, false, nullptr); - } - - EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush); - - mockKernel->mockProgram->setGlobalSurface(nullptr); - clearPlatform(); -} - -HWTEST_F(KernelWithCacheFlushTests, 
givenCacheFlushWithAllocationsRequireCacheFlushFlagOnWhenCheckIfKernelRequireFlushThenReturnedTrue) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - uint32_t numDevices = 2; - DebugManager.flags.CreateMultipleSubDevices.set(numDevices); - initializePlatform(); - auto device = pPlatform->getClDevice(0); - - auto mockKernel = std::make_unique(*device); - MockContext mockContext(device); - mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; - auto cmdQ = std::make_unique>(&mockContext, device, nullptr); - cmdQ->requiresCacheFlushAfterWalker = true; - auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); - ultCsr.multiOsContextCapable = false; - mockKernel->mockKernel->svmAllocationsRequireCacheFlush = true; - bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); - - EXPECT_TRUE(flushRequired); - clearPlatform(); -} - } // namespace NEO diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index bf709d8cbc..d558fded79 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2798,24 +2798,6 @@ TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) EXPECT_EQ(CL_SUCCESS, retVal); } -TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { - MockGraphicsAllocation mockAllocation; - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - MockKernelWithInternals kernel(*device); - kernel.mockKernel->svmAllocationsRequireCacheFlush = true; - - MockCommandQueue queue; - - DebugManagerStateRestore debugRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(true); - - queue.requiresCacheFlushAfterWalker = true; - EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); - - queue.requiresCacheFlushAfterWalker = false; - 
EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); -} - TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) { auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); auto executionEnvironment = device->getExecutionEnvironment(); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 12dc95fc46..0b9011df3c 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -217,8 +217,6 @@ class MockCommandQueue : public CommandQueue { cl_int flush() override { return CL_SUCCESS; } - bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; } - bool waitForTimestamps(Range copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override { waitForTimestampsCalled = true; return false; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 1a114235d0..edaebbf67c 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -128,7 +128,6 @@ class MockKernel : public Kernel { using Kernel::privateSurface; using Kernel::setInlineSamplers; using Kernel::singleSubdevicePreferredInCurrentEnqueue; - using Kernel::svmAllocationsRequireCacheFlush; using Kernel::unifiedMemoryControls; using Kernel::slmSizes;