diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 31ab791ec0..7ff8c077c8 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -306,7 +306,6 @@ cl_int Kernel::initialize() { bool usingBuffers = false; kernelArguments.resize(numArgs); kernelArgHandlers.resize(numArgs); - kernelArgRequiresCacheFlush.resize(numArgs); for (uint32_t i = 0; i < numArgs; ++i) { storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0); @@ -2040,13 +2039,6 @@ void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { if (false == GfxCoreHelper::cacheFlushAfterWalkerSupported(getHardwareInfo())) { return; } - for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) { - if (nullptr == alloc) { - continue; - } - - out.push_back(alloc); - } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); auto global = getProgram()->getGlobalSurface(rootDeviceIndex); @@ -2067,18 +2059,6 @@ bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) const { return argAllocation->isFlushL3Required(); } -void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation) { - if (argAllocation == nullptr) { - kernelArgRequiresCacheFlush[argIndex] = nullptr; - } else { - if (allocationForCacheFlush(argAllocation)) { - kernelArgRequiresCacheFlush[argIndex] = argAllocation; - } else { - kernelArgRequiresCacheFlush[argIndex] = nullptr; - } - } -} - uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const { uint64_t kernelStartOffset = 0; @@ -2275,12 +2255,6 @@ bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (svmAllocationsRequireCacheFlush) { return true; } - size_t args = kernelArgRequiresCacheFlush.size(); - for (size_t i = 0; i < args; i++) { - if (kernelArgRequiresCacheFlush[i] != nullptr) { - return true; - } - } return false; } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 1cd8631ace..3b4c272dec 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -466,7 +466,6 @@ class Kernel : public ReferenceTrackedObject { bool hasDirectStatelessAccessToHostMemory() const; bool hasIndirectStatelessAccessToHostMemory() const; - void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; const ClDevice &getDevice() const { @@ -491,7 +490,6 @@ class Kernel : public ReferenceTrackedObject { std::vector kernelSvmGfxAllocations; std::vector kernelUnifiedMemoryGfxAllocations; std::vector patchInfoDataList; - std::vector kernelArgRequiresCacheFlush; std::vector slmSizes; std::unique_ptr imageTransformer; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 9dec696309..08fe6364db 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -560,41 +560,6 @@ TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenGetCSIsCalledThenComm EXPECT_EQ(AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType()); } -HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsDisabledWhenEstimatingNodesCountThenItEqualsMultiDispatchInfoSize) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - - MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr); - pDevice->getUltCommandStreamReceiver().multiOsContextCapable = true; - MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get()); - - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); - MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel})); - - size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo); - EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size()); -} - -HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsEnabledWhenEstimatingNodesCountThenItEqualsMultiDispatchInfoSizePlusOne) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr); - MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get()); - - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); - MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel})); - - size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo); - EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size() + 1); -} - struct CommandQueueIndirectHeapTest : public CommandQueueMemoryDevice, public ::testing::TestWithParam { void SetUp() override { diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index a66347baa0..dc6f7efe9d 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -1085,109 +1085,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleDispatchInfoAndSame } } -HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - - MockKernel kernel1(program.get(), kernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - kernel1.kernelArgRequiresCacheFlush.resize(1); - MockGraphicsAllocation cacheRequiringAllocation; - kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1})); - // create commandStream - auto &cmdStream = pCmdQ->getCS(0); - HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - walkerArgs); - - HardwareParse hwParse; - hwParse.parseCommands(cmdStream); - PIPE_CONTROL *pipeControl = hwParse.getCommand(); - EXPECT_EQ(nullptr, pipeControl); -} - -HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoKernelsThenFlushCommandPresentOnce) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - MockKernel kernel1(program.get(), kernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); - ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); - - kernel1.kernelArgRequiresCacheFlush.resize(1); - kernel2.kernelArgRequiresCacheFlush.resize(1); - MockGraphicsAllocation cacheRequiringAllocation; - kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); - // create commandStream - auto &cmdStream = pCmdQ->getCS(0); - HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - walkerArgs); - - HardwareParse hwParse; - hwParse.parseCommands(cmdStream); - uint32_t pipeControlCount = hwParse.getCommandCount(); - EXPECT_EQ(pipeControlCount, 1u); -} - -HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQueueThenFlushCommandPresentTwice) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - MockKernel kernel1(program.get(), kernelInfo, *pClDevice); - ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); - ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); - - kernel1.kernelArgRequiresCacheFlush.resize(1); - kernel2.kernelArgRequiresCacheFlush.resize(1); - MockGraphicsAllocation cacheRequiringAllocation; - kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - MockMultiDispatchInfo multiDispatchInfo1(pClDevice, std::vector({&kernel1})); - MockMultiDispatchInfo multiDispatchInfo2(pClDevice, std::vector({&kernel2})); - // create commandStream - auto &cmdStream = pCmdQ->getCS(0); - - HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo1, - CsrDependencies(), - walkerArgs); - - HardwareInterfaceWalkerArgs walkerArgs2 = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo2, - CsrDependencies(), - walkerArgs2); - - HardwareParse hwParse; - hwParse.parseCommands(cmdStream); - uint32_t pipeControlCount = hwParse.getCommandCount(); - EXPECT_EQ(pipeControlCount, 2u); -} - TEST(DispatchWalker, WhenCalculatingDispatchDimensionsThenCorrectValuesAreReturned) { Vec3 dim0{0, 0, 0}; Vec3 dim1{2, 1, 1}; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 2f6827fdec..ef83a2a7f0 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -57,7 +57,6 @@ void HardwareCommandsTest::addSpaceForSingleKernelArg() { kernelArguments[0] = kernelArgInfo; mockKernelWithInternal->kernelInfo.addArgBuffer(0, 0, sizeof(uintptr_t)); mockKernelWithInternal->mockKernel->setKernelArguments(kernelArguments); - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(1); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenProgramInterfaceDescriptorDataIsCreatedThenOnlyRequiredSpaceOnIndirectHeapIsAllocated) { @@ -1173,40 +1172,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab EXPECT_TRUE(pipeControl->getDcFlushEnable()); } -HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - auto &commandStream = cmdQ.getCS(1024); - - addSpaceForSingleKernelArg(); - MockGraphicsAllocation cacheRequiringAllocation; - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); - mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); - EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation)); - - size_t expectedSize = sizeof(PIPE_CONTROL); - size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); - EXPECT_EQ(expectedSize, actualSize); - - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); - - HardwareParse hwParse; - hwParse.parseCommands(commandStream); - PIPE_CONTROL *pipeControl = hwParse.getCommand(); - ASSERT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_TRUE(pipeControl->getDcFlushEnable()); -} - HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerDisabledWhenGettingRequiredCacheFlushSizeThenReturnZero) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; diff --git a/opencl/test/unit_test/kernel/cache_flush_tests.inl b/opencl/test/unit_test/kernel/cache_flush_tests.inl index 86a90677ff..a3aa717e98 100644 --- a/opencl/test/unit_test/kernel/cache_flush_tests.inl +++ b/opencl/test/unit_test/kernel/cache_flush_tests.inl @@ -84,55 +84,6 @@ class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequirin } }; -template -class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenKernelArgIsSetAsCacheFlushRequiredAndA0SteppingIsDisabledThenExpectCacheFlushCommand : public HardwareCommandsTest { - public: - void testBodyImpl(bool isA0Stepping) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfoAndInitHelpers(&hardwareInfo); - const auto &productHelper = pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->getProductHelper(); - auto stepping = (isA0Stepping ? REVISION_A0 : REVISION_A1); - hardwareInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(stepping, hardwareInfo); - pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfoAndInitHelpers(&hardwareInfo); - pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->initGmm(); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - auto &commandStream = cmdQ.getCS(1024); - addSpaceForSingleKernelArg(); - this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); - void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); - MockGraphicsAllocation cacheRequiringAllocation{allocPtr, MemoryConstants::pageSize * 7}; - this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - L3RangesVec rangesExpected; - coverRangeExact(cacheRequiringAllocation.getGpuAddress(), cacheRequiringAllocation.getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - - size_t expectedSize = sizeof(PIPE_CONTROL) + rangesExpected.size() * sizeof(L3_CONTROL_WITHOUT_POST_SYNC); - size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U); - EXPECT_EQ(expectedSize, actualSize); - - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, - std::vector{ - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(AtLeastOne, {&validateL3ControlPolicy}), - }, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - - EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); - } -}; - template class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand : public HardwareCommandsTest { public: @@ -207,181 +158,6 @@ class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyn using EnqueueKernelFixture = HelloWorldFixture; using EnqueueKernelTest = Test; -template -class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl(bool isA0Stepping) { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(0); - - pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfoAndInitHelpers(&hardwareInfo); - const auto &productHelper = pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->getProductHelper(); - auto stepping = (isA0Stepping ? REVISION_A0 : REVISION_A1); - hardwareInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(stepping, hardwareInfo); - pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfoAndInitHelpers(&hardwareInfo); - pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->initGmm(); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - ASSERT_NE(nullptr, svm); - auto svmData = svmManager.getSVMAlloc(svm); - ASSERT_NE(nullptr, svmData); - auto svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - ASSERT_NE(nullptr, svmAllocation); - svmAllocation->setFlushL3Required(true); - - mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP)); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel.mockKernel->kernelArgRequiresCacheFlush[0] = svmAllocation; - - cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - L3RangesVec rangesExpected; - coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(AtLeastOne, Expects{&validateL3ControlPolicy}), - new MatchAnyCmd(AnyNumber)}, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); - - memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); - svmManager.freeSVMAlloc(svm); - } -}; - -template -class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL_WITH_POST_SYNC = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(1); - - MockKernelWithInternals mockKernel(*pDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - ASSERT_NE(nullptr, svm); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - - mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP)); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0); - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - L3RangesVec rangesExpected; - coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(AtLeastOne, Expects{&validateL3ControlPolicy}), - new MatchAnyCmd(AnyNumber)}, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - auto expectedRangeWithPostSync = rangesExpected[rangesExpected.size() - 1]; - auto l3ParsedRangeWithPostSync = validateL3ControlPolicy.l3RangesParsed[validateL3ControlPolicy.l3RangesParsed.size() - 1]; - EXPECT_EQ(expectedRangeWithPostSync, l3ParsedRangeWithPostSync); - - memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); - svmManager.freeSVMAlloc(svm); - } -}; - -template -class GivenCacheFlushAfterWalkerDisabledAndProperSteppingIsSetWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl(bool isA0Stepping) { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL_BASE = typename FamilyType::L3_CONTROL_BASE; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - ASSERT_NE(nullptr, svm); - auto svmData = svmManager.getSVMAlloc(svm); - ASSERT_NE(nullptr, svmData); - auto svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - ASSERT_NE(nullptr, svmAllocation); - svmAllocation->setFlushL3Required(true); - - mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP)); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{ - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(0), - new MatchAnyCmd(AnyNumber), - }, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - - memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); - svmManager.freeSVMAlloc(svm); - } -}; - template class GivenCacheResourceSurfacesWhenprocessingCacheFlushThenExpectProperCacheFlushCommand : public EnqueueKernelTest { public: diff --git a/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl b/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl index 55e307be68..15490d538f 100644 --- a/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl +++ b/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -110,45 +110,6 @@ class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequirin } }; -template -class GivenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand : public HardwareCommandsTest { - public: - void testBodyImpl() { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - - CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); - auto &commandStream = cmdQ.getCS(1024); - addSpaceForSingleKernelArg(); - this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); - void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); - MockGraphicsAllocation cacheRequiringAllocation{allocPtr, MemoryConstants::pageSize * 7}; - this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; - - L3RangesVec rangesExpected; - coverRangeExact(cacheRequiringAllocation.getGpuAddress(), cacheRequiringAllocation.getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - - HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, - std::vector{ - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE), &validateL3ControlPolicy}), - }, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - - EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); - } -}; - template class GivenCacheFlushAfterWalkerEnabledWhenNoGlobalSurfaceSvmAllocationKernelArgRequireCacheFlushThenExpectNoCacheFlushCommand : public HardwareCommandsTest { public: @@ -236,340 +197,3 @@ class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyn using EnqueueKernelFixture = HelloWorldFixture; using EnqueueKernelTest = Test; - -template -class GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(0); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - ASSERT_NE(nullptr, svm); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - - mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - L3RangesVec rangesExpected; - coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE), &validateL3ControlPolicy}), - new MatchAnyCmd(AnyNumber)}, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); - - memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); - svmManager.freeSVMAlloc(svm); - } -}; - -template -class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(1); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - ASSERT_NE(nullptr, svm); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - - mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - auto &nodes = cmdQ->timestampPacketContainer->peekNodes(); - EXPECT_FALSE(nodes[nodes.size() - 1]->isProfilingCapable()); - - L3RangesVec rangesExpected; - coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA), &validateL3ControlPolicy}), - new MatchAnyCmd(AnyNumber)}, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); - - memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); - svmManager.freeSVMAlloc(svm); - } -}; - -template -class GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - ASSERT_NE(nullptr, svm); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - - mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{ - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(0), - new MatchAnyCmd(AnyNumber), - }, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - - memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); - svmManager.freeSVMAlloc(svm); - } -}; -template -class GivenCacheFlushAfterWalkerEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenAtLeatsTwoFlushCommandPresentAfterWalker : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(0); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(maxFlushSubrangeCount + 1); - - std::vector svmAllocs; - for (uint32_t i = 0; i < maxFlushSubrangeCount + 1; i++) { - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - mockKernel.mockKernel->addAllocationToCacheFlushVector(i, svmAllocation); - svmAllocs.push_back(svm); - } - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(1), - new MatchHwCmd(AtLeastOne), - new MatchAnyCmd(AnyNumber)}, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - for (void *svm : svmAllocs) { - svmManager.freeSVMAlloc(svm); - } - } -}; - -template -class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenExpectFlushWithOutPostSyncAndThenWithPostSync : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(1); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(maxFlushSubrangeCount + 1); - - std::vector svmAllocs; - for (uint32_t i = 0; i < maxFlushSubrangeCount + 1; i++) { - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - mockKernel.mockKernel->addAllocationToCacheFlushVector(i, svmAllocation); - svmAllocs.push_back(svm); - } - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - auto &nodes = cmdQ->timestampPacketContainer->peekNodes(); - EXPECT_FALSE(nodes[1]->isProfilingCapable()); - - auto timestampPacketNode = nodes[1]; - auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode); - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA)}), - new MatchAnyCmd(AnyNumber)}, - &err); - - HardwareParse hwParser; - hwParser.parseCommands(cmdQ->getCS(0), 0); - - bool postSyncWriteFound = false; - for (auto &cmd : hwParser.cmdList) { - if (auto l3ControlCmd = genCmdCast(cmd)) { - if (l3ControlCmd->getPostSyncOperation() == L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - EXPECT_EQ(timestampPacketGpuAddress, l3ControlCmd->getPostSyncAddress()); - postSyncWriteFound = true; - } - } - } - EXPECT_TRUE(postSyncWriteFound); - - EXPECT_TRUE(cmdBuffOk) << err; - for (void *svm : svmAllocs) { - svmManager.freeSVMAlloc(svm); - } - } -}; - -template -class GivenCacheFlushAfterWalkerEnabledWhen126AllocationRangesRequiresCacheFlushThenExpectOneFlush : public EnqueueKernelTest { - public: - void testBodyImpl() { - using WALKER = typename FamilyType::WALKER_TYPE; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - using L3_CONTROL = typename FamilyType::L3_CONTROL; - - DebugManagerStateRestore restore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableTimestampPacket.set(0); - - MockKernelWithInternals mockKernel(*pClDevice, context, true); - mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; - - auto cmdQ = std::make_unique>(context, pClDevice, nullptr); - - cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - - auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); - SVMAllocsManager svmManager(memoryManager, false); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(maxFlushSubrangeCount); - - std::vector svmAllocs; - for (uint32_t i = 0; i < maxFlushSubrangeCount; i++) { - void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); - auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); - svmAllocation->setFlushL3Required(true); - mockKernel.mockKernel->addAllocationToCacheFlushVector(i, svmAllocation); - svmAllocs.push_back(svm); - } - - cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); - - std::string err; - auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, - std::vector{new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1), - new MatchAnyCmd(AnyNumber), - new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), - new MatchHwCmd(1), - new MatchHwCmd(0), - new MatchAnyCmd(AnyNumber)}, - &err); - EXPECT_TRUE(cmdBuffOk) << err; - for (void *svm : svmAllocs) { - svmManager.freeSVMAlloc(svm); - } - } -}; diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index cd064ea6f8..586e2ac310 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -468,26 +468,6 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN alignedFree(svmPtr); } -TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { - const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); - if (devInfo.svmCapabilities == 0) { - GTEST_SKIP(); - } - - size_t svmSize = 4096; - void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); - MockGraphicsAllocation svmAlloc(svmPtr, svmSize); - - svmAlloc.setMemObjectsAllocationWithWritableFlags(true); - svmAlloc.setFlushL3Required(false); - - auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); - - alignedFree(svmPtr); -} - TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenDoNotExpectSvmFlushFlagTrue) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { diff --git a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp index b909872212..2c54ae5014 100644 --- a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp @@ -257,71 +257,5 @@ HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsRequireCacheFl EXPECT_TRUE(flushRequired); clearPlatform(); } -HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsWhichRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedTrue) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - uint32_t numDevices = 2; - DebugManager.flags.CreateMultipleSubDevices.set(numDevices); - initializePlatform(); - auto device = pPlatform->getClDevice(0); - auto mockKernel = std::make_unique(*device); - MockContext mockContext(device); - mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; - auto cmdQ = std::make_unique>(&mockContext, device, nullptr); - cmdQ->requiresCacheFlushAfterWalker = true; - auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); - ultCsr.multiOsContextCapable = false; - mockKernel->mockKernel->svmAllocationsRequireCacheFlush = false; - mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(2); - MockGraphicsAllocation cacheRequiringAllocation; - mockKernel->mockKernel->kernelArgRequiresCacheFlush[1] = &cacheRequiringAllocation; - bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); - - EXPECT_TRUE(flushRequired); - clearPlatform(); -} - -HWTEST_F(KernelWithCacheFlushTests, - givenEnableCacheFlushAfterWalkerForAllQueuesFlagSetWhenCheckIfKernelRequierFlushThenTrueIsAlwaysReturned) { - DebugManagerStateRestore dbgRestore; - DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); - MockGraphicsAllocation cacheRequiringAllocation; - - for (auto isMultiEngine : ::testing::Bool()) { - for (auto isMultiDevice : ::testing::Bool()) { - for (auto isDefaultContext : ::testing::Bool()) { - for (auto svmAllocationRequiresCacheFlush : ::testing::Bool()) { - for (auto kernelArgRequiresCacheFlush : ::testing::Bool()) { - auto deviceCount = (isMultiDevice ? 2 : 0); - auto contextType = - (isDefaultContext ? ContextType::CONTEXT_TYPE_DEFAULT : ContextType::CONTEXT_TYPE_SPECIALIZED); - GraphicsAllocation *kernelArg = (kernelArgRequiresCacheFlush ? &cacheRequiringAllocation : nullptr); - - DebugManager.flags.CreateMultipleSubDevices.set(deviceCount); - initializePlatform(); - - auto device = pPlatform->getClDevice(0); - MockContext mockContext(device); - mockContext.contextType = contextType; - auto cmdQ = std::make_unique>(&mockContext, device, nullptr); - cmdQ->requiresCacheFlushAfterWalker = true; - auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); - ultCsr.multiOsContextCapable = isMultiEngine; - - auto mockKernel = std::make_unique(*device); - mockKernel->mockKernel->svmAllocationsRequireCacheFlush = svmAllocationRequiresCacheFlush; - mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(1); - mockKernel->mockKernel->kernelArgRequiresCacheFlush[0] = kernelArg; - - auto flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); - EXPECT_TRUE(flushRequired); - clearPlatform(); - } - } - } - } - } -} } // namespace NEO diff --git a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp index e04fd0524f..c7597cd91c 100644 --- a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp @@ -318,30 +318,6 @@ TEST_F(KernelImageArgTest, givenKernelWithSharedImageWhenSetArgCalledThenUsingSh EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); } -TEST_F(KernelImageArgTest, givenWritableImageWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { - MockImageBase image; - image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(true); - image.graphicsAllocation->setFlushL3Required(false); - - cl_mem imageObj = ℑ - - pKernel->setArg(0, sizeof(imageObj), &imageObj); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); -} - -TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { - MockImageBase image; - image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false); - image.graphicsAllocation->setFlushL3Required(false); - - cl_mem imageObj = ℑ - - pKernel->setArg(0, sizeof(imageObj), &imageObj); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); -} - class KernelImageArgTestBindless : public KernelImageArgTest { public: void SetUp() override { diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index db47acc412..bf709d8cbc 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2093,7 +2093,6 @@ HWTEST_F(KernelResidencyTest, givenSimpleKernelWhenExecEnvDoesNotHavePageFaultMa mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = true; mockKernel.mockKernel->setKernelArguments(kernelArguments); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(); @@ -2120,7 +2119,6 @@ HWTEST_F(KernelResidencyTest, givenSimpleKernelWhenIsUnifiedMemorySyncRequiredIs mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = true; mockKernel.mockKernel->setKernelArguments(kernelArguments); - mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->isUnifiedMemorySyncRequired = false; EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); @@ -2774,16 +2772,6 @@ TEST(KernelTest, givenKernelWithPairArgumentWhenItIsInitializedThenPatchImmediat EXPECT_EQ(&Kernel::setArgImmediate, kernel.mockKernel->kernelArgHandlers[0]); } -TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) { - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); - - kernel.mockKernel->addAllocationToCacheFlushVector(0, nullptr); - EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); -} - TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializingThenReturnError) { auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); auto &gfxCoreHelper = device->getGfxCoreHelper(); @@ -2810,19 +2798,6 @@ TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) EXPECT_EQ(CL_SUCCESS, retVal); } -TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) { - MockGraphicsAllocation mockAllocation; - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - - mockAllocation.setMemObjectsAllocationWithWritableFlags(false); - mockAllocation.setFlushL3Required(true); - - kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); - EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); -} - TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { MockGraphicsAllocation mockAllocation; auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); @@ -2841,33 +2816,6 @@ TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfte EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); } -TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheFlushVector) { - MockGraphicsAllocation mockAllocation; - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - - mockAllocation.setMemObjectsAllocationWithWritableFlags(true); - mockAllocation.setFlushL3Required(false); - - kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); - EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); -} - -TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) { - MockGraphicsAllocation mockAllocation; - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - MockKernelWithInternals kernel(*device); - kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); - kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); - - mockAllocation.setMemObjectsAllocationWithWritableFlags(false); - mockAllocation.setFlushL3Required(false); - - kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); - EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); -} - TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) { auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); auto executionEnvironment = device->getExecutionEnvironment(); diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 5cd8a8601f..1a114235d0 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -94,7 +94,6 @@ class MockMultiDeviceKernel : public MultiDeviceKernel { //////////////////////////////////////////////////////////////////////////////// class MockKernel : public Kernel { public: - using Kernel::addAllocationToCacheFlushVector; using Kernel::allBufferArgsStateful; using Kernel::anyKernelArgumentUsingSystemMemory; using Kernel::auxTranslationRequired; @@ -109,7 +108,6 @@ class MockKernel : public Kernel { using Kernel::hasIndirectStatelessAccessToHostMemory; using Kernel::isUnifiedMemorySyncRequired; using Kernel::kernelArgHandlers; - using Kernel::kernelArgRequiresCacheFlush; using Kernel::kernelArguments; using Kernel::KernelConfig; using Kernel::kernelHasIndirectAccess; @@ -335,7 +333,6 @@ class MockKernelWithInternals { kernelInfo.setAccessQualifier(1, KernelArgMetadata::AccessReadWrite); mockKernel->setKernelArguments(defaultKernelArguments); - mockKernel->kernelArgRequiresCacheFlush.resize(2); mockKernel->kernelArgHandlers.resize(2); mockKernel->kernelArgHandlers[0] = &Kernel::setArgBuffer; mockKernel->kernelArgHandlers[1] = &Kernel::setArgBuffer;