diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 826061e1ed..18394e93d4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -161,7 +161,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z auto event = Event::fromHandle(hEvent); eventAlloc = &event->getAllocation(this->device); commandContainer.addToResidencyContainer(eventAlloc); - l3FlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(!!event->signalScope, hwInfo); + bool flushRequired = !!event->signalScope && + !launchParams.isKernelSplitOperation; + l3FlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(flushRequired, hwInfo); isTimestampEvent = event->isUsingContextEndOffset(); eventAddress = event->getPacketAddress(this->device); } @@ -362,9 +364,18 @@ void CommandListCoreFamily::appendEventForProfilingAllWalkers(ze_ appendSignalEventPostWalker(hEvent, false); } } else { - if (hEvent && beforeWalker) { + if (hEvent) { auto event = Event::fromHandle(hEvent); - event->zeroKernelCount(); + if (beforeWalker) { + event->zeroKernelCount(); + } else { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(!!event->signalScope, hwInfo)) { + NEO::PipeControlArgs args; + args.dcFlushEnable = true; + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + } + } } } } diff --git a/level_zero/core/test/unit_tests/fixtures/device_fixture.h b/level_zero/core/test/unit_tests/fixtures/device_fixture.h index 37118df6aa..41962be959 100644 --- a/level_zero/core/test/unit_tests/fixtures/device_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/device_fixture.h @@ -177,6 +177,13 @@ struct SingleRootMultiSubDeviceFixture : public MultiDeviceFixture { NEO::Device *neoDevice = nullptr; }; +struct ImplicitScalingRootDevice : public SingleRootMultiSubDeviceFixture { + void SetUp() { + DebugManager.flags.EnableImplicitScaling.set(1); + SingleRootMultiSubDeviceFixture::SetUp(); + } +}; + struct ContextFixture : DeviceFixture { void SetUp(); void TearDown(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index 53d4ef59e7..df0d51be5f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -97,8 +97,20 @@ class AppendFillFixture : public DeviceFixture { uint8_t *immediateDstPtr = nullptr; }; +struct MultiTileAppendFillFixture : public AppendFillFixture { + void SetUp() { + DebugManager.flags.CreateMultipleSubDevices.set(2); + DebugManager.flags.EnableImplicitScaling.set(1); + AppendFillFixture::SetUp(); + } + + DebugManagerStateRestore restorer; +}; + using AppendFillTest = Test; +using MultiTileAppendFillTest = Test; + HWTEST2_F(AppendFillTest, givenCallToAppendMemoryFillWithImmediateValueThenSuccessIsReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; @@ -343,19 +355,23 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + auto &commandContainer = commandList->commandContainer; + size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern, sizeof(immediatePattern), immediateAllocSize, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t usedAfter = commandContainer.getCommandStream()->getUsed(); EXPECT_EQ(2u, event->getPacketsInUse()); EXPECT_EQ(2u, event->getKernelCount()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), - commandList->commandContainer.getCommandStream()->getUsed())); + cmdList, + ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), + usedAfter - usedBefore)); auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, itorWalkers.size()); @@ -394,17 +410,21 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + auto &commandContainer = commandList->commandContainer; + size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t usedAfter = commandContainer.getCommandStream()->getUsed(); EXPECT_EQ(2u, event->getPacketsInUse()); EXPECT_EQ(2u, event->getKernelCount()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), - commandList->commandContainer.getCommandStream()->getUsed())); + cmdList, + ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), + usedAfter - usedBefore)); auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, itorWalkers.size()); @@ -420,5 +440,85 @@ HWTEST2_F(AppendFillTest, EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); } +HWTEST2_F(MultiTileAppendFillTest, + givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfilingAndSingleDcFlushWhenRequired, IsAtLeastXeHpCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize(); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + EXPECT_EQ(2u, commandList->partitionCount); + auto &commandContainer = commandList->commandContainer; + + size_t usedBefore = commandContainer.getCommandStream()->getUsed(); + result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t usedAfter = commandContainer.getCommandStream()->getUsed(); + + EXPECT_EQ(4u, event->getPacketsInUse()); + EXPECT_EQ(2u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), + usedAfter - usedBefore)); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(2u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + + auto walkerCmd = genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + auto itorPipeControls = findAll(secondWalker, cmdList.end()); + + uint32_t postSyncPipeControls = 0; + uint32_t dcFlushFound = 0; + + for (auto it : itorPipeControls) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + postSyncPipeControls++; + } + if (cmd->getDcFlushEnable()) { + dcFlushFound++; + } + } + + uint32_t expectedDcFlush = + NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo()) + ? 2 // 1st dc flush after cross-tile sync, 2nd dc flush for signal scope event + : 0; + + EXPECT_EQ(0u, postSyncPipeControls); + EXPECT_EQ(expectedDcFlush, dcFlushFound); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index fc79dc06e4..49aa91c38a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -280,6 +280,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe } using AppendMemoryCopyXeHpAndLater = Test; +using MultiTileAppendMemoryCopyXeHpAndLater = Test; HWTEST2_F(AppendMemoryCopyXeHpAndLater, givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels, @@ -341,7 +342,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); } -HWTEST2_F(AppendMemoryCopyXeHpAndLater, +HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLater, givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; @@ -350,9 +351,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, MockAppendMemoryCopy commandList; commandList.appendMemoryCopyKernelWithGACallBase = true; - commandList.partitionCount = 2; commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + EXPECT_EQ(2u, commandList.partitionCount); void *srcPtr = reinterpret_cast(0x1231); void *dstPtr = reinterpret_cast(0x200002345); @@ -403,7 +404,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, } HWTEST2_F(AppendMemoryCopyXeHpAndLater, - givenCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWaHandled, + givenCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushAddedOnce, isXeHpOrXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; @@ -431,11 +432,95 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize(); + uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize(); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); + EXPECT_EQ(3u, event->getPacketsInUse()); + EXPECT_EQ(3u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(3u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + auto thirdWalker = itorWalkers[2]; + + auto walkerCmd = genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*thirdWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + auto itorPipeControls = findAll(firstWalker, cmdList.end()); + + uint32_t postSyncPipeControls = 0; + uint32_t dcFlushFound = 0; + for (auto it : itorPipeControls) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + postSyncPipeControls++; + } + if (cmd->getDcFlushEnable()) { + dcFlushFound++; + } + } + EXPECT_EQ(0u, postSyncPipeControls); + EXPECT_EQ(1u, dcFlushFound); +} + +HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLater, + givenMultiTileCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernelsAndL3FlusAddedForScopedEvent, + isXeHpOrXeHpgCore) { + using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + EXPECT_EQ(2u, commandList.partitionCount); + auto &commandContainer = commandList.commandContainer; + + void *srcPtr = reinterpret_cast(0x1231); + void *dstPtr = reinterpret_cast(0x200002345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + uint64_t firstKernelEventAddress = event->getGpuAddress(device); uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize(); uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize(); + size_t usedBefore = commandContainer.getCommandStream()->getUsed(); commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + size_t usedAfter = commandContainer.getCommandStream()->getUsed(); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); EXPECT_EQ(6u, event->getPacketsInUse()); @@ -443,8 +528,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), - commandList.commandContainer.getCommandStream()->getUsed())); + cmdList, + ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), + usedAfter - usedBefore)); auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(3u, itorWalkers.size()); @@ -464,113 +550,29 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); - auto itorPipeControls = findAll(cmdList.begin(), cmdList.end()); - uint64_t eventGpuAddress = firstKernelEventAddress + event->getSinglePacketSize(); - if (event->isUsingContextEndOffset()) { - eventGpuAddress += event->getContextEndOffset(); - } + auto itorPipeControls = findAll(thirdWalker, cmdList.end()); + uint32_t postSyncPipeControls = 0; + uint32_t dcFlushFound = 0; + for (auto it : itorPipeControls) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); - EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); - EXPECT_TRUE(cmd->getDcFlushEnable()); - EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); postSyncPipeControls++; - eventGpuAddress += (2 * event->getSinglePacketSize()); + } + if (cmd->getDcFlushEnable()) { + dcFlushFound++; } } - EXPECT_EQ(3u, postSyncPipeControls); -} -HWTEST2_F(AppendMemoryCopyXeHpAndLater, - givenMultiTileCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernelsAndL3FlushWaHandled, - isXeHpOrXeHpgCore) { - using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; - using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - - MockAppendMemoryCopy commandList; - commandList.appendMemoryCopyKernelWithGACallBase = true; - commandList.partitionCount = 2; - - commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - void *srcPtr = reinterpret_cast(0x1231); - void *dstPtr = reinterpret_cast(0x200002345); - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 1; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; - - ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - - uint64_t firstKernelEventAddress = event->getGpuAddress(device); - uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize(); - uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 8 * event->getSinglePacketSize(); - - commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); - EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); - EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); - EXPECT_EQ(12u, event->getPacketsInUse()); - EXPECT_EQ(3u, event->getKernelCount()); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), - commandList.commandContainer.getCommandStream()->getUsed())); - - auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); - ASSERT_EQ(3u, itorWalkers.size()); - auto firstWalker = itorWalkers[0]; - auto secondWalker = itorWalkers[1]; - auto thirdWalker = itorWalkers[2]; - - auto walkerCmd = genCmdCast(*firstWalker); - EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); - EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); - - walkerCmd = genCmdCast(*secondWalker); - EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); - EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); - - walkerCmd = genCmdCast(*thirdWalker); - EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); - EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); - - auto itorPipeControls = findAll(cmdList.begin(), cmdList.end()); - uint64_t eventGpuAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize(); - if (event->isUsingContextEndOffset()) { - eventGpuAddress += event->getContextEndOffset(); - } - uint32_t postSyncPipeControls = 0; - for (auto it : itorPipeControls) { - auto cmd = genCmdCast(*it); - if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); - EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); - EXPECT_TRUE(cmd->getDcFlushEnable()); - EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - postSyncPipeControls++; - eventGpuAddress += (4 * event->getSinglePacketSize()); - } - } - EXPECT_EQ(3u, postSyncPipeControls); + constexpr uint32_t expectedDcFlush = 2; //dc flush for last cross-tile sync and separately for signal scope event after last kernel split + EXPECT_EQ(0u, postSyncPipeControls); + EXPECT_EQ(expectedDcFlush, dcFlushFound); } HWTEST2_F(AppendMemoryCopyXeHpAndLater, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, isXeHpOrXeHpgCore) { + using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; @@ -601,7 +603,10 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), usedAfter - usedBefore)); - auto pipeControls = findAll(cmdList.begin(), cmdList.end()); + auto itorWalker = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorWalker); + + auto pipeControls = findAll(itorWalker, cmdList.end()); uint32_t postSyncFound = 0; uint32_t dcFlushFound = 0; ASSERT_NE(0u, pipeControls.size()); @@ -616,16 +621,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater, } } - uint32_t expectedDcFlushFound = 2u; + constexpr uint32_t expectedDcFlushFound = 1u; - auto &hwInfo = device->getHwInfo(); - auto &hwInfoConfig = (*NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily)); - const auto waPair = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, true); - if (waPair.first) { - expectedDcFlushFound++; - } - - EXPECT_EQ(2u, postSyncFound); + EXPECT_EQ(0u, postSyncFound); EXPECT_EQ(expectedDcFlushFound, dcFlushFound); }