diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 47b74a0e61..675aad15ff 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -442,7 +442,7 @@ void CommandListCoreFamily::appendMultiTileBarrier(NEO::Device &n 0, 0, !(cmdListType == CommandListType::TYPE_IMMEDIATE), - !this->isFlushTaskSubmissionEnabled); + !(this->isFlushTaskSubmissionEnabled || this->dispatchCmdListBatchBufferAsPrimary)); } template diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index 7026bb34df..46f4509650 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -57,7 +57,9 @@ void MultiTileCommandListFixtureInit::setUp() { SingleRootMultiSubDeviceFixture::setUp(); } -void MultiTileCommandListFixtureInit::setUpParams(bool createImmediate, bool createInternal, bool createCopy) { +void MultiTileCommandListFixtureInit::setUpParams(bool createImmediate, bool createInternal, bool createCopy, int32_t primaryBuffer) { + DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(primaryBuffer); + ze_result_t returnValue; NEO::EngineGroupType cmdListEngineType = createCopy ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::RenderCompute; diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 8de28e26ae..5806c8d11e 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -34,7 +34,7 @@ class CommandListFixture : public DeviceFixture { struct MultiTileCommandListFixtureInit : public SingleRootMultiSubDeviceFixture { void setUp(); - void setUpParams(bool createImmediate, bool createInternal, bool createCopy); + void setUpParams(bool createImmediate, bool createInternal, bool createCopy, int32_t primaryBuffer); inline void tearDown() { SingleRootMultiSubDeviceFixture::tearDown(); } @@ -46,11 +46,11 @@ struct MultiTileCommandListFixtureInit : public SingleRootMultiSubDeviceFixture std::unique_ptr> osLocalMemoryBackup; }; -template +template struct MultiTileCommandListFixture : public MultiTileCommandListFixtureInit { void setUp() { MultiTileCommandListFixtureInit::setUp(); - MultiTileCommandListFixtureInit::setUpParams(createImmediate, createInternal, createCopy); + MultiTileCommandListFixtureInit::setUpParams(createImmediate, createInternal, createCopy, primaryBuffer); } void tearDown() { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 4036a509cf..7838ea28c6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -27,7 +27,7 @@ namespace L0 { namespace ult { -using MultiTileImmediateCommandListTest = Test>; +using MultiTileImmediateCommandListTest = Test>; HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImmediateCommandListThenExpectPartitionCountMatchTileCount, IsWithinXeGfxFamily) { EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); @@ -38,7 +38,7 @@ HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImm EXPECT_EQ(2u, commandList->partitionCount); } -using MultiTileImmediateInternalCommandListTest = Test>; +using MultiTileImmediateInternalCommandListTest = Test>; HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCreatingInternalImmediateCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) { EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); @@ -49,7 +49,7 @@ HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCre EXPECT_EQ(1u, commandList->partitionCount); } -using MultiTileCopyEngineCommandListTest = Test>; +using MultiTileCopyEngineCommandListTest = Test>; HWTEST2_F(MultiTileCopyEngineCommandListTest, GivenMultiTileDeviceWhenCreatingCopyEngineCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) { EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index fd1f41cd90..147893acde 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -191,9 +191,226 @@ void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset, } } -using MultiTileCommandListAppendBarrier = Test>; +template +struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFixture(usePrimaryBuffer)> { + using BaseClass = MultiTileCommandListFixture(usePrimaryBuffer)>; -HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) { + using BaseClass::commandList; + using BaseClass::context; + using BaseClass::device; + using BaseClass::driverHandle; + using BaseClass::event; + + void setUp() { + BaseClass::setUp(); + } + + void tearDown() { + BaseClass::tearDown(); + } + + template + void testBodyNonTimestampEventSignal() { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + + uint64_t eventGpuAddress = event->getCompletionFieldGpuAddress(device); + ze_event_handle_t eventHandle = event->toHandle(); + + EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); + EXPECT_EQ(2u, commandList->partitionCount); + + LinearStream *cmdListStream = commandList->getCmdContainer().getCommandStream(); + + size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + + sizeof(PIPE_CONTROL) + + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + + sizeof(MI_BATCH_BUFFER_START); + + size_t bbStartOffset = beforeControlSectionOffset + + (2 * sizeof(uint32_t)); + + size_t multiTileBarrierSize = bbStartOffset + + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + + sizeof(MI_STORE_DATA_IMM) + + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); + + size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false); + + auto useSizeBefore = cmdListStream->getUsed(); + auto result = commandList->appendBarrier(eventHandle, 0, nullptr); + auto useSizeAfter = cmdListStream->getUsed(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, event->getPacketsInUse()); + + size_t totaSizedBarrierWithNonTimestampEvent = multiTileBarrierSize + postSyncSize; + + EXPECT_EQ(totaSizedBarrierWithNonTimestampEvent, (useSizeAfter - useSizeBefore)); + + auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore; + + auto gpuCrossTileSyncAddress = gpuBaseAddress + + beforeControlSectionOffset; + + auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + + sizeof(uint32_t); + + auto gpuStartAddress = gpuBaseAddress + + bbStartOffset; + + void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); + size_t parsedOffset = 0; + + validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, !usePrimaryBuffer); + EXPECT_EQ(multiTileBarrierSize, parsedOffset); + + cmdBuffer = ptrOffset(cmdBuffer, parsedOffset); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + cmdBuffer, + postSyncSize)); + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + uint32_t postSyncFound = 0; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + postSyncFound++; + } + } + EXPECT_EQ(1u, postSyncFound); + } + + template + void testBodyTimestampEventSignal() { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_MATH = typename FamilyType::MI_MATH; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + auto eventPoolTimeStamp = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto eventTimeStamp = std::unique_ptr(Event::create(eventPoolTimeStamp.get(), &eventDesc, device)); + + uint64_t eventGpuAddress = eventTimeStamp->getGpuAddress(device); + uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset(); + uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset(); + uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset(); + uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset(); + + ze_event_handle_t eventHandle = eventTimeStamp->toHandle(); + + EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); + EXPECT_EQ(2u, commandList->partitionCount); + + LinearStream *cmdListStream = commandList->getCmdContainer().getCommandStream(); + + size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + + sizeof(PIPE_CONTROL) + + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + + sizeof(MI_BATCH_BUFFER_START); + + size_t bbStartOffset = beforeControlSectionOffset + + (2 * sizeof(uint32_t)); + + size_t multiTileBarrierSize = bbStartOffset + + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + + sizeof(MI_STORE_DATA_IMM) + + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); + + size_t timestampRegisters = 2 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + + NEO::EncodeMath::streamCommandSize + sizeof(MI_STORE_REGISTER_MEM)); + if (NEO::UnitTestHelper::timestampRegisterHighAddress()) { + timestampRegisters *= 2; + } + + size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(false) + + NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(device->getNEODevice()->getRootDeviceEnvironment()); + size_t stopRegisters = timestampRegisters + postBarrierSynchronization; + + auto useSizeBefore = cmdListStream->getUsed(); + auto result = commandList->appendBarrier(eventHandle, 0, nullptr); + auto useSizeAfter = cmdListStream->getUsed(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse()); + + size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters; + EXPECT_EQ(totaSizedBarrierWithTimestampEvent, (useSizeAfter - useSizeBefore)); + + void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + cmdBuffer, + timestampRegisters)); + auto begin = cmdList.begin(); + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + true); + + auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters; + + auto gpuCrossTileSyncAddress = gpuBaseAddress + + beforeControlSectionOffset; + + auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + + sizeof(uint32_t); + + auto gpuStartAddress = gpuBaseAddress + + bbStartOffset; + + cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters); + size_t parsedOffset = 0; + + validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, !usePrimaryBuffer); + EXPECT_EQ(multiTileBarrierSize, parsedOffset); + + cmdBuffer = ptrOffset(cmdBuffer, (parsedOffset + postBarrierSynchronization)); + cmdList.clear(); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + cmdBuffer, + timestampRegisters)); + begin = cmdList.begin(); + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + true); + } +}; + +using MultiTileCommandListAppendBarrier = Test>; + +HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; @@ -246,7 +463,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControl } HWTEST2_F(MultiTileCommandListAppendBarrier, - GivenCurrentCommandBufferExhaustedWhenAppendingMultiTileBarrierThenPipeControlAndCrossTileSyncIsGeneratedInNewBuffer, IsWithinXeGfxFamily) { + GivenCurrentCommandBufferExhaustedWhenAppendingMultiTileBarrierThenPipeControlAndCrossTileSyncIsGeneratedInNewBuffer, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; @@ -308,204 +525,31 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, } HWTEST2_F(MultiTileCommandListAppendBarrier, - GivenNonTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndPostSyncOperation, IsWithinXeGfxFamily) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - using MI_ATOMIC = typename FamilyType::MI_ATOMIC; - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; - - uint64_t eventGpuAddress = event->getCompletionFieldGpuAddress(device); - ze_event_handle_t eventHandle = event->toHandle(); - - EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); - EXPECT_EQ(2u, commandList->partitionCount); - - LinearStream *cmdListStream = commandList->getCmdContainer().getCommandStream(); - - size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + - sizeof(PIPE_CONTROL) + - sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + - sizeof(MI_BATCH_BUFFER_START); - - size_t bbStartOffset = beforeControlSectionOffset + - (2 * sizeof(uint32_t)); - - size_t multiTileBarrierSize = bbStartOffset + - sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + - sizeof(MI_STORE_DATA_IMM) + - sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); - - size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false); - - auto useSizeBefore = cmdListStream->getUsed(); - auto result = commandList->appendBarrier(eventHandle, 0, nullptr); - auto useSizeAfter = cmdListStream->getUsed(); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(2u, event->getPacketsInUse()); - - size_t totaSizedBarrierWithNonTimestampEvent = multiTileBarrierSize + postSyncSize; - - EXPECT_EQ(totaSizedBarrierWithNonTimestampEvent, (useSizeAfter - useSizeBefore)); - - auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore; - - auto gpuCrossTileSyncAddress = gpuBaseAddress + - beforeControlSectionOffset; - - auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + - sizeof(uint32_t); - - auto gpuStartAddress = gpuBaseAddress + - bbStartOffset; - - void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); - size_t parsedOffset = 0; - - validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true); - EXPECT_EQ(multiTileBarrierSize, parsedOffset); - - cmdBuffer = ptrOffset(cmdBuffer, parsedOffset); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, - cmdBuffer, - postSyncSize)); - - auto itorPC = findAll(cmdList.begin(), cmdList.end()); - ASSERT_NE(0u, itorPC.size()); - uint32_t postSyncFound = 0; - for (auto it : itorPC) { - auto cmd = genCmdCast(*it); - if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); - EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); - postSyncFound++; - } - } - EXPECT_EQ(1u, postSyncFound); + GivenNonTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndPostSyncOperation, IsAtLeastXeHpCore) { + testBodyNonTimestampEventSignal(); } HWTEST2_F(MultiTileCommandListAppendBarrier, - GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - using MI_ATOMIC = typename FamilyType::MI_ATOMIC; - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; - using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - using MI_MATH = typename FamilyType::MI_MATH; - using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - eventPoolDesc.count = 2; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - eventDesc.wait = 0; - eventDesc.signal = 0; - - ze_result_t returnValue; - auto eventPoolTimeStamp = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); - auto eventTimeStamp = std::unique_ptr(Event::create(eventPoolTimeStamp.get(), &eventDesc, device)); - - uint64_t eventGpuAddress = eventTimeStamp->getGpuAddress(device); - uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset(); - uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset(); - uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset(); - uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset(); - - ze_event_handle_t eventHandle = eventTimeStamp->toHandle(); - - EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); - EXPECT_EQ(2u, commandList->partitionCount); - - LinearStream *cmdListStream = commandList->getCmdContainer().getCommandStream(); - - size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + - sizeof(PIPE_CONTROL) + - sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + - sizeof(MI_BATCH_BUFFER_START); - - size_t bbStartOffset = beforeControlSectionOffset + - (2 * sizeof(uint32_t)); - - size_t multiTileBarrierSize = bbStartOffset + - sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + - sizeof(MI_STORE_DATA_IMM) + - sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); - - size_t timestampRegisters = 2 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + - NEO::EncodeMath::streamCommandSize + sizeof(MI_STORE_REGISTER_MEM)); - - size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(false) + - NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(device->getNEODevice()->getRootDeviceEnvironment()); - size_t stopRegisters = timestampRegisters + postBarrierSynchronization; - - auto useSizeBefore = cmdListStream->getUsed(); - auto result = commandList->appendBarrier(eventHandle, 0, nullptr); - auto useSizeAfter = cmdListStream->getUsed(); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse()); - - size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters; - EXPECT_EQ(totaSizedBarrierWithTimestampEvent, (useSizeAfter - useSizeBefore)); - - void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, - cmdBuffer, - timestampRegisters)); - auto begin = cmdList.begin(); - validateTimestampRegisters(cmdList, - begin, - REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, - GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, - true); - - auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters; - - auto gpuCrossTileSyncAddress = gpuBaseAddress + - beforeControlSectionOffset; - - auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + - sizeof(uint32_t); - - auto gpuStartAddress = gpuBaseAddress + - bbStartOffset; - - cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters); - size_t parsedOffset = 0; - - validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true); - EXPECT_EQ(multiTileBarrierSize, parsedOffset); - - cmdBuffer = ptrOffset(cmdBuffer, (parsedOffset + postBarrierSynchronization)); - cmdList.clear(); - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, - cmdBuffer, - timestampRegisters)); - begin = cmdList.begin(); - validateTimestampRegisters(cmdList, - begin, - REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, - GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, - true); + GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsAtLeastXeHpCore) { + testBodyTimestampEventSignal(); } -using MultiTileImmediateCommandListAppendBarrier = Test>; +using MultiTilePrimaryBatchBufferCommandListAppendBarrier = Test>; + +HWTEST2_F(MultiTilePrimaryBatchBufferCommandListAppendBarrier, + GivenNonTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndPostSyncOperation, IsAtLeastXeHpCore) { + testBodyNonTimestampEventSignal(); +} + +HWTEST2_F(MultiTilePrimaryBatchBufferCommandListAppendBarrier, + GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsAtLeastXeHpCore) { + testBodyTimestampEventSignal(); +} + +using MultiTileImmediateCommandListAppendBarrier = Test>; HWTEST2_F(MultiTileImmediateCommandListAppendBarrier, - givenMultiTileImmediateCommandListWhenAppendingBarrierThenExpectCrossTileSyncAndNoCleanupSection, IsWithinXeGfxFamily) { + givenMultiTileImmediateCommandListWhenAppendingBarrierThenExpectCrossTileSyncAndNoCleanupSection, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; @@ -606,7 +650,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier, } HWTEST2_F(MultiTileImmediateCommandListAppendBarrier, - givenMultiTileImmediateCommandListNotUsingFlushTaskWhenAppendingBarrierThenExpectSecondaryBufferStart, IsWithinXeGfxFamily) { + givenMultiTileImmediateCommandListNotUsingFlushTaskWhenAppendingBarrierThenExpectSecondaryBufferStart, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 950f94dc66..e1773b532d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -595,7 +595,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWai EXPECT_EQ(1u, semaphoreWaitsFound); } -using MultTileCommandListAppendWaitOnEvent = Test>; +using MultTileCommandListAppendWaitOnEvent = Test>; HWTEST2_F(MultTileCommandListAppendWaitOnEvent, GivenMultiTileCmdListWhenPartitionedEventUsedToWaitThenExpectProperGpuAddressAndSemaphoreCount, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 1d761c0bcf..57a35831ea 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -158,7 +158,7 @@ HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeContro EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } -using MultiTileCommandListTests = Test>; +using MultiTileCommandListTests = Test>; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListTests, givenPartitionedCommandListWhenCommandListIsCreatedThenStateBaseAddressCmdWithMultiPartialAndAtomicsCorrectlyProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;