diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index 147893acde..90d313faaa 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -363,13 +363,23 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse()); size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters; + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + totaSizedBarrierWithTimestampEvent += 4 * sizeof(MI_LOAD_REGISTER_IMM); + } + EXPECT_EQ(totaSizedBarrierWithTimestampEvent, (useSizeAfter - useSizeBefore)); void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); GenCmdList cmdList; + + auto registersSizeToParse = timestampRegisters; + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + registersSizeToParse += sizeof(MI_LOAD_REGISTER_IMM); + } + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdBuffer, - timestampRegisters)); + registersSizeToParse)); auto begin = cmdList.begin(); validateTimestampRegisters(cmdList, begin, @@ -377,7 +387,12 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, true); - auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters; + auto barrierOffset = timestampRegisters; + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + barrierOffset += 2 * sizeof(MI_LOAD_REGISTER_IMM); + } + + auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + barrierOffset; auto gpuCrossTileSyncAddress = gpuBaseAddress + beforeControlSectionOffset; @@ -388,7 +403,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix auto gpuStartAddress = gpuBaseAddress + bbStartOffset; - cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters); + cmdBuffer = ptrOffset(cmdBuffer, barrierOffset); size_t parsedOffset = 0; validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, !usePrimaryBuffer); @@ -398,7 +413,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix cmdList.clear(); ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdBuffer, - timestampRegisters)); + registersSizeToParse)); begin = cmdList.begin(); validateTimestampRegisters(cmdList, begin, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 966d4d7d5c..cb4fad5f70 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1763,7 +1763,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(nodeGpuVa + events[0]->getSinglePacketSize(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(nodeGpuVa + NEO::TimestampPackets::getSinglePacketSize(), semaphoreCmd->getSemaphoreGraphicsAddress()); } HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSyncAllocationThenEnablePartitionOffset, IsAtLeastXeHpCore) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp index 0e5fdc3a7b..701c034485 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp @@ -944,7 +944,7 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, reminderPostSyncOps = 3; } arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryCopySingleKernel(input, arg); @@ -1228,7 +1228,7 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket, reminderPostSyncOps = 3; } arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testMultiTileAppendMemoryCopySingleKernel(input, arg); @@ -1479,7 +1479,7 @@ HWTEST2_F(AppendMemoryCopyL3CompactEventTest, if (input.signalAllPackets) { constexpr uint32_t reminderPostSyncOps = 2; arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = testEvent->getSinglePacketSize(); + input.storeDataImmOffset = NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryCopySingleKernel(input, arg); @@ -1613,7 +1613,7 @@ HWTEST2_F(MultiTileAppendMemoryCopyL3CompactEventTest, if (input.signalAllPackets) { constexpr uint32_t reminderPostSyncOps = 2; arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testMultiTileAppendMemoryCopySingleKernel(input, arg); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp index 9653fad355..25e486da2f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp @@ -747,7 +747,7 @@ HWTEST2_F(AppendFillMultiPacketEventTest, reminderPostSyncOps = 2; } arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryFillManyImmediateKernels(input, arg); @@ -772,7 +772,7 @@ HWTEST2_F(AppendFillMultiPacketEventTest, reminderPostSyncOps = 2; } arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryFillManyKernels(input, arg); @@ -792,7 +792,7 @@ HWTEST2_F(AppendFillMultiPacketEventTest, reminderPostSyncOps = 3; } arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryFillSingleKernel(input, arg); @@ -919,7 +919,7 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, if (input.signalAllPackets) { constexpr uint32_t reminderPostSyncOps = 1; arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testMultiTileAppendMemoryFillManyKernels(input, arg); @@ -931,6 +931,10 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 2; arg.expectedWalkerPostSyncOp = 3; + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + arg.expectedWalkerPostSyncOp = 1; + } + arg.expectedPostSyncPipeControls = 0; if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, input.device->getNEODevice()->getRootDeviceEnvironment())) { // last kernel uses 4 packets, in addition to kernel two packets, use 2 packets to two tile cache flush @@ -1070,7 +1074,7 @@ HWTEST2_F(AppendFillCompactL3EventTest, if (input.signalAllPackets) { constexpr uint32_t reminderPostSyncOps = 1; arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryFillManyImmediateKernels(input, arg); @@ -1092,7 +1096,7 @@ HWTEST2_F(AppendFillCompactL3EventTest, if (input.signalAllPackets) { constexpr uint32_t reminderPostSyncOps = 1; arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryFillManyKernels(input, arg); @@ -1109,7 +1113,7 @@ HWTEST2_F(AppendFillCompactL3EventTest, if (input.signalAllPackets) { constexpr uint32_t reminderPostSyncOps = 2; arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testSingleTileAppendMemoryFillSingleKernel(input, arg); @@ -1184,7 +1188,7 @@ HWTEST2_F(MultiTileAppendFillCompactL3EventTest, if (input.signalAllPackets) { constexpr uint32_t partitionCount = 2; arg.expectStoreDataImm = (testEvent->getMaxPacketsCount() - arg.expectedPacketsInUse) / partitionCount; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); + input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); } testMultiTileAppendMemoryFillManyKernels(input, arg); @@ -1204,6 +1208,9 @@ HWTEST2_F(MultiTileAppendFillCompactL3EventTest, arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 2; arg.expectedWalkerPostSyncOp = 3; + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + arg.expectedWalkerPostSyncOp = 1; + } arg.expectedPostSyncPipeControls = 0; arg.postSyncAddressZero = false; input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index a65004c2b4..2d0ea458bf 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -637,6 +637,11 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { if (multiTile == 0 && eventPoolFlags == 0 && !eventPool->isImplicitScalingCapableFlagSet()) { expectedWalkerPostSyncOp = 1; } + + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() && expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) { + expectedWalkerPostSyncOp = 1; + } + auto walkerCmd = genCmdCast(*firstWalker); EXPECT_EQ(static_cast(expectedWalkerPostSyncOp), walkerCmd->getPostSync().getOperation()); @@ -863,7 +868,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { ptrOffset(cmdStream->getCpuBase(), sizeBefore), (sizeAfter - sizeBefore))); - if (dynamicAllocSize) { + if (dynamicAllocSize && commandList->partitionCount > 1) { auto lriCmd = genCmdCast(*cmdList.begin()); ASSERT_NE(nullptr, lriCmd); @@ -899,7 +904,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { } } - if (dynamicAllocSize) { + if (dynamicAllocSize && commandList->partitionCount > 1) { auto lriCmd = genCmdCast(*cmdList.rbegin()); ASSERT_NE(nullptr, lriCmd); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 790bd5b648..1525d487c7 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -2722,7 +2722,11 @@ HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) { EXPECT_EQ(timestampPacketTypeSize * 2, eventObj0->getContextEndOffset()); EXPECT_EQ(timestampPacketTypeSize * 3, eventObj0->getGlobalEndOffset()); - EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize()); + if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + EXPECT_EQ(sizeof(uint64_t), eventObj0->getSinglePacketSize()); + } else { + EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize()); + } auto hostPtrDiff = ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress()); EXPECT_EQ(expectedSize, hostPtrDiff); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index afc8452a3b..b5b34a5296 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -34,6 +34,7 @@ namespace NEO { constexpr size_t TimestampDestinationAddressAlignment = 16; +constexpr size_t ImmWriteDestinationAddressAlignment = 8; template void EncodeDispatchKernel::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, @@ -299,12 +300,15 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (args.eventAddress != 0) { postSync.setDataportPipelineFlush(true); if (args.isTimestampEvent) { + UNRECOVERABLE_IF(!(isAligned(args.eventAddress))); + postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP); } else { + UNRECOVERABLE_IF(!(isAligned(args.eventAddress))); + postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA); postSync.setImmediateData(args.postSyncImmValue); } - UNRECOVERABLE_IF(!(isAligned(args.eventAddress))); postSync.setDestinationAddress(args.eventAddress); EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, rootDeviceEnvironment, args.dcFlushEnable);