diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 387d57b255..4e57d3af0b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -229,12 +229,12 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]); ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions); - void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb); + void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition); void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask); - void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker); + void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition); void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker); void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker); - void appendSignalEventPostWalker(ze_event_handle_t hEvent); + void appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition); void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired); void appendComputeBarrierCommand(); NEO::PipeControlArgs createBarrierFlags(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index d55e6a4a1d..8c6d2d5cf9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -230,10 +230,10 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ if (ret) { return ret; } - appendEventForProfiling(hEvent, true); + appendEventForProfiling(hEvent, true, false); ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer, nullptr, true, false, false); - appendSignalEventPostWalker(hEvent); + appendSignalEventPostWalker(hEvent, false); return ret; } @@ -251,7 +251,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd if (ret) { return ret; } - appendEventForProfiling(hEvent, true); + appendEventForProfiling(hEvent, true, false); const bool haveLaunchArguments = pLaunchArgumentsBuffer != nullptr; auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pNumLaunchArguments); auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); @@ -268,7 +268,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd } } - appendSignalEventPostWalker(hEvent); + appendSignalEventPostWalker(hEvent, false); return ret; } @@ -355,9 +355,9 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint return ret; } - appendEventForProfiling(hSignalEvent, true); + appendEventForProfiling(hSignalEvent, true, false); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); - appendSignalEventPostWalker(hSignalEvent); + appendSignalEventPostWalker(hSignalEvent, false); if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { executeCommandListImmediate(true); @@ -801,9 +801,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(ze_ker template void CommandListCoreFamily::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) { if (beforeWalker) { - appendEventForProfiling(hEvent, true); + appendEventForProfiling(hEvent, true, false); } else { - appendSignalEventPostWalker(hEvent); + appendSignalEventPostWalker(hEvent, false); } } @@ -911,14 +911,14 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; - appendEventForProfiling(hSignalEvent, true); + appendEventForProfiling(hSignalEvent, true, false); bool copyRegionPreferred = NEO::BlitCommandsHelper::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); if (copyRegionPreferred) { NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); } else { NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); } - appendSignalEventPostWalker(hSignalEvent); + appendSignalEventPostWalker(hSignalEvent, false); return ZE_RESULT_SUCCESS; } @@ -941,9 +941,9 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph commandContainer.addToResidencyContainer(dst); commandContainer.addToResidencyContainer(src); commandContainer.addToResidencyContainer(clearColorAllocation); - appendEventForProfiling(hSignalEvent, true); + appendEventForProfiling(hSignalEvent, true, false); NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); - appendSignalEventPostWalker(hSignalEvent); + appendSignalEventPostWalker(hSignalEvent, false); return ZE_RESULT_SUCCESS; } @@ -1597,7 +1597,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, if (ret) { return ret; } - appendEventForProfiling(hSignalEvent, true); + appendEventForProfiling(hSignalEvent, true, false); NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr, size, neoDevice->getRootDeviceIndex(), @@ -1623,19 +1623,19 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, *commandContainer.getCommandStream(), size, *neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); - appendSignalEventPostWalker(hSignalEvent); + appendSignalEventPostWalker(hSignalEvent, false); } return ZE_RESULT_SUCCESS; } template -void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_handle_t hEvent) { +void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition) { if (hEvent == nullptr) { return; } auto event = Event::fromHandle(hEvent); if (event->isEventTimestampFlagSet()) { - appendEventForProfiling(hEvent, false); + appendEventForProfiling(hEvent, false, workloadPartition); } else { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; @@ -1683,7 +1683,7 @@ void CommandListCoreFamily::appendEventForProfilingCopyCommand(ze NEO::MiFlushArgs args; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); } - appendWriteKernelTimestamp(hEvent, beforeWalker, false); + appendWriteKernelTimestamp(hEvent, beforeWalker, false, false); } template @@ -1937,7 +1937,7 @@ ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kern } template -void CommandListCoreFamily::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb) { +void CommandListCoreFamily::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition) { constexpr uint32_t mask = 0xfffffffe; auto event = Event::fromHandle(hEvent); @@ -1949,18 +1949,18 @@ void CommandListCoreFamily::appendWriteKernelTimestamp(ze_event_h uint64_t contextAddress = ptrOffset(baseAddr, contextOffset); if (maskLsb) { - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, globalAddress, false); - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, contextAddress, false); + NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, globalAddress, workloadPartition); + NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, contextAddress, workloadPartition); } else { - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalAddress, false); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, false); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalAddress, workloadPartition); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition); } adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask); } template -void CommandListCoreFamily::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker) { +void CommandListCoreFamily::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition) { if (!hEvent) { return; } @@ -1976,7 +1976,7 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); if (beforeWalker) { - appendWriteKernelTimestamp(hEvent, beforeWalker, true); + appendWriteKernelTimestamp(hEvent, beforeWalker, true, workloadPartition); } else { const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; @@ -1988,7 +1988,7 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand uint64_t baseAddr = event->getGpuAddress(this->device); NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo); - appendWriteKernelTimestamp(hEvent, beforeWalker, true); + appendWriteKernelTimestamp(hEvent, beforeWalker, true, workloadPartition); } } } @@ -2030,7 +2030,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( } if (hSignalEvent) { - CommandListCoreFamily::appendSignalEventPostWalker(hSignalEvent); + CommandListCoreFamily::appendSignalEventPostWalker(hSignalEvent, false); } auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false); @@ -2269,19 +2269,25 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ if (ret) { return ret; } - appendEventForProfiling(hSignalEvent, true); - - const auto &hwInfo = this->device->getHwInfo(); - if (!hSignalEvent) { - if (isCopyOnly()) { - NEO::MiFlushArgs args; - NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); - } else { - appendComputeBarrierCommand(); + bool workloadPartition = false; + if (this->partitionCount > 1 && + hSignalEvent) { + auto event = Event::fromHandle(hSignalEvent); + if (event->isEventTimestampFlagSet()) { + event->setPacketsInUse(this->partitionCount); + workloadPartition = true; } - } else { - appendSignalEventPostWalker(hSignalEvent); } + appendEventForProfiling(hSignalEvent, true, workloadPartition); + + if (isCopyOnly()) { + NEO::MiFlushArgs args; + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, this->device->getHwInfo()); + } else { + appendComputeBarrierCommand(); + } + + appendSignalEventPostWalker(hSignalEvent, workloadPartition); return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index e3d09d7fee..027e79a19e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -42,7 +42,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z const auto kernel = Kernel::fromHandle(hKernel); const auto &kernelDescriptor = kernel->getKernelDescriptor(); UNRECOVERABLE_IF(kernel == nullptr); - appendEventForProfiling(hEvent, true); + appendEventForProfiling(hEvent, true, false); const auto functionImmutableData = kernel->getImmutableData(); auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(), kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); @@ -159,7 +159,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z *reinterpret_cast(surfaceStateSpace) = surfaceState; } - appendSignalEventPostWalker(hEvent); + appendSignalEventPostWalker(hEvent, false); commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); auto &residencyContainer = kernel->getResidencyContainer(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 120e956799..eda48ba7b4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1836,7 +1836,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed(); - commandList->appendSignalEventPostWalker(nullptr); + commandList->appendSignalEventPostWalker(nullptr, false); EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index 5bfc05f374..91d2f75fce 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -6,11 +6,13 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/helpers/hw_helper.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" +#include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" @@ -80,55 +82,15 @@ HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCo ASSERT_LE(sizeWithoutEvent, sizeWithEvent); } -using MultiTileCommandListAppendBarrier = Test>; - -HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) { +template +void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset, + uint64_t gpuFinalSyncAddress, uint64_t gpuCrossTileSyncAddress, uint64_t gpuStartAddress) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); - EXPECT_EQ(2u, commandList->partitionCount); - - size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + - sizeof(PIPE_CONTROL) + - sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + - sizeof(MI_BATCH_BUFFER_START); - - size_t startOffset = beforeControlSectionOffset + - (2 * sizeof(uint32_t)); - - size_t expectedUseBuffer = startOffset + - sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + - sizeof(MI_STORE_DATA_IMM) + - sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); - - auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); - auto gpuBaseAddress = commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getGpuAddress() + - usedSpaceBefore; - - auto gpuCrossTileSyncAddress = gpuBaseAddress + - beforeControlSectionOffset; - - auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + - sizeof(uint32_t); - - auto gpuStartAddress = gpuBaseAddress + - startOffset; - - auto result = commandList->appendBarrier(nullptr, 0, nullptr); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); - ASSERT_GT(usedSpaceAfter, usedSpaceBefore); - size_t usedBuffer = usedSpaceAfter - usedSpaceBefore; - EXPECT_EQ(expectedUseBuffer, usedBuffer); - - void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore); - size_t parsedOffset = 0; - { auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, storeDataImm); @@ -216,6 +178,59 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControl EXPECT_EQ(4u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } +} + +using MultiTileCommandListAppendBarrier = Test>; + +HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); + EXPECT_EQ(2u, commandList->partitionCount); + + size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + + sizeof(PIPE_CONTROL) + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + + sizeof(MI_BATCH_BUFFER_START); + + size_t startOffset = beforeControlSectionOffset + + (2 * sizeof(uint32_t)); + + size_t expectedUseBuffer = startOffset + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + + sizeof(MI_STORE_DATA_IMM) + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + auto gpuBaseAddress = commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getGpuAddress() + + usedSpaceBefore; + + auto gpuCrossTileSyncAddress = gpuBaseAddress + + beforeControlSectionOffset; + + auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + + sizeof(uint32_t); + + auto gpuStartAddress = gpuBaseAddress + + startOffset; + + auto result = commandList->appendBarrier(nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + size_t usedBuffer = usedSpaceAfter - usedSpaceBefore; + EXPECT_EQ(expectedUseBuffer, usedBuffer); + + void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore); + size_t parsedOffset = 0; + + validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress); + EXPECT_EQ(expectedUseBuffer, parsedOffset); } @@ -276,95 +291,274 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, void *cmdBuffer = cmdListStream->getCpuBase(); size_t parsedOffset = 0; - { - auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, storeDataImm); - EXPECT_EQ(gpuFinalSyncAddress, storeDataImm->getAddress()); - EXPECT_EQ(0u, storeDataImm->getDataDword0()); - parsedOffset += sizeof(MI_STORE_DATA_IMM); - } - { - auto pipeControl = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_FALSE(pipeControl->getDcFlushEnable()); - parsedOffset += sizeof(PIPE_CONTROL); - } - { - auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(gpuCrossTileSyncAddress, miAtomicProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(MI_ATOMIC); - } - { - auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphore); - EXPECT_EQ(gpuCrossTileSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); - EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword()); - parsedOffset += sizeof(MI_SEMAPHORE_WAIT); - } - { - auto bbStart = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, bbStart); - EXPECT_EQ(gpuStartAddress, bbStart->getBatchBufferStartAddress()); - EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); - parsedOffset += sizeof(MI_BATCH_BUFFER_START); - } - { - auto crossField = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_EQ(0u, *crossField); - parsedOffset += sizeof(uint32_t); - auto finalField = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); - EXPECT_EQ(0u, *finalField); - parsedOffset += sizeof(uint32_t); - } - { - auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(MI_ATOMIC); - } - { - auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphore); - EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); - EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword()); - parsedOffset += sizeof(MI_SEMAPHORE_WAIT); - } - { - auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, storeDataImm); - EXPECT_EQ(gpuCrossTileSyncAddress, storeDataImm->getAddress()); - EXPECT_EQ(0u, storeDataImm->getDataDword0()); - parsedOffset += sizeof(MI_STORE_DATA_IMM); - } - { - auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miAtomic); - auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); - EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress); - EXPECT_FALSE(miAtomic->getReturnDataControl()); - EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); - parsedOffset += sizeof(MI_ATOMIC); - } - { - auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); - ASSERT_NE(nullptr, miSemaphore); - EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); - EXPECT_EQ(4u, miSemaphore->getSemaphoreDataDword()); - parsedOffset += sizeof(MI_SEMAPHORE_WAIT); - } + validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress); + EXPECT_EQ(expectedUseBuffer, parsedOffset); } +HWTEST2_F(MultiTileCommandListAppendBarrier, + GivenNonTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndPostSyncOperation, IsWithinXeGfxFamily) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + + uint64_t eventGpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + eventGpuAddress += event->getContextEndOffset(); + } + ze_event_handle_t eventHandle = event->toHandle(); + + EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); + EXPECT_EQ(2u, commandList->partitionCount); + + LinearStream *cmdListStream = commandList->commandContainer.getCommandStream(); + + size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + + sizeof(PIPE_CONTROL) + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + + sizeof(MI_BATCH_BUFFER_START); + + size_t bbStartOffset = beforeControlSectionOffset + + (2 * sizeof(uint32_t)); + + size_t multiTileBarrierSize = bbStartOffset + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + + sizeof(MI_STORE_DATA_IMM) + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); + + size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); + + auto useSizeBefore = cmdListStream->getUsed(); + auto result = commandList->appendBarrier(eventHandle, 0, nullptr); + auto useSizeAfter = cmdListStream->getUsed(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, event->getPacketsInUse()); + + size_t totaSizedBarrierWithNonTimestampEvent = multiTileBarrierSize + postSyncSize; + + EXPECT_EQ(totaSizedBarrierWithNonTimestampEvent, (useSizeAfter - useSizeBefore)); + + auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore; + + auto gpuCrossTileSyncAddress = gpuBaseAddress + + beforeControlSectionOffset; + + auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + + sizeof(uint32_t); + + auto gpuStartAddress = gpuBaseAddress + + bbStartOffset; + + void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); + size_t parsedOffset = 0; + + validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress); + EXPECT_EQ(multiTileBarrierSize, parsedOffset); + + cmdBuffer = ptrOffset(cmdBuffer, parsedOffset); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + cmdBuffer, + postSyncSize)); + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + uint32_t postSyncFound = 0; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + postSyncFound++; + } + } + EXPECT_EQ(1u, postSyncFound); +} + +template +void validateTimestampRegisters(GenCmdList &cmdList, + uint64_t firstRegisterAddress, uint64_t secondRegisterAddress) { + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_MATH = typename FamilyType::MI_MATH; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + constexpr uint32_t mask = 0xfffffffe; + + auto itor = find(cmdList.begin(), cmdList.end()); + + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadReg = genCmdCast(*itor); + EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmdLoadReg->getSourceRegisterAddress()); + EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadImm = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset()); + EXPECT_EQ(mask, cmdLoadImm->getDataDword()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMath = genCmdCast(*itor); + EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress()); + EXPECT_EQ(firstRegisterAddress, cmdMem->getMemoryAddress()); + EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadReg = genCmdCast(*itor); + EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmdLoadReg->getSourceRegisterAddress()); + EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadImm = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset()); + EXPECT_EQ(mask, cmdLoadImm->getDataDword()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMath = genCmdCast(*itor); + EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress()); + EXPECT_EQ(secondRegisterAddress, cmdMem->getMemoryAddress()); + EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable()); + } +} + +HWTEST2_F(MultiTileCommandListAppendBarrier, + GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_MATH = typename FamilyType::MI_MATH; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + auto eventPoolTimeStamp = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto eventTimeStamp = std::unique_ptr(Event::create(eventPoolTimeStamp.get(), &eventDesc, device)); + + uint64_t eventGpuAddress = eventTimeStamp->getGpuAddress(device); + uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset(); + uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset(); + uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset(); + uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset(); + + ze_event_handle_t eventHandle = eventTimeStamp->toHandle(); + + EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); + EXPECT_EQ(2u, commandList->partitionCount); + + LinearStream *cmdListStream = commandList->commandContainer.getCommandStream(); + + size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + + sizeof(PIPE_CONTROL) + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + + sizeof(MI_BATCH_BUFFER_START); + + size_t bbStartOffset = beforeControlSectionOffset + + (2 * sizeof(uint32_t)); + + size_t multiTileBarrierSize = bbStartOffset + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + + sizeof(MI_STORE_DATA_IMM) + + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); + + size_t timestampRegisters = 2 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + + NEO::EncodeMath::streamCommandSize + sizeof(MI_STORE_REGISTER_MEM)); + + size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl() + + NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(device->getHwInfo()); + size_t stopRegisters = timestampRegisters + postBarrierSynchronization; + + auto useSizeBefore = cmdListStream->getUsed(); + auto result = commandList->appendBarrier(eventHandle, 0, nullptr); + auto useSizeAfter = cmdListStream->getUsed(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse()); + + size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters; + EXPECT_EQ(totaSizedBarrierWithTimestampEvent, (useSizeAfter - useSizeBefore)); + + void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + cmdBuffer, + timestampRegisters)); + validateTimestampRegisters(cmdList, globalStartAddress, contextStartAddress); + + auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters; + + auto gpuCrossTileSyncAddress = gpuBaseAddress + + beforeControlSectionOffset; + + auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + + sizeof(uint32_t); + + auto gpuStartAddress = gpuBaseAddress + + bbStartOffset; + + cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters); + size_t parsedOffset = 0; + + validateMultiTileBarrier(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress); + EXPECT_EQ(multiTileBarrierSize, parsedOffset); + + cmdBuffer = ptrOffset(cmdBuffer, (parsedOffset + postBarrierSynchronization)); + cmdList.clear(); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + cmdBuffer, + timestampRegisters)); + validateTimestampRegisters(cmdList, globalEndAddress, contextEndAddress); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 3045f8e8b8..31fd9a7595 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -318,7 +318,7 @@ HWTEST2_F(CommandListAppendSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - commandList->appendSignalEventPostWalker(event->toHandle()); + commandList->appendSignalEventPostWalker(event->toHandle(), false); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getGpuAddress(device); diff --git a/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp b/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp index 1d85aea867..fa273abf65 100644 --- a/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp @@ -468,7 +468,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), true); + commandList->appendEventForProfiling(event->toHandle(), true, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -504,7 +504,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), false); + commandList->appendEventForProfiling(event->toHandle(), false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -539,7 +539,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), false); + commandList->appendEventForProfiling(event->toHandle(), false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index cdc019403e..62f0bfca12 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -350,7 +350,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), false); + commandList->appendEventForProfiling(event->toHandle(), false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -381,7 +381,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), false); + commandList->appendEventForProfiling(event->toHandle(), false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -411,7 +411,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendSignalEventPostWalker(event->toHandle()); + commandList->appendSignalEventPostWalker(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp index ad3a3c11b1..a79808ebd9 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp @@ -88,7 +88,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), true); + commandList->appendEventForProfiling(event->toHandle(), true, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -124,7 +124,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), false); + commandList->appendEventForProfiling(event->toHandle(), false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -159,7 +159,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event->toHandle(), false); + commandList->appendEventForProfiling(event->toHandle(), false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));