diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index fa0b5f6ff4..484bd0b3ff 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2264,7 +2264,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr } else { NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, waitValue, - COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false); } gpuAddress += sizeof(uint64_t); @@ -3147,10 +3147,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, commandContainer.addToResidencyContainer(srcAllocationStruct.alloc); uint64_t gpuAddress = static_cast(srcAllocationStruct.alignedAllocationPtr); - NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), - gpuAddress, - data, - comparator); + NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, data, comparator, false, false); const auto &rootDeviceEnvironment = this->device->getNEODevice()->getRootDeviceEnvironment(); auto allocType = srcAllocationStruct.alloc->getAllocationType(); @@ -3373,7 +3370,7 @@ void CommandListCoreFamily::appendWaitOnSingleEvent(Event *event, NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddr, Event::STATE_CLEARED, - COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false, false); } gpuAddr += event->getSinglePacketSize(); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl index a40287f4c0..4d862836f7 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl @@ -148,7 +148,7 @@ void CommandQueueHw::patchCommands(CommandList &commandList, uint csr->getDebugPauseStateGPUAddress(), static_cast(NEO::DebugPauseState::hasUserStartConfirmation), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, - false, true); + false, true, false); break; } case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: { @@ -156,7 +156,7 @@ void CommandQueueHw::patchCommands(CommandList &commandList, uint csr->getDebugPauseStateGPUAddress(), static_cast(NEO::DebugPauseState::hasUserEndConfirmation), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, - false, true); + false, true, false); break; } case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: { diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index c14c896c54..55bf19f8d5 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -187,7 +187,7 @@ void CommandQueueHw::patchCommands(CommandList &commandList, uint csr->getDebugPauseStateGPUAddress(), static_cast(NEO::DebugPauseState::hasUserStartConfirmation), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, - false, true); + false, true, false); break; } case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: { @@ -195,7 +195,7 @@ void CommandQueueHw::patchCommands(CommandList &commandList, uint csr->getDebugPauseStateGPUAddress(), static_cast(NEO::DebugPauseState::hasUserEndConfirmation), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, - false, true); + false, true, false); break; } case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: { diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index e6e36176d4..ed1e47602a 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -294,7 +294,7 @@ inline void HardwareInterface::dispatchDebugPauseCommands( EncodeSemaphore::addMiSemaphoreWaitCommand(*commandStream, address, static_cast(waitCondition), - COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); + COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, false, false); } } } diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 230c7ea730..f8ef7a7327 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -372,26 +372,26 @@ struct EncodeSemaphore { static void programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, - uint32_t compareData, + uint64_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode, - bool waitMode); + bool waitMode, + bool useQwordData); static void addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, - uint32_t compareData, + uint64_t compareData, COMPARE_OPERATION compareMode, - bool registerPollMode); - - static void addMiSemaphoreWaitCommand(LinearStream &commandStream, - uint64_t compareAddress, - uint32_t compareData, - COMPARE_OPERATION compareMode); + bool registerPollMode, + bool useQwordData); static void applyMiSemaphoreWaitCommand(LinearStream &commandStream, std::list &commandsList); static constexpr size_t getSizeMiSemaphoreWait() { return sizeof(MI_SEMAPHORE_WAIT); } + + protected: + static void appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool registerPollMode, bool useQwordData); }; template diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 5829b207e6..914e245598 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -789,27 +789,22 @@ inline size_t EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(con } return requiredSize; } + template -void EncodeSemaphore::addMiSemaphoreWaitCommand(LinearStream &commandStream, - uint64_t compareAddress, - uint32_t compareData, - COMPARE_OPERATION compareMode) { - addMiSemaphoreWaitCommand(commandStream, compareAddress, compareData, compareMode, false); +void EncodeSemaphore::appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool registerPollMode, bool useQwordData) { + constexpr uint64_t upper32b = static_cast(std::numeric_limits::max()) << 32; + UNRECOVERABLE_IF(useQwordData || (compareData & upper32b)); } template void EncodeSemaphore::addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, - uint32_t compareData, + uint64_t compareData, COMPARE_OPERATION compareMode, - bool registerPollMode) { + bool registerPollMode, + bool useQwordData) { auto semaphoreCommand = commandStream.getSpaceForCmd(); - programMiSemaphoreWait(semaphoreCommand, - compareAddress, - compareData, - compareMode, - registerPollMode, - true); + programMiSemaphoreWait(semaphoreCommand, compareAddress, compareData, compareMode, registerPollMode, true, useQwordData); } template void EncodeSemaphore::applyMiSemaphoreWaitCommand(LinearStream &commandStream, std::list &commandsList) { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index b563d1ec70..615dc972e5 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -542,13 +542,17 @@ inline void EncodeSurfaceState::setCoherencyType(R_SURFACE_STATE *surfac template void EncodeSemaphore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, - uint32_t compareData, + uint64_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode, - bool waitMode) { + bool waitMode, + bool useQwordData) { + constexpr uint64_t upper32b = static_cast(std::numeric_limits::max()) << 32; + UNRECOVERABLE_IF(useQwordData || (compareData & upper32b)); + MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait; localCmd.setCompareOperation(compareMode); - localCmd.setSemaphoreDataDword(compareData); + localCmd.setSemaphoreDataDword(static_cast(compareData)); localCmd.setSemaphoreGraphicsAddress(compareAddress); localCmd.setWaitMode(waitMode ? MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE : MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 033599092f..16b4fc7b8a 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -741,17 +741,20 @@ inline void EncodeSurfaceState::setCoherencyType(R_SURFACE_STATE *surfac template void EncodeSemaphore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, - uint32_t compareData, + uint64_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode, - bool waitMode) { + bool waitMode, + bool useQwordData) { MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait; localCmd.setCompareOperation(compareMode); - localCmd.setSemaphoreDataDword(compareData); + localCmd.setSemaphoreDataDword(static_cast(compareData)); localCmd.setSemaphoreGraphicsAddress(compareAddress); localCmd.setWaitMode(waitMode ? MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE : MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE); localCmd.setRegisterPollMode(registerPollMode ? MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_REGISTER_POLL : MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL); + EncodeSemaphore::appendSemaphoreCommand(localCmd, compareData, registerPollMode, useQwordData); + *cmd = localCmd; } diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index 046482e3fd..ada08d0636 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -219,7 +219,7 @@ void programRegisterWithValue(void *&inputAddress, uint32_t registerOffset, uint template void programWaitForSemaphore(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, uint32_t semaphoreCompareValue, typename MI_SEMAPHORE_WAIT::COMPARE_OPERATION compareOperation) { auto semaphoreWait = putCommand>(inputAddress, totalBytesProgrammed); - NEO::EncodeSemaphore::programMiSemaphoreWait(semaphoreWait, gpuAddress, semaphoreCompareValue, compareOperation, false, true); + NEO::EncodeSemaphore::programMiSemaphoreWait(semaphoreWait, gpuAddress, semaphoreCompareValue, compareOperation, false, true, false); } template diff --git a/shared/source/command_stream/experimental_command_buffer.inl b/shared/source/command_stream/experimental_command_buffer.inl index 46857622e8..1460eb78c3 100644 --- a/shared/source/command_stream/experimental_command_buffer.inl +++ b/shared/source/command_stream/experimental_command_buffer.inl @@ -99,7 +99,7 @@ void ExperimentalCommandBuffer::addExperimentalCommands() { gpuAddr, *semaphoreData, MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, - false, false); + false, false, false); } template diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index b5dbb025fc..c48135e421 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -296,7 +296,7 @@ void DirectSubmissionHw::dispatchRelaxedOrderingScheduler schedulerCmdStream.getSpace(EncodeMiPredicate::getCmdSize()); // skip patching EncodeSemaphore::addMiSemaphoreWaitCommand(schedulerCmdStream, semaphoreGpuVa, value, - COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false); } // skip patching End section @@ -590,7 +590,7 @@ inline void DirectSubmissionHw::dispatchSemaphoreSection( EncodeSemaphore::addMiSemaphoreWaitCommand(ringCommandStream, semaphoreGpuVa, value, - COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false); } if (miMemFenceRequired) { @@ -879,7 +879,7 @@ void DirectSubmissionHw::preinitializeRelaxedOrderingSect EncodeMiPredicate::encode(schedulerStream, MiPredicateType::Disable); - EncodeSemaphore::addMiSemaphoreWaitCommand(schedulerStream, 0, 0, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + EncodeSemaphore::addMiSemaphoreWaitCommand(schedulerStream, 0, 0, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false); } // 3. End section diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 722fcc43e2..354d22a11f 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -341,7 +341,7 @@ void BlitCommandsHelper::dispatchDebugPauseCommands(LinearStream &com EncodeMiFlushDW::programWithWa(commandStream, debugPauseStateGPUAddress, static_cast(confirmationTrigger), args); - EncodeSemaphore::addMiSemaphoreWaitCommand(commandStream, debugPauseStateGPUAddress, static_cast(waitCondition), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); + EncodeSemaphore::addMiSemaphoreWaitCommand(commandStream, debugPauseStateGPUAddress, static_cast(waitCondition), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, false, false); } template diff --git a/shared/source/helpers/blit_commands_helper_xehp_and_later.inl b/shared/source/helpers/blit_commands_helper_xehp_and_later.inl index 4c2412bcf1..8cfa44ef5e 100644 --- a/shared/source/helpers/blit_commands_helper_xehp_and_later.inl +++ b/shared/source/helpers/blit_commands_helper_xehp_and_later.inl @@ -369,7 +369,7 @@ void BlitCommandsHelper::programGlobalSequencerFlush(LinearStream &co globalInvalidationRegister, 0u, COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, - true); + true, false); } } diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index 080f44c04f..d6516a2194 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -104,7 +104,7 @@ struct TimestampPacketHelper { for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) { uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize(); - EncodeSemaphore::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, TimestampPacketConstants::initValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + EncodeSemaphore::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, TimestampPacketConstants::initValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false, false); } } diff --git a/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp index f051f947d4..17040e5c8a 100644 --- a/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp @@ -187,8 +187,7 @@ void MemorySynchronizationCommands::setAdditionalSynchronization(void *& gpuAddress, EncodeSemaphore::invalidHardwareTag, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, - false, - true); + false, true, false); commandsBuffer = ptrOffset(commandsBuffer, EncodeSemaphore::getSizeMiSemaphoreWait()); } } diff --git a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp index 20dc0c1198..b71b0600bd 100644 --- a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp @@ -90,7 +90,7 @@ template <> void MemorySynchronizationCommands::addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment) { using COMPARE_OPERATION = typename Family::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; - EncodeSemaphore::addMiSemaphoreWaitCommand(commandStream, gpuAddress, EncodeSemaphore::invalidHardwareTag, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + EncodeSemaphore::addMiSemaphoreWaitCommand(commandStream, gpuAddress, EncodeSemaphore::invalidHardwareTag, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false, false); } template <> diff --git a/shared/test/unit_test/encoders/test_encode_semaphore.cpp b/shared/test/unit_test/encoders/test_encode_semaphore.cpp index 9d14ddf884..e379d3a872 100644 --- a/shared/test/unit_test/encoders/test_encode_semaphore.cpp +++ b/shared/test/unit_test/encoders/test_encode_semaphore.cpp @@ -24,7 +24,8 @@ HWTEST_F(CommandEncodeSemaphore, WhenProgrammingThenMiSemaphoreWaitIsUsed) { 4, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false, - true); + true, + false); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphore1.getCompareOperation()); EXPECT_EQ(4u, miSemaphore1.getSemaphoreDataDword()); @@ -37,6 +38,7 @@ HWTEST_F(CommandEncodeSemaphore, WhenProgrammingThenMiSemaphoreWaitIsUsed) { 4, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false, + false, false); EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE, miSemaphore2.getWaitMode()); } @@ -53,7 +55,7 @@ HWTEST_F(CommandEncodeSemaphore, whenAddingMiSemaphoreCommandThenExpectCompareFi EncodeSemaphore::addMiSemaphoreWaitCommand(stream, 0xFF00FF000u, 5u, - compareMode); + compareMode, false, false); EXPECT_EQ(NEO::EncodeSemaphore::getSizeMiSemaphoreWait(), stream.getUsed()); diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp index 0427a81a96..d4a86ac171 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp @@ -129,7 +129,8 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, WhenAddingPipeControlWAThenCorrectC EncodeSemaphore::invalidHardwareTag, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false, - true); + true, + false); auto pMiSemaphoreWait = genCmdCast(*(++it)); ASSERT_NE(nullptr, pMiSemaphoreWait); EXPECT_TRUE(memcmp(&expectedMiSemaphoreWait, pMiSemaphoreWait, sizeof(MI_SEMAPHORE_WAIT)) == 0);