diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 2b8c449ad9..92d4cb4d00 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2612,11 +2612,11 @@ void CommandListCoreFamily::appendWriteKernelTimestamp(Event *eve uint64_t contextAddress = ptrOffset(baseAddr, contextOffset); if (maskLsb) { - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition); - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition); + NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, nullptr); + NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, nullptr); } else { - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, nullptr); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, nullptr); } adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 70de197510..59b7851dc7 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -711,14 +711,14 @@ void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(Comm auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode); - EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false); - EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false); + EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr); + EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr); auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode); - EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false); - EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false); + EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr); + EncodeStoreMMIO::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr); } template diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 76a73a8d53..d20406e4bd 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -264,7 +264,8 @@ struct EncodeMathMMIO { uint32_t regOffset, uint32_t immVal, uint64_t dstAddress, - bool workloadPartition); + bool workloadPartition, + void **outCmdBuffer); static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result); @@ -385,7 +386,7 @@ struct EncodeStoreMMIO { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; static const size_t size = sizeof(MI_STORE_REGISTER_MEM); - static void encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition); + static void encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer); static void encode(MI_STORE_REGISTER_MEM *cmdBuffer, uint32_t offset, uint64_t address, bool workloadPartition); static void appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition); }; diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index be44b364b5..4a5f4d1f18 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -120,7 +120,7 @@ void EncodeMathMMIO::encodeMulRegVal(CommandContainer &container, uint32 EncodeSetMMIO::encodeREG(container, RegisterOffsets::csGprR0, RegisterOffsets::csGprR2); i++; } - EncodeStoreMMIO::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false); + EncodeStoreMMIO::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false, nullptr); } /* @@ -149,14 +149,14 @@ void EncodeMathMMIO::encodeGreaterThanPredicate(CommandContainer &contai */ template void EncodeMathMMIO::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress, - bool workloadPartition) { + bool workloadPartition, void **outCmdBuffer) { EncodeSetMMIO::encodeREG(container, RegisterOffsets::csGprR13, regOffset); EncodeSetMMIO::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true); EncodeMath::bitwiseAnd(container, AluRegisters::gpr13, AluRegisters::gpr14, AluRegisters::gpr15); EncodeStoreMMIO::encode(*container.getCommandStream(), - RegisterOffsets::csGprR15, dstAddress, workloadPartition); + RegisterOffsets::csGprR15, dstAddress, workloadPartition, outCmdBuffer); } /* @@ -387,8 +387,11 @@ void EncodeSetMMIO::encodeREG(LinearStream &cmdStream, uint32_t dstOffse } template -void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition) { +void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer) { auto buffer = csr.getSpaceForCmd(); + if (outCmdBuffer != nullptr) { + *outCmdBuffer = buffer; + } EncodeStoreMMIO::encode(buffer, offset, address, workloadPartition); } @@ -605,7 +608,7 @@ void EncodeIndirectParams::setGroupCountIndirect(CommandContainer &conta if (NEO::isUndefinedOffset(offsets[i])) { continue; } - EncodeStoreMMIO::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false); + EncodeStoreMMIO::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false, nullptr); } } @@ -696,7 +699,7 @@ void EncodeIndirectParams::setWorkDimIndirect(CommandContainer &containe EncodeMath::addition(container, resultAluRegister, backupAluRegister, resultAluRegister); } } - EncodeStoreMMIO::encode(*container.getCommandStream(), resultRegister, dstPtr, false); + EncodeStoreMMIO::encode(*container.getCommandStream(), resultRegister, dstPtr, false, nullptr); } } diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index a1affedbfe..1792d36177 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -526,8 +526,8 @@ void BlitCommandsHelper::encodeProfilingStartMmios(LinearStream &cmdS auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr); } template @@ -535,8 +535,8 @@ void BlitCommandsHelper::encodeProfilingEndMmios(LinearStream &cmdStr auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false); - EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr); + EncodeStoreMMIO::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr); } template diff --git a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp index c757e39546..6a952f31a3 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -109,18 +109,32 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterHardwareCommandsTest, givenWorkloadPart uint64_t gpuAddress = 0xFFA000; uint32_t offset = 0x123; - constexpr size_t bufferSize = 64; + constexpr size_t bufferSize = 256; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); EncodeStoreMMIO::encode(cmdStream, offset, gpuAddress, - true); + true, + nullptr); auto storeRegMem = genCmdCast(buffer); ASSERT_NE(nullptr, storeRegMem); EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable()); + + void *outCmdBuffer = nullptr; + size_t beforeEncode = cmdStream.getUsed(); + EncodeStoreMMIO::encode(cmdStream, + offset, + gpuAddress, + true, + &outCmdBuffer); + + storeRegMem = genCmdCast(ptrOffset(buffer, beforeEncode)); + ASSERT_NE(nullptr, storeRegMem); + EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable()); + EXPECT_EQ(storeRegMem, outCmdBuffer); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValueAndWorkloadPartitionWhenEncodeBitwiseAndValIsCalledThenContainerHasCorrectMathCommands) { @@ -135,7 +149,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue constexpr uint32_t regOffset = 0x2000u; constexpr uint32_t immVal = 0xbaau; constexpr uint64_t dstAddress = 0xDEADCAF0u; - EncodeMathMMIO::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true); + void *storeRegMem = nullptr; + EncodeMathMMIO::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true, &storeRegMem); CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), @@ -167,6 +182,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue itor++; ASSERT_NE(commands.end(), itor); auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(cmdMem, storeRegMem); EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress()); EXPECT_EQ(dstAddress, cmdMem->getMemoryAddress()); EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable()); diff --git a/shared/test/unit_test/encoders/test_encode_math.cpp b/shared/test/unit_test/encoders/test_encode_math.cpp index f48738c60b..cc03762eeb 100644 --- a/shared/test/unit_test/encoders/test_encode_math.cpp +++ b/shared/test/unit_test/encoders/test_encode_math.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -182,7 +182,8 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal constexpr uint32_t regOffset = 0x2000u; constexpr uint32_t immVal = 0xbaau; constexpr uint64_t dstAddress = 0xDEADCAF0u; - EncodeMathMMIO::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false); + void *storeRegMem = nullptr; + EncodeMathMMIO::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false, &storeRegMem); CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), @@ -214,6 +215,7 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal itor++; EXPECT_NE(commands.end(), itor); auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(cmdMem, storeRegMem); EXPECT_EQ(cmdMem->getRegisterAddress(), RegisterOffsets::csGprR15); EXPECT_EQ(cmdMem->getMemoryAddress(), dstAddress); }