refactor: add output buffer argument to store register to memory encoder

Related-To: NEO-10064

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-02-26 17:44:07 +00:00
committed by Compute-Runtime-Automation
parent 0ebaf7e1e2
commit 0bf8e8727e
7 changed files with 48 additions and 26 deletions

View File

@@ -2612,11 +2612,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset); uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
if (maskLsb) { if (maskLsb) {
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition); NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, nullptr);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition); NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, nullptr);
} else { } else {
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition); NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, nullptr);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition); NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, nullptr);
} }
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition); adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);

View File

@@ -711,14 +711,14 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode); auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode);
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false); EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false); EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr);
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode); auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode);
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false); EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false); EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr);
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@@ -264,7 +264,8 @@ struct EncodeMathMMIO {
uint32_t regOffset, uint32_t regOffset,
uint32_t immVal, uint32_t immVal,
uint64_t dstAddress, uint64_t dstAddress,
bool workloadPartition); bool workloadPartition,
void **outCmdBuffer);
static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result); static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result);
@@ -385,7 +386,7 @@ struct EncodeStoreMMIO {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
static const size_t size = sizeof(MI_STORE_REGISTER_MEM); static const size_t size = sizeof(MI_STORE_REGISTER_MEM);
static void encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition); static void encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer);
static void encode(MI_STORE_REGISTER_MEM *cmdBuffer, uint32_t offset, uint64_t address, bool workloadPartition); static void encode(MI_STORE_REGISTER_MEM *cmdBuffer, uint32_t offset, uint64_t address, bool workloadPartition);
static void appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition); static void appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition);
}; };

View File

@@ -120,7 +120,7 @@ void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, RegisterOffsets::csGprR2); EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, RegisterOffsets::csGprR2);
i++; i++;
} }
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false); EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false, nullptr);
} }
/* /*
@@ -149,14 +149,14 @@ void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &contai
*/ */
// Emits the command sequence that ANDs a register with an immediate and stores
// the result to memory.
//   container         - command container whose command stream receives the commands
//   regOffset         - register offset copied into csGprR13 as the first operand
//   immVal            - immediate value placed in csGprR14 as the second operand
//   dstAddress        - memory address that receives the content of csGprR15
//   workloadPartition - forwarded unchanged to the store-register encoder
//   outCmdBuffer      - (updated signature) forwarded to EncodeStoreMMIO::encode,
//                       which writes the store command's location through it
//                       when it is not nullptr
template <typename Family> template <typename Family>
void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress, void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress,
bool workloadPartition) { bool workloadPartition, void **outCmdBuffer) {
// Stage the operands in general purpose registers R13 (source register) and R14 (immediate).
// NOTE(review): the meaning of the trailing `true` passed to encodeIMM is not visible here - confirm.
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR13, regOffset); EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR13, regOffset);
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true); EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true);
// Presumably computes gpr15 = gpr13 & gpr14; the argument order is assumed from the call shape - verify against EncodeMath::bitwiseAnd.
EncodeMath<Family>::bitwiseAnd(container, AluRegisters::gpr13, EncodeMath<Family>::bitwiseAnd(container, AluRegisters::gpr13,
AluRegisters::gpr14, AluRegisters::gpr14,
AluRegisters::gpr15); AluRegisters::gpr15);
// Store R15 to dstAddress; this is the only command whose location is exposed through outCmdBuffer.
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), EncodeStoreMMIO<Family>::encode(*container.getCommandStream(),
RegisterOffsets::csGprR15, dstAddress, workloadPartition); RegisterOffsets::csGprR15, dstAddress, workloadPartition, outCmdBuffer);
} }
/* /*
@@ -387,8 +387,11 @@ void EncodeSetMMIO<Family>::encodeREG(LinearStream &cmdStream, uint32_t dstOffse
} }
// Stream-based overload: obtains space for one MI_STORE_REGISTER_MEM command from
// the linear stream and delegates the programming to the pointer-based overload.
//   csr               - linear stream the command space is taken from
//   offset            - forwarded to the pointer-based overload
//                       (callers pass a register offset)
//   address           - forwarded to the pointer-based overload
//                       (callers pass the destination memory address)
//   workloadPartition - forwarded to the pointer-based overload
//   outCmdBuffer      - (updated signature) optional out-parameter; when not nullptr
//                       it receives the pointer returned by getSpaceForCmd, i.e. the
//                       location of the command inside the stream. Callers that do
//                       not need the location pass nullptr.
template <typename Family> template <typename Family>
void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition) { void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer) {
auto buffer = csr.getSpaceForCmd<MI_STORE_REGISTER_MEM>(); auto buffer = csr.getSpaceForCmd<MI_STORE_REGISTER_MEM>();
// Report the command location before it is programmed; the pointer itself does not change afterwards.
if (outCmdBuffer != nullptr) {
*outCmdBuffer = buffer;
}
EncodeStoreMMIO<Family>::encode(buffer, offset, address, workloadPartition); EncodeStoreMMIO<Family>::encode(buffer, offset, address, workloadPartition);
} }
@@ -605,7 +608,7 @@ void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &conta
if (NEO::isUndefinedOffset(offsets[i])) { if (NEO::isUndefinedOffset(offsets[i])) {
continue; continue;
} }
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false); EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false, nullptr);
} }
} }
@@ -696,7 +699,7 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
EncodeMath<Family>::addition(container, resultAluRegister, backupAluRegister, resultAluRegister); EncodeMath<Family>::addition(container, resultAluRegister, backupAluRegister, resultAluRegister);
} }
} }
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), resultRegister, dstPtr, false); EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), resultRegister, dstPtr, false, nullptr);
} }
} }

View File

@@ -526,8 +526,8 @@ void BlitCommandsHelper<GfxFamily>::encodeProfilingStartMmios(LinearStream &cmdS
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode); auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode);
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false); EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false); EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr);
} }
template <typename GfxFamily> template <typename GfxFamily>
@@ -535,8 +535,8 @@ void BlitCommandsHelper<GfxFamily>::encodeProfilingEndMmios(LinearStream &cmdStr
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode); auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode);
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false); EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false); EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr);
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2023 Intel Corporation * Copyright (C) 2021-2024 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -109,18 +109,32 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterHardwareCommandsTest, givenWorkloadPart
uint64_t gpuAddress = 0xFFA000; uint64_t gpuAddress = 0xFFA000;
uint32_t offset = 0x123; uint32_t offset = 0x123;
constexpr size_t bufferSize = 64; constexpr size_t bufferSize = 256;
uint8_t buffer[bufferSize]; uint8_t buffer[bufferSize];
LinearStream cmdStream(buffer, bufferSize); LinearStream cmdStream(buffer, bufferSize);
EncodeStoreMMIO<FamilyType>::encode(cmdStream, EncodeStoreMMIO<FamilyType>::encode(cmdStream,
offset, offset,
gpuAddress, gpuAddress,
true); true,
nullptr);
auto storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(buffer); auto storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(buffer);
ASSERT_NE(nullptr, storeRegMem); ASSERT_NE(nullptr, storeRegMem);
EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable()); EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable());
void *outCmdBuffer = nullptr;
size_t beforeEncode = cmdStream.getUsed();
EncodeStoreMMIO<FamilyType>::encode(cmdStream,
offset,
gpuAddress,
true,
&outCmdBuffer);
storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(ptrOffset(buffer, beforeEncode));
ASSERT_NE(nullptr, storeRegMem);
EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable());
EXPECT_EQ(storeRegMem, outCmdBuffer);
} }
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValueAndWorkloadPartitionWhenEncodeBitwiseAndValIsCalledThenContainerHasCorrectMathCommands) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValueAndWorkloadPartitionWhenEncodeBitwiseAndValIsCalledThenContainerHasCorrectMathCommands) {
@@ -135,7 +149,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
constexpr uint32_t regOffset = 0x2000u; constexpr uint32_t regOffset = 0x2000u;
constexpr uint32_t immVal = 0xbaau; constexpr uint32_t immVal = 0xbaau;
constexpr uint64_t dstAddress = 0xDEADCAF0u; constexpr uint64_t dstAddress = 0xDEADCAF0u;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true); void *storeRegMem = nullptr;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true, &storeRegMem);
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
@@ -167,6 +182,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
itor++; itor++;
ASSERT_NE(commands.end(), itor); ASSERT_NE(commands.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor); auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmdMem, storeRegMem);
EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress()); EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress());
EXPECT_EQ(dstAddress, cmdMem->getMemoryAddress()); EXPECT_EQ(dstAddress, cmdMem->getMemoryAddress());
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable()); EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2020-2023 Intel Corporation * Copyright (C) 2020-2024 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -182,7 +182,8 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
constexpr uint32_t regOffset = 0x2000u; constexpr uint32_t regOffset = 0x2000u;
constexpr uint32_t immVal = 0xbaau; constexpr uint32_t immVal = 0xbaau;
constexpr uint64_t dstAddress = 0xDEADCAF0u; constexpr uint64_t dstAddress = 0xDEADCAF0u;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false); void *storeRegMem = nullptr;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false, &storeRegMem);
CmdParse<FamilyType>::parseCommandBuffer(commands, CmdParse<FamilyType>::parseCommandBuffer(commands,
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
@@ -214,6 +215,7 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
itor++; itor++;
EXPECT_NE(commands.end(), itor); EXPECT_NE(commands.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor); auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmdMem, storeRegMem);
EXPECT_EQ(cmdMem->getRegisterAddress(), RegisterOffsets::csGprR15); EXPECT_EQ(cmdMem->getRegisterAddress(), RegisterOffsets::csGprR15);
EXPECT_EQ(cmdMem->getMemoryAddress(), dstAddress); EXPECT_EQ(cmdMem->getMemoryAddress(), dstAddress);
} }