refactor: add output buffer argument to store register to memory encoder

Related-To: NEO-10064

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2024-02-26 17:44:07 +00:00 committed by Compute-Runtime-Automation
parent 0ebaf7e1e2
commit 0bf8e8727e
7 changed files with 48 additions and 26 deletions

View File

@ -2612,11 +2612,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
if (maskLsb) {
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, nullptr);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, nullptr);
} else {
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, nullptr);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, nullptr);
}
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);

View File

@ -711,14 +711,14 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode);
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr);
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode);
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr);
}
template <typename GfxFamily>

View File

@ -264,7 +264,8 @@ struct EncodeMathMMIO {
uint32_t regOffset,
uint32_t immVal,
uint64_t dstAddress,
bool workloadPartition);
bool workloadPartition,
void **outCmdBuffer);
static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result);
@ -385,7 +386,7 @@ struct EncodeStoreMMIO {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
static const size_t size = sizeof(MI_STORE_REGISTER_MEM);
static void encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition);
static void encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer);
static void encode(MI_STORE_REGISTER_MEM *cmdBuffer, uint32_t offset, uint64_t address, bool workloadPartition);
static void appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition);
};

View File

@ -120,7 +120,7 @@ void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, RegisterOffsets::csGprR2);
i++;
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false);
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false, nullptr);
}
/*
@ -149,14 +149,14 @@ void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &contai
*/
// Encodes a register/immediate bitwise-AND sequence into the container and
// stores the result register to dstAddress.
// - regOffset / immVal: the two operands, staged in csGprR13 and csGprR14.
// - dstAddress: memory address handed to the store encoder for csGprR15.
// - workloadPartition: forwarded unchanged to EncodeStoreMMIO<Family>::encode.
// - outCmdBuffer: forwarded unchanged to EncodeStoreMMIO<Family>::encode; callers
//   in this view pass either nullptr or the address of a void * to be filled in.
// NOTE(review): exact semantics of encodeREG / encodeIMM / bitwiseAnd are defined
// outside this view; the descriptions below follow their argument order only.
template <typename Family>
void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress,
bool workloadPartition) {
bool workloadPartition, void **outCmdBuffer) {
// Stage the operands: regOffset -> csGprR13, immVal -> csGprR14.
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR13, regOffset);
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true);
// AND of gpr13 and gpr14; gpr15 is the third (presumably destination) register.
EncodeMath<Family>::bitwiseAnd(container, AluRegisters::gpr13,
AluRegisters::gpr14,
AluRegisters::gpr15);
// Store csGprR15 to dstAddress through the container's command stream.
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(),
RegisterOffsets::csGprR15, dstAddress, workloadPartition);
RegisterOffsets::csGprR15, dstAddress, workloadPartition, outCmdBuffer);
}
/*
@ -387,8 +387,11 @@ void EncodeSetMMIO<Family>::encodeREG(LinearStream &cmdStream, uint32_t dstOffse
}
// Stream-based overload: obtains space for one MI_STORE_REGISTER_MEM command
// from csr and fills it in through the pointer-based encode overload.
// - offset / address / workloadPartition: passed through unchanged to the
//   pointer-based overload.
// - outCmdBuffer: optional out-parameter; when non-null it receives the pointer
//   to the command space obtained from csr. Pass nullptr when not needed.
template <typename Family>
void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition) {
void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer) {
auto buffer = csr.getSpaceForCmd<MI_STORE_REGISTER_MEM>();
// Report the command location before it is programmed; the pointer itself
// does not change afterwards within this function.
if (outCmdBuffer != nullptr) {
*outCmdBuffer = buffer;
}
// Delegate the actual field programming to the MI_STORE_REGISTER_MEM * overload.
EncodeStoreMMIO<Family>::encode(buffer, offset, address, workloadPartition);
}
@ -605,7 +608,7 @@ void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &conta
if (NEO::isUndefinedOffset(offsets[i])) {
continue;
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false);
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false, nullptr);
}
}
@ -696,7 +699,7 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
EncodeMath<Family>::addition(container, resultAluRegister, backupAluRegister, resultAluRegister);
}
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), resultRegister, dstPtr, false);
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), resultRegister, dstPtr, false, nullptr);
}
}

View File

@ -526,8 +526,8 @@ void BlitCommandsHelper<GfxFamily>::encodeProfilingStartMmios(LinearStream &cmdS
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode);
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false, nullptr);
}
template <typename GfxFamily>
@ -535,8 +535,8 @@ void BlitCommandsHelper<GfxFamily>::encodeProfilingEndMmios(LinearStream &cmdStr
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode);
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false, nullptr);
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false, nullptr);
}
template <typename GfxFamily>

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -109,18 +109,32 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterHardwareCommandsTest, givenWorkloadPart
uint64_t gpuAddress = 0xFFA000;
uint32_t offset = 0x123;
constexpr size_t bufferSize = 64;
constexpr size_t bufferSize = 256;
uint8_t buffer[bufferSize];
LinearStream cmdStream(buffer, bufferSize);
EncodeStoreMMIO<FamilyType>::encode(cmdStream,
offset,
gpuAddress,
true);
true,
nullptr);
auto storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(buffer);
ASSERT_NE(nullptr, storeRegMem);
EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable());
void *outCmdBuffer = nullptr;
size_t beforeEncode = cmdStream.getUsed();
EncodeStoreMMIO<FamilyType>::encode(cmdStream,
offset,
gpuAddress,
true,
&outCmdBuffer);
storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(ptrOffset(buffer, beforeEncode));
ASSERT_NE(nullptr, storeRegMem);
EXPECT_TRUE(storeRegMem->getWorkloadPartitionIdOffsetEnable());
EXPECT_EQ(storeRegMem, outCmdBuffer);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValueAndWorkloadPartitionWhenEncodeBitwiseAndValIsCalledThenContainerHasCorrectMathCommands) {
@ -135,7 +149,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
constexpr uint32_t regOffset = 0x2000u;
constexpr uint32_t immVal = 0xbaau;
constexpr uint64_t dstAddress = 0xDEADCAF0u;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true);
void *storeRegMem = nullptr;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, true, &storeRegMem);
CmdParse<FamilyType>::parseCommandBuffer(commands,
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
@ -167,6 +182,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
itor++;
ASSERT_NE(commands.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmdMem, storeRegMem);
EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress());
EXPECT_EQ(dstAddress, cmdMem->getMemoryAddress());
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -182,7 +182,8 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
constexpr uint32_t regOffset = 0x2000u;
constexpr uint32_t immVal = 0xbaau;
constexpr uint64_t dstAddress = 0xDEADCAF0u;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false);
void *storeRegMem = nullptr;
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress, false, &storeRegMem);
CmdParse<FamilyType>::parseCommandBuffer(commands,
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
@ -214,6 +215,7 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
itor++;
EXPECT_NE(commands.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmdMem, storeRegMem);
EXPECT_EQ(cmdMem->getRegisterAddress(), RegisterOffsets::csGprR15);
EXPECT_EQ(cmdMem->getMemoryAddress(), dstAddress);
}